diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 143 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 1 |
6 files changed, 75 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9e9a94a0..0816878a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -71,6 +71,8 @@ | |||
71 | 71 | ||
72 | #define GK20A_NUM_CDEVS 6 | 72 | #define GK20A_NUM_CDEVS 6 |
73 | 73 | ||
74 | #define EMC3D_DEFAULT_RATIO 750 | ||
75 | |||
74 | #if defined(GK20A_DEBUG) | 76 | #if defined(GK20A_DEBUG) |
75 | u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; | 77 | u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; |
76 | u32 gk20a_dbg_ftrace; | 78 | u32 gk20a_dbg_ftrace; |
@@ -1462,6 +1464,8 @@ static int gk20a_probe(struct platform_device *dev) | |||
1462 | return err; | 1464 | return err; |
1463 | } | 1465 | } |
1464 | 1466 | ||
1467 | gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO; | ||
1468 | |||
1465 | /* Initialise scaling */ | 1469 | /* Initialise scaling */ |
1466 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | 1470 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) |
1467 | gk20a_scale_init(dev); | 1471 | gk20a_scale_init(dev); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 77300203..730ef43e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -297,6 +297,8 @@ struct gk20a { | |||
297 | bool forced_reset; | 297 | bool forced_reset; |
298 | bool allow_all; | 298 | bool allow_all; |
299 | 299 | ||
300 | u32 emc3d_ratio; | ||
301 | |||
300 | #ifdef CONFIG_DEBUG_FS | 302 | #ifdef CONFIG_DEBUG_FS |
301 | spinlock_t debugfs_lock; | 303 | spinlock_t debugfs_lock; |
302 | struct dentry *debugfs_ltc_enabled; | 304 | struct dentry *debugfs_ltc_enabled; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index bec18328..fceed5e9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | |||
@@ -469,6 +469,32 @@ static ssize_t allow_all_enable_store(struct device *device, | |||
469 | static DEVICE_ATTR(allow_all, ROOTRW, | 469 | static DEVICE_ATTR(allow_all, ROOTRW, |
470 | allow_all_enable_read, allow_all_enable_store); | 470 | allow_all_enable_read, allow_all_enable_store); |
471 | 471 | ||
472 | static ssize_t emc3d_ratio_store(struct device *device, | ||
473 | struct device_attribute *attr, const char *buf, size_t count) | ||
474 | { | ||
475 | struct platform_device *ndev = to_platform_device(device); | ||
476 | struct gk20a *g = get_gk20a(ndev); | ||
477 | unsigned long val = 0; | ||
478 | |||
479 | if (kstrtoul(buf, 10, &val) < 0) | ||
480 | return -EINVAL; | ||
481 | |||
482 | g->emc3d_ratio = val; | ||
483 | |||
484 | return count; | ||
485 | } | ||
486 | |||
487 | static ssize_t emc3d_ratio_read(struct device *device, | ||
488 | struct device_attribute *attr, char *buf) | ||
489 | { | ||
490 | struct platform_device *ndev = to_platform_device(device); | ||
491 | struct gk20a *g = get_gk20a(ndev); | ||
492 | |||
493 | return sprintf(buf, "%d\n", g->emc3d_ratio); | ||
494 | } | ||
495 | |||
496 | static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); | ||
497 | |||
472 | #ifdef CONFIG_PM_RUNTIME | 498 | #ifdef CONFIG_PM_RUNTIME |
473 | static ssize_t force_idle_store(struct device *device, | 499 | static ssize_t force_idle_store(struct device *device, |
474 | struct device_attribute *attr, const char *buf, size_t count) | 500 | struct device_attribute *attr, const char *buf, size_t count) |
@@ -566,6 +592,7 @@ void gk20a_remove_sysfs(struct device *dev) | |||
566 | device_remove_file(dev, &dev_attr_slcg_enable); | 592 | device_remove_file(dev, &dev_attr_slcg_enable); |
567 | device_remove_file(dev, &dev_attr_ptimer_scale_factor); | 593 | device_remove_file(dev, &dev_attr_ptimer_scale_factor); |
568 | device_remove_file(dev, &dev_attr_elpg_enable); | 594 | device_remove_file(dev, &dev_attr_elpg_enable); |
595 | device_remove_file(dev, &dev_attr_emc3d_ratio); | ||
569 | device_remove_file(dev, &dev_attr_counters); | 596 | device_remove_file(dev, &dev_attr_counters); |
570 | device_remove_file(dev, &dev_attr_counters_reset); | 597 | device_remove_file(dev, &dev_attr_counters_reset); |
571 | device_remove_file(dev, &dev_attr_load); | 598 | device_remove_file(dev, &dev_attr_load); |
@@ -593,6 +620,7 @@ void gk20a_create_sysfs(struct platform_device *dev) | |||
593 | error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); | 620 | error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); |
594 | error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); | 621 | error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); |
595 | error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); | 622 | error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); |
623 | error |= device_create_file(&dev->dev, &dev_attr_emc3d_ratio); | ||
596 | error |= device_create_file(&dev->dev, &dev_attr_counters); | 624 | error |= device_create_file(&dev->dev, &dev_attr_counters); |
597 | error |= device_create_file(&dev->dev, &dev_attr_counters_reset); | 625 | error |= device_create_file(&dev->dev, &dev_attr_counters_reset); |
598 | error |= device_create_file(&dev->dev, &dev_attr_load); | 626 | error |= device_create_file(&dev->dev, &dev_attr_load); |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index f234fdec..e0e034a9 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -39,16 +39,15 @@ | |||
39 | #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ | 39 | #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ |
40 | #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ | 40 | #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ |
41 | 41 | ||
42 | #define TEGRA_GK20A_BW_PER_FREQ 32 | ||
43 | #define TEGRA_GM20B_BW_PER_FREQ 64 | ||
44 | #define TEGRA_DDR3_BW_PER_FREQ 16 | ||
45 | |||
42 | extern struct device tegra_vpr_dev; | 46 | extern struct device tegra_vpr_dev; |
43 | struct gk20a_platform t132_gk20a_tegra_platform; | 47 | struct gk20a_platform t132_gk20a_tegra_platform; |
44 | 48 | ||
45 | struct gk20a_emc_params { | 49 | struct gk20a_emc_params { |
46 | long emc_slope; | 50 | long bw_ratio; |
47 | long emc_offset; | ||
48 | long emc_dip_slope; | ||
49 | long emc_dip_offset; | ||
50 | long emc_xmid; | ||
51 | bool linear; | ||
52 | }; | 51 | }; |
53 | 52 | ||
54 | /* | 53 | /* |
@@ -189,20 +188,17 @@ fail: | |||
189 | * This function returns the minimum emc clock based on gpu frequency | 188 | * This function returns the minimum emc clock based on gpu frequency |
190 | */ | 189 | */ |
191 | 190 | ||
192 | long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) | 191 | long gk20a_tegra_get_emc_rate(struct gk20a *g, |
192 | struct gk20a_emc_params *emc_params, long freq) | ||
193 | { | 193 | { |
194 | long hz; | 194 | long hz; |
195 | 195 | ||
196 | freq = INT_TO_FX(HZ_TO_MHZ(freq)); | 196 | freq = HZ_TO_MHZ(freq); |
197 | hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset; | ||
198 | 197 | ||
199 | hz -= FXMUL(emc_params->emc_dip_slope, | 198 | hz = (freq * emc_params->bw_ratio); |
200 | FXMUL(freq - emc_params->emc_xmid, | 199 | hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000; |
201 | freq - emc_params->emc_xmid)) + | ||
202 | emc_params->emc_dip_offset; | ||
203 | 200 | ||
204 | hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ | 201 | hz = MHZ_TO_HZ(hz); |
205 | hz = (hz < 0) ? 0 : hz; | ||
206 | 202 | ||
207 | return hz; | 203 | return hz; |
208 | } | 204 | } |
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev, | |||
222 | struct gk20a *g = get_gk20a(pdev); | 218 | struct gk20a *g = get_gk20a(pdev); |
223 | 219 | ||
224 | long after = gk20a_clk_get_rate(g); | 220 | long after = gk20a_clk_get_rate(g); |
225 | long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); | 221 | long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after); |
226 | 222 | ||
227 | clk_set_rate(platform->clk[2], emc_target); | 223 | clk_set_rate(platform->clk[2], emc_target); |
228 | } | 224 | } |
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev) | |||
245 | /* | 241 | /* |
246 | * gk20a_tegra_calibrate_emc() | 242 | * gk20a_tegra_calibrate_emc() |
247 | * | 243 | * |
248 | * Compute emc scaling parameters | ||
249 | * | ||
250 | * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) | ||
251 | * | ||
252 | * Remc - 3d.emc rate | ||
253 | * R3d - 3d.cbus rate | ||
254 | * Rm - 3d.cbus 'middle' rate = (max + min)/2 | ||
255 | * S - emc_slope | ||
256 | * O - emc_offset | ||
257 | * Sd - emc_dip_slope | ||
258 | * Od - emc_dip_offset | ||
259 | * | ||
260 | * this superposes a quadratic dip centered around the middle 3d | ||
261 | * frequency over a linear correlation of 3d.emc to 3d clock | ||
262 | * rates. | ||
263 | * | ||
264 | * S, O are chosen so that the maximum 3d rate produces the | ||
265 | * maximum 3d.emc rate exactly, and the minimum 3d rate produces | ||
266 | * at least the minimum 3d.emc rate. | ||
267 | * | ||
268 | * Sd and Od are chosen to produce the largest dip that will | ||
269 | * keep 3d.emc frequencies monotonously decreasing with 3d | ||
270 | * frequencies. To achieve this, the first derivative of Remc | ||
271 | * with respect to R3d should be zero for the minimal 3d rate: | ||
272 | * | ||
273 | * R'emc = S - 2 * Sd * (R3d - Rm) | ||
274 | * R'emc(R3d-min) = 0 | ||
275 | * S = 2 * Sd * (R3d-min - Rm) | ||
276 | * = 2 * Sd * (R3d-min - R3d-max) / 2 | ||
277 | * | ||
278 | * +------------------------------+ | ||
279 | * | Sd = S / (R3d-min - R3d-max) | | ||
280 | * +------------------------------+ | ||
281 | * | ||
282 | * dip = Sd * (R3d - Rm)^2 + Od | ||
283 | * | ||
284 | * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives | ||
285 | * | ||
286 | * Sd * (R3d-min - Rm)^2 + Od = 0 | ||
287 | * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 | ||
288 | * = -Sd * ((R3d-min - R3d-max)^2) / 4 | ||
289 | * | ||
290 | * +------------------------------+ | ||
291 | * | Od = (emc-max - emc-min) / 4 | | ||
292 | * +------------------------------+ | ||
293 | * | ||
294 | */ | 244 | */ |
295 | 245 | ||
296 | void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, | 246 | void gk20a_tegra_calibrate_emc(struct platform_device *pdev, |
297 | struct clk *clk_3d, struct clk *clk_3d_emc) | 247 | struct gk20a_emc_params *emc_params) |
298 | { | 248 | { |
299 | long correction; | 249 | struct gk20a *g = get_gk20a(pdev); |
300 | unsigned long max_emc; | 250 | long gpu_bw, emc_bw; |
301 | unsigned long min_emc; | 251 | |
302 | unsigned long min_rate_3d; | 252 | /* Detect and store gpu bw */ |
303 | unsigned long max_rate_3d; | 253 | u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; |
304 | 254 | switch (ver) { | |
305 | max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); | 255 | case GK20A_GPUID_GK20A: |
306 | max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); | 256 | gpu_bw = TEGRA_GK20A_BW_PER_FREQ; |
307 | 257 | break; | |
308 | min_emc = clk_round_rate(clk_3d_emc, 0); | 258 | case GK20A_GPUID_GM20B: |
309 | min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); | 259 | gpu_bw = TEGRA_GM20B_BW_PER_FREQ; |
310 | 260 | break; | |
311 | max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); | 261 | default: |
312 | max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); | 262 | gpu_bw = 0; |
313 | 263 | break; | |
314 | min_rate_3d = clk_round_rate(clk_3d, 0); | 264 | } |
315 | min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); | 265 | |
316 | 266 | /* TODO detect DDR3 vs DDR4 */ | |
317 | emc_params->emc_slope = | 267 | emc_bw = TEGRA_DDR3_BW_PER_FREQ; |
318 | FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); | 268 | |
319 | emc_params->emc_offset = max_emc - | 269 | /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq |
320 | FXMUL(emc_params->emc_slope, max_rate_3d); | 270 | * NOTE the ratio must come out as an integer */ |
321 | /* Guarantee max 3d rate maps to max emc rate */ | 271 | emc_params->bw_ratio = (gpu_bw / emc_bw); |
322 | emc_params->emc_offset += max_emc - | ||
323 | (FXMUL(emc_params->emc_slope, max_rate_3d) + | ||
324 | emc_params->emc_offset); | ||
325 | |||
326 | emc_params->emc_dip_offset = (max_emc - min_emc) / 4; | ||
327 | emc_params->emc_dip_slope = | ||
328 | -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d); | ||
329 | emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2; | ||
330 | correction = | ||
331 | emc_params->emc_dip_offset + | ||
332 | FXMUL(emc_params->emc_dip_slope, | ||
333 | FXMUL(max_rate_3d - emc_params->emc_xmid, | ||
334 | max_rate_3d - emc_params->emc_xmid)); | ||
335 | emc_params->emc_dip_offset -= correction; | ||
336 | } | 272 | } |
337 | 273 | ||
338 | /* | 274 | /* |
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) | |||
427 | { | 363 | { |
428 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | 364 | struct gk20a_platform *platform = gk20a_get_platform(pdev); |
429 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | 365 | struct gk20a_scale_profile *profile = platform->g->scale_profile; |
430 | struct gk20a_emc_params *emc_params; | 366 | struct gk20a_emc_params *emc_params; |
431 | 367 | ||
432 | if (!profile) | 368 | if (!profile) |
433 | return; | 369 | return; |
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) | |||
436 | if (!emc_params) | 372 | if (!emc_params) |
437 | return; | 373 | return; |
438 | 374 | ||
439 | gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), | 375 | gk20a_tegra_calibrate_emc(pdev, emc_params); |
440 | platform->clk[2]); | ||
441 | 376 | ||
442 | profile->private_data = emc_params; | 377 | profile->private_data = emc_params; |
443 | } | 378 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 8bdbb106..177e3525 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -3688,6 +3688,7 @@ int gk20a_pmu_load_update(struct gk20a *g) | |||
3688 | 3688 | ||
3689 | pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); | 3689 | pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); |
3690 | pmu->load_shadow = _load / 10; | 3690 | pmu->load_shadow = _load / 10; |
3691 | pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); | ||
3691 | 3692 | ||
3692 | return 0; | 3693 | return 0; |
3693 | } | 3694 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 591ffbc6..81177f5c 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | |||
@@ -1080,6 +1080,7 @@ struct pmu_gk20a { | |||
1080 | 1080 | ||
1081 | u32 sample_buffer; | 1081 | u32 sample_buffer; |
1082 | u32 load_shadow; | 1082 | u32 load_shadow; |
1083 | u32 load_avg; | ||
1083 | 1084 | ||
1084 | struct mutex isr_mutex; | 1085 | struct mutex isr_mutex; |
1085 | bool isr_enabled; | 1086 | bool isr_enabled; |