summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c143
1 files changed, 39 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index f234fdec..e0e034a9 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -39,16 +39,15 @@
39#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ 39#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
40#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ 40#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
41 41
42#define TEGRA_GK20A_BW_PER_FREQ 32
43#define TEGRA_GM20B_BW_PER_FREQ 64
44#define TEGRA_DDR3_BW_PER_FREQ 16
45
42extern struct device tegra_vpr_dev; 46extern struct device tegra_vpr_dev;
43struct gk20a_platform t132_gk20a_tegra_platform; 47struct gk20a_platform t132_gk20a_tegra_platform;
44 48
45struct gk20a_emc_params { 49struct gk20a_emc_params {
46 long emc_slope; 50 long bw_ratio;
47 long emc_offset;
48 long emc_dip_slope;
49 long emc_dip_offset;
50 long emc_xmid;
51 bool linear;
52}; 51};
53 52
54/* 53/*
@@ -189,20 +188,17 @@ fail:
189 * This function returns the minimum emc clock based on gpu frequency 188 * This function returns the minimum emc clock based on gpu frequency
190 */ 189 */
191 190
192long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) 191long gk20a_tegra_get_emc_rate(struct gk20a *g,
192 struct gk20a_emc_params *emc_params, long freq)
193{ 193{
194 long hz; 194 long hz;
195 195
196 freq = INT_TO_FX(HZ_TO_MHZ(freq)); 196 freq = HZ_TO_MHZ(freq);
197 hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
198 197
199 hz -= FXMUL(emc_params->emc_dip_slope, 198 hz = (freq * emc_params->bw_ratio);
200 FXMUL(freq - emc_params->emc_xmid, 199 hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;
201 freq - emc_params->emc_xmid)) +
202 emc_params->emc_dip_offset;
203 200
204 hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ 201 hz = MHZ_TO_HZ(hz);
205 hz = (hz < 0) ? 0 : hz;
206 202
207 return hz; 203 return hz;
208} 204}
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
222 struct gk20a *g = get_gk20a(pdev); 218 struct gk20a *g = get_gk20a(pdev);
223 219
224 long after = gk20a_clk_get_rate(g); 220 long after = gk20a_clk_get_rate(g);
225 long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); 221 long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
226 222
227 clk_set_rate(platform->clk[2], emc_target); 223 clk_set_rate(platform->clk[2], emc_target);
228} 224}
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev)
245/* 241/*
246 * gk20a_tegra_calibrate_emc() 242 * gk20a_tegra_calibrate_emc()
247 * 243 *
248 * Compute emc scaling parameters
249 *
250 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
251 *
252 * Remc - 3d.emc rate
253 * R3d - 3d.cbus rate
254 * Rm - 3d.cbus 'middle' rate = (max + min)/2
255 * S - emc_slope
256 * O - emc_offset
257 * Sd - emc_dip_slope
258 * Od - emc_dip_offset
259 *
260 * this superposes a quadratic dip centered around the middle 3d
261 * frequency over a linear correlation of 3d.emc to 3d clock
262 * rates.
263 *
264 * S, O are chosen so that the maximum 3d rate produces the
265 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
266 * at least the minimum 3d.emc rate.
267 *
268 * Sd and Od are chosen to produce the largest dip that will
269 * keep 3d.emc frequencies monotonously decreasing with 3d
270 * frequencies. To achieve this, the first derivative of Remc
271 * with respect to R3d should be zero for the minimal 3d rate:
272 *
273 * R'emc = S - 2 * Sd * (R3d - Rm)
274 * R'emc(R3d-min) = 0
275 * S = 2 * Sd * (R3d-min - Rm)
276 * = 2 * Sd * (R3d-min - R3d-max) / 2
277 *
278 * +------------------------------+
279 * | Sd = S / (R3d-min - R3d-max) |
280 * +------------------------------+
281 *
282 * dip = Sd * (R3d - Rm)^2 + Od
283 *
284 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
285 *
286 * Sd * (R3d-min - Rm)^2 + Od = 0
287 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
288 * = -Sd * ((R3d-min - R3d-max)^2) / 4
289 *
290 * +------------------------------+
291 * | Od = (emc-max - emc-min) / 4 |
292 * +------------------------------+
293 *
294 */ 244 */
295 245
296void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, 246void gk20a_tegra_calibrate_emc(struct platform_device *pdev,
297 struct clk *clk_3d, struct clk *clk_3d_emc) 247 struct gk20a_emc_params *emc_params)
298{ 248{
299 long correction; 249 struct gk20a *g = get_gk20a(pdev);
300 unsigned long max_emc; 250 long gpu_bw, emc_bw;
301 unsigned long min_emc; 251
302 unsigned long min_rate_3d; 252 /* Detect and store gpu bw */
303 unsigned long max_rate_3d; 253 u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
304 254 switch (ver) {
305 max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); 255 case GK20A_GPUID_GK20A:
306 max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); 256 gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
307 257 break;
308 min_emc = clk_round_rate(clk_3d_emc, 0); 258 case GK20A_GPUID_GM20B:
309 min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); 259 gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
310 260 break;
311 max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); 261 default:
312 max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); 262 gpu_bw = 0;
313 263 break;
314 min_rate_3d = clk_round_rate(clk_3d, 0); 264 }
315 min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); 265
316 266 /* TODO detect DDR3 vs DDR4 */
317 emc_params->emc_slope = 267 emc_bw = TEGRA_DDR3_BW_PER_FREQ;
318 FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); 268
319 emc_params->emc_offset = max_emc - 269 /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
320 FXMUL(emc_params->emc_slope, max_rate_3d); 270 * NOTE the ratio must come out as an integer */
321 /* Guarantee max 3d rate maps to max emc rate */ 271 emc_params->bw_ratio = (gpu_bw / emc_bw);
322 emc_params->emc_offset += max_emc -
323 (FXMUL(emc_params->emc_slope, max_rate_3d) +
324 emc_params->emc_offset);
325
326 emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
327 emc_params->emc_dip_slope =
328 -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
329 emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
330 correction =
331 emc_params->emc_dip_offset +
332 FXMUL(emc_params->emc_dip_slope,
333 FXMUL(max_rate_3d - emc_params->emc_xmid,
334 max_rate_3d - emc_params->emc_xmid));
335 emc_params->emc_dip_offset -= correction;
336} 272}
337 273
338/* 274/*
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
427{ 363{
428 struct gk20a_platform *platform = gk20a_get_platform(pdev); 364 struct gk20a_platform *platform = gk20a_get_platform(pdev);
429 struct gk20a_scale_profile *profile = platform->g->scale_profile; 365 struct gk20a_scale_profile *profile = platform->g->scale_profile;
430 struct gk20a_emc_params *emc_params; 366 struct gk20a_emc_params *emc_params;
431 367
432 if (!profile) 368 if (!profile)
433 return; 369 return;
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
436 if (!emc_params) 372 if (!emc_params)
437 return; 373 return;
438 374
439 gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), 375 gk20a_tegra_calibrate_emc(pdev, emc_params);
440 platform->clk[2]);
441 376
442 profile->private_data = emc_params; 377 profile->private_data = emc_params;
443} 378}