diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 143 |
1 files changed, 39 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index f234fdec..e0e034a9 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -39,16 +39,15 @@ | |||
39 | #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ | 39 | #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ |
40 | #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ | 40 | #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ |
41 | 41 | ||
42 | #define TEGRA_GK20A_BW_PER_FREQ 32 | ||
43 | #define TEGRA_GM20B_BW_PER_FREQ 64 | ||
44 | #define TEGRA_DDR3_BW_PER_FREQ 16 | ||
45 | |||
42 | extern struct device tegra_vpr_dev; | 46 | extern struct device tegra_vpr_dev; |
43 | struct gk20a_platform t132_gk20a_tegra_platform; | 47 | struct gk20a_platform t132_gk20a_tegra_platform; |
44 | 48 | ||
45 | struct gk20a_emc_params { | 49 | struct gk20a_emc_params { |
46 | long emc_slope; | 50 | long bw_ratio; |
47 | long emc_offset; | ||
48 | long emc_dip_slope; | ||
49 | long emc_dip_offset; | ||
50 | long emc_xmid; | ||
51 | bool linear; | ||
52 | }; | 51 | }; |
53 | 52 | ||
54 | /* | 53 | /* |
@@ -189,20 +188,17 @@ fail: | |||
189 | * This function returns the minimum emc clock based on gpu frequency | 188 | * This function returns the minimum emc clock based on gpu frequency |
190 | */ | 189 | */ |
191 | 190 | ||
192 | long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) | 191 | long gk20a_tegra_get_emc_rate(struct gk20a *g, |
192 | struct gk20a_emc_params *emc_params, long freq) | ||
193 | { | 193 | { |
194 | long hz; | 194 | long hz; |
195 | 195 | ||
196 | freq = INT_TO_FX(HZ_TO_MHZ(freq)); | 196 | freq = HZ_TO_MHZ(freq); |
197 | hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset; | ||
198 | 197 | ||
199 | hz -= FXMUL(emc_params->emc_dip_slope, | 198 | hz = (freq * emc_params->bw_ratio); |
200 | FXMUL(freq - emc_params->emc_xmid, | 199 | hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000; |
201 | freq - emc_params->emc_xmid)) + | ||
202 | emc_params->emc_dip_offset; | ||
203 | 200 | ||
204 | hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ | 201 | hz = MHZ_TO_HZ(hz); |
205 | hz = (hz < 0) ? 0 : hz; | ||
206 | 202 | ||
207 | return hz; | 203 | return hz; |
208 | } | 204 | } |
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev, | |||
222 | struct gk20a *g = get_gk20a(pdev); | 218 | struct gk20a *g = get_gk20a(pdev); |
223 | 219 | ||
224 | long after = gk20a_clk_get_rate(g); | 220 | long after = gk20a_clk_get_rate(g); |
225 | long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); | 221 | long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after); |
226 | 222 | ||
227 | clk_set_rate(platform->clk[2], emc_target); | 223 | clk_set_rate(platform->clk[2], emc_target); |
228 | } | 224 | } |
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev) | |||
245 | /* | 241 | /* |
246 | * gk20a_tegra_calibrate_emc() | 242 | * gk20a_tegra_calibrate_emc() |
247 | * | 243 | * |
248 | * Compute emc scaling parameters | ||
249 | * | ||
250 | * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) | ||
251 | * | ||
252 | * Remc - 3d.emc rate | ||
253 | * R3d - 3d.cbus rate | ||
254 | * Rm - 3d.cbus 'middle' rate = (max + min)/2 | ||
255 | * S - emc_slope | ||
256 | * O - emc_offset | ||
257 | * Sd - emc_dip_slope | ||
258 | * Od - emc_dip_offset | ||
259 | * | ||
260 | * this superposes a quadratic dip centered around the middle 3d | ||
261 | * frequency over a linear correlation of 3d.emc to 3d clock | ||
262 | * rates. | ||
263 | * | ||
264 | * S, O are chosen so that the maximum 3d rate produces the | ||
265 | * maximum 3d.emc rate exactly, and the minimum 3d rate produces | ||
266 | * at least the minimum 3d.emc rate. | ||
267 | * | ||
268 | * Sd and Od are chosen to produce the largest dip that will | ||
269 | * keep 3d.emc frequencies monotonously decreasing with 3d | ||
270 | * frequencies. To achieve this, the first derivative of Remc | ||
271 | * with respect to R3d should be zero for the minimal 3d rate: | ||
272 | * | ||
273 | * R'emc = S - 2 * Sd * (R3d - Rm) | ||
274 | * R'emc(R3d-min) = 0 | ||
275 | * S = 2 * Sd * (R3d-min - Rm) | ||
276 | * = 2 * Sd * (R3d-min - R3d-max) / 2 | ||
277 | * | ||
278 | * +------------------------------+ | ||
279 | * | Sd = S / (R3d-min - R3d-max) | | ||
280 | * +------------------------------+ | ||
281 | * | ||
282 | * dip = Sd * (R3d - Rm)^2 + Od | ||
283 | * | ||
284 | * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives | ||
285 | * | ||
286 | * Sd * (R3d-min - Rm)^2 + Od = 0 | ||
287 | * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 | ||
288 | * = -Sd * ((R3d-min - R3d-max)^2) / 4 | ||
289 | * | ||
290 | * +------------------------------+ | ||
291 | * | Od = (emc-max - emc-min) / 4 | | ||
292 | * +------------------------------+ | ||
293 | * | ||
294 | */ | 244 | */ |
295 | 245 | ||
296 | void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, | 246 | void gk20a_tegra_calibrate_emc(struct platform_device *pdev, |
297 | struct clk *clk_3d, struct clk *clk_3d_emc) | 247 | struct gk20a_emc_params *emc_params) |
298 | { | 248 | { |
299 | long correction; | 249 | struct gk20a *g = get_gk20a(pdev); |
300 | unsigned long max_emc; | 250 | long gpu_bw, emc_bw; |
301 | unsigned long min_emc; | 251 | |
302 | unsigned long min_rate_3d; | 252 | /* Detect and store gpu bw */ |
303 | unsigned long max_rate_3d; | 253 | u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; |
304 | 254 | switch (ver) { | |
305 | max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); | 255 | case GK20A_GPUID_GK20A: |
306 | max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); | 256 | gpu_bw = TEGRA_GK20A_BW_PER_FREQ; |
307 | 257 | break; | |
308 | min_emc = clk_round_rate(clk_3d_emc, 0); | 258 | case GK20A_GPUID_GM20B: |
309 | min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); | 259 | gpu_bw = TEGRA_GM20B_BW_PER_FREQ; |
310 | 260 | break; | |
311 | max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); | 261 | default: |
312 | max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); | 262 | gpu_bw = 0; |
313 | 263 | break; | |
314 | min_rate_3d = clk_round_rate(clk_3d, 0); | 264 | } |
315 | min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); | 265 | |
316 | 266 | /* TODO detect DDR3 vs DDR4 */ | |
317 | emc_params->emc_slope = | 267 | emc_bw = TEGRA_DDR3_BW_PER_FREQ; |
318 | FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); | 268 | |
319 | emc_params->emc_offset = max_emc - | 269 | /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq |
320 | FXMUL(emc_params->emc_slope, max_rate_3d); | 270 | * NOTE the ratio must come out as an integer */ |
321 | /* Guarantee max 3d rate maps to max emc rate */ | 271 | emc_params->bw_ratio = (gpu_bw / emc_bw); |
322 | emc_params->emc_offset += max_emc - | ||
323 | (FXMUL(emc_params->emc_slope, max_rate_3d) + | ||
324 | emc_params->emc_offset); | ||
325 | |||
326 | emc_params->emc_dip_offset = (max_emc - min_emc) / 4; | ||
327 | emc_params->emc_dip_slope = | ||
328 | -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d); | ||
329 | emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2; | ||
330 | correction = | ||
331 | emc_params->emc_dip_offset + | ||
332 | FXMUL(emc_params->emc_dip_slope, | ||
333 | FXMUL(max_rate_3d - emc_params->emc_xmid, | ||
334 | max_rate_3d - emc_params->emc_xmid)); | ||
335 | emc_params->emc_dip_offset -= correction; | ||
336 | } | 272 | } |
337 | 273 | ||
338 | /* | 274 | /* |
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) | |||
427 | { | 363 | { |
428 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | 364 | struct gk20a_platform *platform = gk20a_get_platform(pdev); |
429 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | 365 | struct gk20a_scale_profile *profile = platform->g->scale_profile; |
430 | struct gk20a_emc_params *emc_params; | 366 | struct gk20a_emc_params *emc_params; |
431 | 367 | ||
432 | if (!profile) | 368 | if (!profile) |
433 | return; | 369 | return; |
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) | |||
436 | if (!emc_params) | 372 | if (!emc_params) |
437 | return; | 373 | return; |
438 | 374 | ||
439 | gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), | 375 | gk20a_tegra_calibrate_emc(pdev, emc_params); |
440 | platform->clk[2]); | ||
441 | 376 | ||
442 | profile->private_data = emc_params; | 377 | profile->private_data = emc_params; |
443 | } | 378 | } |