diff options
Diffstat (limited to 'include/os/linux/platform_gp10b_tegra.c')
-rw-r--r-- | include/os/linux/platform_gp10b_tegra.c | 510 |
1 files changed, 510 insertions, 0 deletions
diff --git a/include/os/linux/platform_gp10b_tegra.c b/include/os/linux/platform_gp10b_tegra.c new file mode 100644 index 0000000..9bf8d63 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * GP10B Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/of_platform.h> | ||
17 | #include <linux/debugfs.h> | ||
18 | #include <linux/dma-buf.h> | ||
19 | #include <linux/nvmap.h> | ||
20 | #include <linux/reset.h> | ||
21 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
22 | |||
23 | #include <uapi/linux/nvgpu.h> | ||
24 | |||
25 | #include <soc/tegra/tegra_bpmp.h> | ||
26 | #include <soc/tegra/tegra_powergate.h> | ||
27 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
28 | |||
29 | #include <dt-bindings/memory/tegra-swgroup.h> | ||
30 | |||
31 | #include <nvgpu/kmem.h> | ||
32 | #include <nvgpu/bug.h> | ||
33 | #include <nvgpu/enabled.h> | ||
34 | #include <nvgpu/hashtable.h> | ||
35 | #include <nvgpu/gk20a.h> | ||
36 | #include <nvgpu/nvhost.h> | ||
37 | |||
38 | #include "os_linux.h" | ||
39 | |||
40 | #include "clk.h" | ||
41 | |||
42 | #include "platform_gk20a.h" | ||
43 | #include "platform_gk20a_tegra.h" | ||
44 | #include "platform_gp10b.h" | ||
45 | #include "platform_gp10b_tegra.h" | ||
46 | #include "scale.h" | ||
47 | |||
/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
#define GP10B_FREQ_SELECT_STEP	8
/* Allow limited set of frequencies to be available */
#define GP10B_NUM_SUPPORTED_FREQS 15
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120

/*
 * Frequency table handed out by gp10b_clk_get_freqs().
 *
 * Taking every GP10B_FREQ_SELECT_STEP'th entry of a table with
 * GP10B_MAX_SUPPORTED_FREQS entries yields at most MAX / STEP values, and
 * the maximum rate is appended after them when it does not fall on a
 * selected index.  The extra slot is reserved for that appended entry.
 */
static unsigned long
gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP + 1];

/* Set once gp10b_freq_table has been filled in; later calls reuse it */
static bool freq_table_init_complete;
/* Number of valid entries in gp10b_freq_table */
static int num_supported_freq;
59 | |||
/*
 * Bandwidth-per-frequency figures for the GPU and for DDR4.  Their quotient
 * (64 / 16 = 4) is the factor gp10b_tegra_postscale() applies when it turns
 * a GPU frequency into an EMC rate request.
 */
#define TEGRA_GP10B_BW_PER_FREQ	64
#define TEGRA_DDR4_BW_PER_FREQ	16

#define EMC_BW_RATIO	(TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)

/* Rate requested for the "gpu" clock when it is acquired */
#define GPCCLK_INIT_RATE	1000000000

/*
 * Clocks acquired by gp10b_tegra_get_clocks(), together with the rate that
 * is requested for each of them right after lookup.
 */
static struct {
	char *name;
	unsigned long default_rate;
} tegra_gp10b_clocks[] = {
	{ .name = "gpu",     .default_rate = GPCCLK_INIT_RATE },
	{ .name = "gpu_sys", .default_rate = 204000000 },
};
73 | |||
74 | /* | ||
75 | * gp10b_tegra_get_clocks() | ||
76 | * | ||
77 | * This function finds clocks in tegra platform and populates | ||
78 | * the clock information to gp10b platform data. | ||
79 | */ | ||
80 | |||
81 | int gp10b_tegra_get_clocks(struct device *dev) | ||
82 | { | ||
83 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
84 | unsigned int i; | ||
85 | |||
86 | platform->num_clks = 0; | ||
87 | for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { | ||
88 | long rate = tegra_gp10b_clocks[i].default_rate; | ||
89 | struct clk *c; | ||
90 | |||
91 | c = clk_get(dev, tegra_gp10b_clocks[i].name); | ||
92 | if (IS_ERR(c)) { | ||
93 | nvgpu_err(platform->g, "cannot get clock %s", | ||
94 | tegra_gp10b_clocks[i].name); | ||
95 | } else { | ||
96 | clk_set_rate(c, rate); | ||
97 | platform->clk[i] = c; | ||
98 | } | ||
99 | } | ||
100 | platform->num_clks = i; | ||
101 | |||
102 | if (platform->clk[0]) { | ||
103 | i = tegra_bpmp_dvfs_get_clk_id(dev->of_node, | ||
104 | tegra_gp10b_clocks[0].name); | ||
105 | if (i > 0) | ||
106 | platform->maxmin_clk_id = i; | ||
107 | } | ||
108 | |||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | void gp10b_tegra_scale_init(struct device *dev) | ||
113 | { | ||
114 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
115 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
116 | struct tegra_bwmgr_client *bwmgr_handle; | ||
117 | |||
118 | if (!profile) | ||
119 | return; | ||
120 | |||
121 | if ((struct tegra_bwmgr_client *)profile->private_data) | ||
122 | return; | ||
123 | |||
124 | bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
125 | if (!bwmgr_handle) | ||
126 | return; | ||
127 | |||
128 | profile->private_data = (void *)bwmgr_handle; | ||
129 | } | ||
130 | |||
131 | static void gp10b_tegra_scale_exit(struct device *dev) | ||
132 | { | ||
133 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
134 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
135 | |||
136 | if (profile && profile->private_data) | ||
137 | tegra_bwmgr_unregister( | ||
138 | (struct tegra_bwmgr_client *)profile->private_data); | ||
139 | } | ||
140 | |||
141 | static int gp10b_tegra_probe(struct device *dev) | ||
142 | { | ||
143 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
144 | bool joint_xpu_rail = false; | ||
145 | struct gk20a *g = platform->g; | ||
146 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
147 | int ret; | ||
148 | |||
149 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
150 | if (ret) | ||
151 | return ret; | ||
152 | #endif | ||
153 | |||
154 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
155 | if (ret) | ||
156 | return ret; | ||
157 | |||
158 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
159 | |||
160 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
161 | = false; | ||
162 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
163 | = false; | ||
164 | |||
165 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
166 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
167 | |||
168 | #ifdef CONFIG_OF | ||
169 | joint_xpu_rail = of_property_read_bool(of_chosen, | ||
170 | "nvidia,tegra-joint_xpu_rail"); | ||
171 | #endif | ||
172 | |||
173 | if (joint_xpu_rail) { | ||
174 | nvgpu_log_info(g, "XPU rails are joint\n"); | ||
175 | platform->can_railgate_init = false; | ||
176 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); | ||
177 | } | ||
178 | |||
179 | gp10b_tegra_get_clocks(dev); | ||
180 | nvgpu_linux_init_clk_support(platform->g); | ||
181 | |||
182 | nvgpu_mutex_init(&platform->clk_get_freq_lock); | ||
183 | |||
184 | platform->g->ops.clk.support_clk_freq_controller = true; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
/* Late-probe hook: nothing to do on GP10B, always reports success. */
static int gp10b_tegra_late_probe(struct device *dev)
{
	const int status = 0;

	return status;
}
193 | |||
194 | static int gp10b_tegra_remove(struct device *dev) | ||
195 | { | ||
196 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
197 | |||
198 | /* deinitialise tegra specific scaling quirks */ | ||
199 | gp10b_tegra_scale_exit(dev); | ||
200 | |||
201 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
202 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
203 | #endif | ||
204 | |||
205 | nvgpu_mutex_destroy(&platform->clk_get_freq_lock); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static bool gp10b_tegra_is_railgated(struct device *dev) | ||
211 | { | ||
212 | bool ret = false; | ||
213 | |||
214 | if (tegra_bpmp_running()) | ||
215 | ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); | ||
216 | |||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | static int gp10b_tegra_railgate(struct device *dev) | ||
221 | { | ||
222 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
223 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
224 | |||
225 | /* remove emc frequency floor */ | ||
226 | if (profile) | ||
227 | tegra_bwmgr_set_emc( | ||
228 | (struct tegra_bwmgr_client *)profile->private_data, | ||
229 | 0, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
230 | |||
231 | if (tegra_bpmp_running() && | ||
232 | tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { | ||
233 | int i; | ||
234 | for (i = 0; i < platform->num_clks; i++) { | ||
235 | if (platform->clk[i]) | ||
236 | clk_disable_unprepare(platform->clk[i]); | ||
237 | } | ||
238 | tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
239 | } | ||
240 | return 0; | ||
241 | } | ||
242 | |||
243 | static int gp10b_tegra_unrailgate(struct device *dev) | ||
244 | { | ||
245 | int ret = 0; | ||
246 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
247 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
248 | |||
249 | if (tegra_bpmp_running()) { | ||
250 | int i; | ||
251 | ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); | ||
252 | for (i = 0; i < platform->num_clks; i++) { | ||
253 | if (platform->clk[i]) | ||
254 | clk_prepare_enable(platform->clk[i]); | ||
255 | } | ||
256 | } | ||
257 | |||
258 | /* to start with set emc frequency floor to max rate*/ | ||
259 | if (profile) | ||
260 | tegra_bwmgr_set_emc( | ||
261 | (struct tegra_bwmgr_client *)profile->private_data, | ||
262 | tegra_bwmgr_get_max_emc_rate(), | ||
263 | TEGRA_BWMGR_SET_EMC_FLOOR); | ||
264 | return ret; | ||
265 | } | ||
266 | |||
/* Suspend hook: no platform-specific work on GP10B, always returns 0. */
static int gp10b_tegra_suspend(struct device *dev)
{
	const int status = 0;

	return status;
}
271 | |||
272 | int gp10b_tegra_reset_assert(struct device *dev) | ||
273 | { | ||
274 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
275 | int ret = 0; | ||
276 | |||
277 | if (!platform->reset_control) | ||
278 | return -EINVAL; | ||
279 | |||
280 | ret = reset_control_assert(platform->reset_control); | ||
281 | |||
282 | return ret; | ||
283 | } | ||
284 | |||
285 | int gp10b_tegra_reset_deassert(struct device *dev) | ||
286 | { | ||
287 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
288 | int ret = 0; | ||
289 | |||
290 | if (!platform->reset_control) | ||
291 | return -EINVAL; | ||
292 | |||
293 | ret = reset_control_deassert(platform->reset_control); | ||
294 | |||
295 | return ret; | ||
296 | } | ||
297 | |||
298 | void gp10b_tegra_prescale(struct device *dev) | ||
299 | { | ||
300 | struct gk20a *g = get_gk20a(dev); | ||
301 | u32 avg = 0; | ||
302 | |||
303 | nvgpu_log_fn(g, " "); | ||
304 | |||
305 | nvgpu_pmu_load_norm(g, &avg); | ||
306 | |||
307 | nvgpu_log_fn(g, "done"); | ||
308 | } | ||
309 | |||
310 | void gp10b_tegra_postscale(struct device *pdev, | ||
311 | unsigned long freq) | ||
312 | { | ||
313 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | ||
314 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
315 | struct gk20a *g = get_gk20a(pdev); | ||
316 | unsigned long emc_rate; | ||
317 | |||
318 | nvgpu_log_fn(g, " "); | ||
319 | if (profile && profile->private_data && | ||
320 | !platform->is_railgated(pdev)) { | ||
321 | unsigned long emc_scale; | ||
322 | |||
323 | if (freq <= gp10b_freq_table[0]) | ||
324 | emc_scale = 0; | ||
325 | else | ||
326 | emc_scale = g->emc3d_ratio; | ||
327 | |||
328 | emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; | ||
329 | |||
330 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
331 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
332 | |||
333 | tegra_bwmgr_set_emc( | ||
334 | (struct tegra_bwmgr_client *)profile->private_data, | ||
335 | emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
336 | } | ||
337 | nvgpu_log_fn(g, "done"); | ||
338 | } | ||
339 | |||
340 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate) | ||
341 | { | ||
342 | struct gk20a *g = get_gk20a(dev); | ||
343 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
344 | unsigned long *freq_table = profile->devfreq_profile.freq_table; | ||
345 | int max_states = profile->devfreq_profile.max_state; | ||
346 | int i; | ||
347 | |||
348 | for (i = 0; i < max_states; ++i) | ||
349 | if (freq_table[i] >= rate) | ||
350 | return freq_table[i]; | ||
351 | |||
352 | return freq_table[max_states - 1]; | ||
353 | } | ||
354 | |||
355 | int gp10b_clk_get_freqs(struct device *dev, | ||
356 | unsigned long **freqs, int *num_freqs) | ||
357 | { | ||
358 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
359 | struct gk20a *g = platform->g; | ||
360 | unsigned long max_rate; | ||
361 | unsigned long new_rate = 0, prev_rate = 0; | ||
362 | int i, freq_counter = 0; | ||
363 | int sel_freq_cnt; | ||
364 | unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; | ||
365 | |||
366 | nvgpu_mutex_acquire(&platform->clk_get_freq_lock); | ||
367 | |||
368 | if (freq_table_init_complete) { | ||
369 | |||
370 | *freqs = gp10b_freq_table; | ||
371 | *num_freqs = num_supported_freq; | ||
372 | |||
373 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
374 | |||
375 | return 0; | ||
376 | } | ||
377 | |||
378 | max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); | ||
379 | |||
380 | /* | ||
381 | * Walk the h/w frequency table and update the local table | ||
382 | */ | ||
383 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
384 | prev_rate = new_rate; | ||
385 | new_rate = clk_round_rate(platform->clk[0], | ||
386 | prev_rate + 1); | ||
387 | loc_freq_table[i] = new_rate; | ||
388 | if (new_rate == max_rate) | ||
389 | break; | ||
390 | } | ||
391 | freq_counter = i + 1; | ||
392 | WARN_ON(freq_counter == GP10B_MAX_SUPPORTED_FREQS); | ||
393 | |||
394 | /* | ||
395 | * If the number of achievable frequencies is less than or | ||
396 | * equal to GP10B_NUM_SUPPORTED_FREQS, select all frequencies | ||
397 | * else, select one out of every 8 frequencies | ||
398 | */ | ||
399 | if (freq_counter <= GP10B_NUM_SUPPORTED_FREQS) { | ||
400 | for (sel_freq_cnt = 0; sel_freq_cnt < freq_counter; ++sel_freq_cnt) | ||
401 | gp10b_freq_table[sel_freq_cnt] = | ||
402 | loc_freq_table[sel_freq_cnt]; | ||
403 | } else { | ||
404 | /* | ||
405 | * Walk the h/w frequency table and only select | ||
406 | * GP10B_FREQ_SELECT_STEP'th frequencies and | ||
407 | * add MAX freq to last | ||
408 | */ | ||
409 | sel_freq_cnt = 0; | ||
410 | for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { | ||
411 | new_rate = loc_freq_table[i]; | ||
412 | |||
413 | if (i % GP10B_FREQ_SELECT_STEP == 0 || | ||
414 | new_rate == max_rate) { | ||
415 | gp10b_freq_table[sel_freq_cnt++] = | ||
416 | new_rate; | ||
417 | |||
418 | if (new_rate == max_rate) | ||
419 | break; | ||
420 | } | ||
421 | } | ||
422 | WARN_ON(sel_freq_cnt == GP10B_MAX_SUPPORTED_FREQS); | ||
423 | } | ||
424 | |||
425 | /* Fill freq table */ | ||
426 | *freqs = gp10b_freq_table; | ||
427 | *num_freqs = sel_freq_cnt; | ||
428 | num_supported_freq = sel_freq_cnt; | ||
429 | |||
430 | freq_table_init_complete = true; | ||
431 | |||
432 | nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", | ||
433 | gp10b_freq_table[0], max_rate, *num_freqs); | ||
434 | |||
435 | nvgpu_mutex_release(&platform->clk_get_freq_lock); | ||
436 | |||
437 | return 0; | ||
438 | } | ||
439 | |||
440 | struct gk20a_platform gp10b_tegra_platform = { | ||
441 | .has_syncpoints = true, | ||
442 | |||
443 | /* power management configuration */ | ||
444 | .railgate_delay_init = 500, | ||
445 | |||
446 | /* ldiv slowdown factor */ | ||
447 | .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, | ||
448 | |||
449 | /* power management configuration */ | ||
450 | .can_railgate_init = true, | ||
451 | .enable_elpg = true, | ||
452 | .can_elpg_init = true, | ||
453 | .enable_blcg = true, | ||
454 | .enable_slcg = true, | ||
455 | .enable_elcg = true, | ||
456 | .can_slcg = true, | ||
457 | .can_blcg = true, | ||
458 | .can_elcg = true, | ||
459 | .enable_aelpg = true, | ||
460 | .enable_perfmon = true, | ||
461 | |||
462 | /* ptimer src frequency in hz*/ | ||
463 | .ptimer_src_freq = 31250000, | ||
464 | |||
465 | .ch_wdt_timeout_ms = 5000, | ||
466 | |||
467 | .probe = gp10b_tegra_probe, | ||
468 | .late_probe = gp10b_tegra_late_probe, | ||
469 | .remove = gp10b_tegra_remove, | ||
470 | |||
471 | /* power management callbacks */ | ||
472 | .suspend = gp10b_tegra_suspend, | ||
473 | .railgate = gp10b_tegra_railgate, | ||
474 | .unrailgate = gp10b_tegra_unrailgate, | ||
475 | .is_railgated = gp10b_tegra_is_railgated, | ||
476 | |||
477 | .busy = gk20a_tegra_busy, | ||
478 | .idle = gk20a_tegra_idle, | ||
479 | |||
480 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | ||
481 | |||
482 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
483 | .has_cde = true, | ||
484 | #endif | ||
485 | |||
486 | .clk_round_rate = gp10b_round_clk_rate, | ||
487 | .get_clk_freqs = gp10b_clk_get_freqs, | ||
488 | |||
489 | /* frequency scaling configuration */ | ||
490 | .initscale = gp10b_tegra_scale_init, | ||
491 | .prescale = gp10b_tegra_prescale, | ||
492 | .postscale = gp10b_tegra_postscale, | ||
493 | .devfreq_governor = "nvhost_podgov", | ||
494 | |||
495 | .qos_notify = gk20a_scale_qos_notify, | ||
496 | |||
497 | .reset_assert = gp10b_tegra_reset_assert, | ||
498 | .reset_deassert = gp10b_tegra_reset_deassert, | ||
499 | |||
500 | .force_reset_in_do_idle = false, | ||
501 | |||
502 | .soc_name = "tegra18x", | ||
503 | |||
504 | .unified_memory = true, | ||
505 | .dma_mask = DMA_BIT_MASK(36), | ||
506 | |||
507 | .ltc_streamid = TEGRA_SID_GPUB, | ||
508 | |||
509 | .secure_buffer_size = 401408, | ||
510 | }; | ||