diff options
author | Arto Merilainen <amerilainen@nvidia.com> | 2014-03-19 03:38:25 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:08:53 -0400 |
commit | a9785995d5f22aaeb659285f8aeb64d8b56982e0 (patch) | |
tree | cc75f75bcf43db316a002a7a240b81f299bf6d7f /drivers/gpu/nvgpu/gk20a/clk_gk20a.c | |
parent | 61efaf843c22b85424036ec98015121c08f5f16c (diff) |
gpu: nvgpu: Add NVIDIA GPU Driver
This patch moves the NVIDIA GPU driver to a new location.
Bug 1482562
Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/clk_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/clk_gk20a.c | 865 |
1 files changed, 865 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/clk_gk20a.c b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c new file mode 100644 index 00000000..151a332b --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/clk_gk20a.c | |||
@@ -0,0 +1,865 @@ | |||
1 | /* | ||
2 | * drivers/gpu/nvgpu/gk20a/clk_gk20a.c | |
3 | * | ||
4 | * GK20A Clocks | ||
5 | * | ||
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <linux/clk.h> | ||
22 | #include <linux/delay.h> /* for mdelay */ | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/clk/tegra.h> | ||
26 | #include <mach/thermal.h> | ||
27 | |||
28 | #include "gk20a.h" | ||
29 | #include "hw_trim_gk20a.h" | ||
30 | #include "hw_timer_gk20a.h" | ||
31 | |||
/* Debug print that forwards to gk20a_dbg() with the gpu_dbg_clk mask. */
#define gk20a_dbg_clk(fmt, ...) \
	gk20a_dbg(gpu_dbg_clk, fmt, ##__VA_ARGS__)
34 | |||
35 | /* from vbios PLL info table */ | ||
36 | struct pll_parms gpc_pll_params = { | ||
37 | 144, 2064, /* freq */ | ||
38 | 1000, 2064, /* vco */ | ||
39 | 12, 38, /* u */ | ||
40 | 1, 255, /* M */ | ||
41 | 8, 255, /* N */ | ||
42 | 1, 32, /* PL */ | ||
43 | }; | ||
44 | |||
/*
 * Frequency table exported for thermal cooling. Both variables are filled
 * in by gk20a_init_clk_setup_sw(); until then the table pointer is NULL and
 * the size is zero.
 */
static struct gpufreq_table_data *gpu_cooling_freq;
static int num_gpu_cooling_freq;

/* Return the cooling frequency table (NULL while it has not been built). */
struct gpufreq_table_data *tegra_gpufreq_table_get(void)
{
	return gpu_cooling_freq;
}

/* Return the number of table entries, end-of-table marker included. */
unsigned int tegra_gpufreq_table_size_get(void)
{
	return num_gpu_cooling_freq;
}
57 | |||
58 | static u8 pl_to_div[] = { | ||
59 | /* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */ | ||
60 | /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 }; | ||
61 | |||
62 | /* Calculate and update M/N/PL as well as pll->freq | ||
63 | ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a) | ||
64 | u_f = ref_clk_f / M; | ||
65 | PLL output = vco_f = u_f * N = ref_clk_f * N / M; | ||
66 | gpc2clk = target clock frequency = vco_f / PL; | ||
67 | gpcclk = gpc2clk / 2; */ | ||
68 | static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll, | ||
69 | struct pll_parms *pll_params, u32 *target_freq, bool best_fit) | ||
70 | { | ||
71 | u32 min_vco_f, max_vco_f; | ||
72 | u32 best_M, best_N; | ||
73 | u32 low_PL, high_PL, best_PL; | ||
74 | u32 m, n, n2; | ||
75 | u32 target_vco_f, vco_f; | ||
76 | u32 ref_clk_f, target_clk_f, u_f; | ||
77 | u32 delta, lwv, best_delta = ~0; | ||
78 | int pl; | ||
79 | |||
80 | BUG_ON(target_freq == NULL); | ||
81 | |||
82 | gk20a_dbg_fn("request target freq %d MHz", *target_freq); | ||
83 | |||
84 | ref_clk_f = pll->clk_in; | ||
85 | target_clk_f = *target_freq; | ||
86 | max_vco_f = pll_params->max_vco; | ||
87 | min_vco_f = pll_params->min_vco; | ||
88 | best_M = pll_params->max_M; | ||
89 | best_N = pll_params->min_N; | ||
90 | best_PL = pll_params->min_PL; | ||
91 | |||
92 | target_vco_f = target_clk_f + target_clk_f / 50; | ||
93 | if (max_vco_f < target_vco_f) | ||
94 | max_vco_f = target_vco_f; | ||
95 | |||
96 | high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f; | ||
97 | high_PL = min(high_PL, pll_params->max_PL); | ||
98 | high_PL = max(high_PL, pll_params->min_PL); | ||
99 | |||
100 | low_PL = min_vco_f / target_vco_f; | ||
101 | low_PL = min(low_PL, pll_params->max_PL); | ||
102 | low_PL = max(low_PL, pll_params->min_PL); | ||
103 | |||
104 | /* Find Indices of high_PL and low_PL */ | ||
105 | for (pl = 0; pl < 14; pl++) { | ||
106 | if (pl_to_div[pl] >= low_PL) { | ||
107 | low_PL = pl; | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | for (pl = 0; pl < 14; pl++) { | ||
112 | if (pl_to_div[pl] >= high_PL) { | ||
113 | high_PL = pl; | ||
114 | break; | ||
115 | } | ||
116 | } | ||
117 | gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)", | ||
118 | low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]); | ||
119 | |||
120 | for (pl = low_PL; pl <= high_PL; pl++) { | ||
121 | target_vco_f = target_clk_f * pl_to_div[pl]; | ||
122 | |||
123 | for (m = pll_params->min_M; m <= pll_params->max_M; m++) { | ||
124 | u_f = ref_clk_f / m; | ||
125 | |||
126 | if (u_f < pll_params->min_u) | ||
127 | break; | ||
128 | if (u_f > pll_params->max_u) | ||
129 | continue; | ||
130 | |||
131 | n = (target_vco_f * m) / ref_clk_f; | ||
132 | n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; | ||
133 | |||
134 | if (n > pll_params->max_N) | ||
135 | break; | ||
136 | |||
137 | for (; n <= n2; n++) { | ||
138 | if (n < pll_params->min_N) | ||
139 | continue; | ||
140 | if (n > pll_params->max_N) | ||
141 | break; | ||
142 | |||
143 | vco_f = ref_clk_f * n / m; | ||
144 | |||
145 | if (vco_f >= min_vco_f && vco_f <= max_vco_f) { | ||
146 | lwv = (vco_f + (pl_to_div[pl] / 2)) | ||
147 | / pl_to_div[pl]; | ||
148 | delta = abs(lwv - target_clk_f); | ||
149 | |||
150 | if (delta < best_delta) { | ||
151 | best_delta = delta; | ||
152 | best_M = m; | ||
153 | best_N = n; | ||
154 | best_PL = pl; | ||
155 | |||
156 | if (best_delta == 0 || | ||
157 | /* 0.45% for non best fit */ | ||
158 | (!best_fit && (vco_f / best_delta > 218))) { | ||
159 | goto found_match; | ||
160 | } | ||
161 | |||
162 | gk20a_dbg_info("delta %d @ M %d, N %d, PL %d", | ||
163 | delta, m, n, pl); | ||
164 | } | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | } | ||
169 | |||
170 | found_match: | ||
171 | BUG_ON(best_delta == ~0); | ||
172 | |||
173 | if (best_fit && best_delta != 0) | ||
174 | gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll", | ||
175 | target_clk_f); | ||
176 | |||
177 | pll->M = best_M; | ||
178 | pll->N = best_N; | ||
179 | pll->PL = best_PL; | ||
180 | |||
181 | /* save current frequency */ | ||
182 | pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]); | ||
183 | |||
184 | *target_freq = pll->freq; | ||
185 | |||
186 | gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)", | ||
187 | *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]); | ||
188 | |||
189 | gk20a_dbg_fn("done"); | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static int clk_slide_gpc_pll(struct gk20a *g, u32 n) | ||
195 | { | ||
196 | u32 data, coeff; | ||
197 | u32 nold; | ||
198 | int ramp_timeout = 500; | ||
199 | |||
200 | /* get old coefficients */ | ||
201 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
202 | nold = trim_sys_gpcpll_coeff_ndiv_v(coeff); | ||
203 | |||
204 | /* do nothing if NDIV is same */ | ||
205 | if (n == nold) | ||
206 | return 0; | ||
207 | |||
208 | /* setup */ | ||
209 | data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
210 | data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(), | ||
211 | trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b)); | ||
212 | gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data); | ||
213 | data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); | ||
214 | data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(), | ||
215 | trim_sys_gpcpll_cfg3_pll_stepb_f(0xb)); | ||
216 | gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data); | ||
217 | |||
218 | /* pll slowdown mode */ | ||
219 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
220 | data = set_field(data, | ||
221 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), | ||
222 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f()); | ||
223 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
224 | |||
225 | /* new ndiv ready for ramp */ | ||
226 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
227 | coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(), | ||
228 | trim_sys_gpcpll_coeff_ndiv_f(n)); | ||
229 | udelay(1); | ||
230 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
231 | |||
232 | /* dynamic ramp to new ndiv */ | ||
233 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
234 | data = set_field(data, | ||
235 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), | ||
236 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f()); | ||
237 | udelay(1); | ||
238 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
239 | |||
240 | do { | ||
241 | udelay(1); | ||
242 | ramp_timeout--; | ||
243 | data = gk20a_readl( | ||
244 | g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r()); | ||
245 | if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data)) | ||
246 | break; | ||
247 | } while (ramp_timeout > 0); | ||
248 | |||
249 | /* exit slowdown mode */ | ||
250 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
251 | data = set_field(data, | ||
252 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), | ||
253 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f()); | ||
254 | data = set_field(data, | ||
255 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), | ||
256 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f()); | ||
257 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
258 | gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
259 | |||
260 | if (ramp_timeout <= 0) { | ||
261 | gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout"); | ||
262 | return -ETIMEDOUT; | ||
263 | } | ||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk, | ||
268 | int allow_slide) | ||
269 | { | ||
270 | u32 data, cfg, coeff, timeout; | ||
271 | u32 m, n, pl; | ||
272 | u32 nlo; | ||
273 | |||
274 | gk20a_dbg_fn(""); | ||
275 | |||
276 | if (!tegra_platform_is_silicon()) | ||
277 | return 0; | ||
278 | |||
279 | /* get old coefficients */ | ||
280 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
281 | m = trim_sys_gpcpll_coeff_mdiv_v(coeff); | ||
282 | n = trim_sys_gpcpll_coeff_ndiv_v(coeff); | ||
283 | pl = trim_sys_gpcpll_coeff_pldiv_v(coeff); | ||
284 | |||
285 | /* do NDIV slide if there is no change in M and PL */ | ||
286 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
287 | if (allow_slide && clk->gpc_pll.M == m && clk->gpc_pll.PL == pl | ||
288 | && trim_sys_gpcpll_cfg_enable_v(cfg)) { | ||
289 | return clk_slide_gpc_pll(g, clk->gpc_pll.N); | ||
290 | } | ||
291 | |||
292 | /* slide down to NDIV_LO */ | ||
293 | nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in); | ||
294 | if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) { | ||
295 | int ret = clk_slide_gpc_pll(g, nlo); | ||
296 | if (ret) | ||
297 | return ret; | ||
298 | } | ||
299 | |||
300 | /* split FO-to-bypass jump in halfs by setting out divider 1:2 */ | ||
301 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
302 | data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), | ||
303 | trim_sys_gpc2clk_out_vcodiv_f(2)); | ||
304 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
305 | |||
306 | /* put PLL in bypass before programming it */ | ||
307 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
308 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
309 | trim_sys_sel_vco_gpc2clk_out_bypass_f()); | ||
310 | udelay(2); | ||
311 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
312 | |||
313 | /* get out from IDDQ */ | ||
314 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
315 | if (trim_sys_gpcpll_cfg_iddq_v(cfg)) { | ||
316 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(), | ||
317 | trim_sys_gpcpll_cfg_iddq_power_on_v()); | ||
318 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
319 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
320 | udelay(2); | ||
321 | } | ||
322 | |||
323 | /* disable PLL before changing coefficients */ | ||
324 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
325 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
326 | trim_sys_gpcpll_cfg_enable_no_f()); | ||
327 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
328 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
329 | |||
330 | /* change coefficients */ | ||
331 | nlo = DIV_ROUND_UP(clk->gpc_pll.M * gpc_pll_params.min_vco, | ||
332 | clk->gpc_pll.clk_in); | ||
333 | coeff = trim_sys_gpcpll_coeff_mdiv_f(clk->gpc_pll.M) | | ||
334 | trim_sys_gpcpll_coeff_ndiv_f(allow_slide ? | ||
335 | nlo : clk->gpc_pll.N) | | ||
336 | trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL); | ||
337 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
338 | |||
339 | /* enable PLL after changing coefficients */ | ||
340 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
341 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
342 | trim_sys_gpcpll_cfg_enable_yes_f()); | ||
343 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
344 | |||
345 | /* lock pll */ | ||
346 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
347 | if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){ | ||
348 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(), | ||
349 | trim_sys_gpcpll_cfg_enb_lckdet_power_on_f()); | ||
350 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
351 | } | ||
352 | |||
353 | /* wait pll lock */ | ||
354 | timeout = clk->pll_delay / 2 + 1; | ||
355 | do { | ||
356 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
357 | if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f()) | ||
358 | goto pll_locked; | ||
359 | udelay(2); | ||
360 | } while (--timeout > 0); | ||
361 | |||
362 | /* PLL is messed up. What can we do here? */ | ||
363 | BUG(); | ||
364 | return -EBUSY; | ||
365 | |||
366 | pll_locked: | ||
367 | /* put PLL back on vco */ | ||
368 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
369 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
370 | trim_sys_sel_vco_gpc2clk_out_vco_f()); | ||
371 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
372 | clk->gpc_pll.enabled = true; | ||
373 | |||
374 | /* restore out divider 1:1 */ | ||
375 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
376 | data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), | ||
377 | trim_sys_gpc2clk_out_vcodiv_by1_f()); | ||
378 | udelay(2); | ||
379 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
380 | |||
381 | /* slide up to target NDIV */ | ||
382 | return clk_slide_gpc_pll(g, clk->gpc_pll.N); | ||
383 | } | ||
384 | |||
385 | static int clk_disable_gpcpll(struct gk20a *g, int allow_slide) | ||
386 | { | ||
387 | u32 cfg, coeff, m, nlo; | ||
388 | struct clk_gk20a *clk = &g->clk; | ||
389 | |||
390 | /* slide to VCO min */ | ||
391 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
392 | if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) { | ||
393 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
394 | m = trim_sys_gpcpll_coeff_mdiv_v(coeff); | ||
395 | nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, | ||
396 | clk->gpc_pll.clk_in); | ||
397 | clk_slide_gpc_pll(g, nlo); | ||
398 | } | ||
399 | |||
400 | /* put PLL in bypass before disabling it */ | ||
401 | cfg = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
402 | cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(), | ||
403 | trim_sys_sel_vco_gpc2clk_out_bypass_f()); | ||
404 | gk20a_writel(g, trim_sys_sel_vco_r(), cfg); | ||
405 | |||
406 | /* disable PLL */ | ||
407 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
408 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
409 | trim_sys_gpcpll_cfg_enable_no_f()); | ||
410 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
411 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
412 | |||
413 | clk->gpc_pll.enabled = false; | ||
414 | return 0; | ||
415 | } | ||
416 | |||
/*
 * First step of gk20a_init_clk_support(). It only emits a function trace
 * and reports success; no hardware is touched.
 */
static int gk20a_init_clk_reset_enable_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");

	return 0;
}
422 | |||
423 | struct clk *gk20a_clk_get(struct gk20a *g) | ||
424 | { | ||
425 | if (!g->clk.tegra_clk) { | ||
426 | struct clk *clk; | ||
427 | |||
428 | clk = clk_get_sys("tegra_gk20a", "gpu"); | ||
429 | if (IS_ERR(clk)) { | ||
430 | gk20a_err(dev_from_gk20a(g), | ||
431 | "fail to get tegra gpu clk tegra_gk20a/gpu"); | ||
432 | return NULL; | ||
433 | } | ||
434 | g->clk.tegra_clk = clk; | ||
435 | } | ||
436 | |||
437 | return g->clk.tegra_clk; | ||
438 | } | ||
439 | |||
440 | static int gk20a_init_clk_setup_sw(struct gk20a *g) | ||
441 | { | ||
442 | struct clk_gk20a *clk = &g->clk; | ||
443 | static int initialized; | ||
444 | unsigned long *freqs; | ||
445 | int err, num_freqs; | ||
446 | struct clk *ref; | ||
447 | unsigned long ref_rate; | ||
448 | |||
449 | gk20a_dbg_fn(""); | ||
450 | |||
451 | if (clk->sw_ready) { | ||
452 | gk20a_dbg_fn("skip init"); | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | if (!gk20a_clk_get(g)) | ||
457 | return -EINVAL; | ||
458 | |||
459 | ref = clk_get_parent(clk_get_parent(clk->tegra_clk)); | ||
460 | if (IS_ERR(ref)) { | ||
461 | gk20a_err(dev_from_gk20a(g), | ||
462 | "failed to get GPCPLL reference clock"); | ||
463 | return -EINVAL; | ||
464 | } | ||
465 | ref_rate = clk_get_rate(ref); | ||
466 | |||
467 | clk->pll_delay = 300; /* usec */ | ||
468 | |||
469 | clk->gpc_pll.id = GK20A_GPC_PLL; | ||
470 | clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */ | ||
471 | |||
472 | /* Decide initial frequency */ | ||
473 | if (!initialized) { | ||
474 | initialized = 1; | ||
475 | clk->gpc_pll.M = 1; | ||
476 | clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco, | ||
477 | clk->gpc_pll.clk_in); | ||
478 | clk->gpc_pll.PL = 1; | ||
479 | clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N; | ||
480 | clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL]; | ||
481 | } | ||
482 | |||
483 | err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk), | ||
484 | &freqs, &num_freqs); | ||
485 | if (!err) { | ||
486 | int i, j; | ||
487 | |||
488 | /* init j for inverse traversal of frequencies */ | ||
489 | j = num_freqs - 1; | ||
490 | |||
491 | gpu_cooling_freq = kzalloc( | ||
492 | (1 + num_freqs) * sizeof(*gpu_cooling_freq), | ||
493 | GFP_KERNEL); | ||
494 | |||
495 | /* store frequencies in inverse order */ | ||
496 | for (i = 0; i < num_freqs; ++i, --j) { | ||
497 | gpu_cooling_freq[i].index = i; | ||
498 | gpu_cooling_freq[i].frequency = freqs[j]; | ||
499 | } | ||
500 | |||
501 | /* add 'end of table' marker */ | ||
502 | gpu_cooling_freq[i].index = i; | ||
503 | gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END; | ||
504 | |||
505 | /* store number of frequencies */ | ||
506 | num_gpu_cooling_freq = num_freqs + 1; | ||
507 | } | ||
508 | |||
509 | mutex_init(&clk->clk_mutex); | ||
510 | |||
511 | clk->sw_ready = true; | ||
512 | |||
513 | gk20a_dbg_fn("done"); | ||
514 | return 0; | ||
515 | } | ||
516 | |||
517 | static int gk20a_init_clk_setup_hw(struct gk20a *g) | ||
518 | { | ||
519 | u32 data; | ||
520 | |||
521 | gk20a_dbg_fn(""); | ||
522 | |||
523 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
524 | data = set_field(data, | ||
525 | trim_sys_gpc2clk_out_sdiv14_m() | | ||
526 | trim_sys_gpc2clk_out_vcodiv_m() | | ||
527 | trim_sys_gpc2clk_out_bypdiv_m(), | ||
528 | trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() | | ||
529 | trim_sys_gpc2clk_out_vcodiv_by1_f() | | ||
530 | trim_sys_gpc2clk_out_bypdiv_f(0)); | ||
531 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq) | ||
537 | { | ||
538 | struct clk_gk20a *clk = &g->clk; | ||
539 | |||
540 | if (freq > gpc_pll_params.max_freq) | ||
541 | freq = gpc_pll_params.max_freq; | ||
542 | else if (freq < gpc_pll_params.min_freq) | ||
543 | freq = gpc_pll_params.min_freq; | ||
544 | |||
545 | if (freq != old_freq) { | ||
546 | /* gpc_pll.freq is changed to new value here */ | ||
547 | if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params, | ||
548 | &freq, true)) { | ||
549 | gk20a_err(dev_from_gk20a(g), | ||
550 | "failed to set pll target for %d", freq); | ||
551 | return -EINVAL; | ||
552 | } | ||
553 | } | ||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq) | ||
558 | { | ||
559 | struct clk_gk20a *clk = &g->clk; | ||
560 | int err = 0; | ||
561 | |||
562 | gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq); | ||
563 | |||
564 | if ((freq == old_freq) && clk->gpc_pll.enabled) | ||
565 | return 0; | ||
566 | |||
567 | /* change frequency only if power is on */ | ||
568 | if (g->clk.clk_hw_on) { | ||
569 | err = clk_program_gpc_pll(g, clk, 1); | ||
570 | if (err) | ||
571 | err = clk_program_gpc_pll(g, clk, 0); | ||
572 | } | ||
573 | |||
574 | /* Just report error but not restore PLL since dvfs could already change | ||
575 | voltage even when it returns error. */ | ||
576 | if (err) | ||
577 | gk20a_err(dev_from_gk20a(g), | ||
578 | "failed to set pll to %d", freq); | ||
579 | return err; | ||
580 | } | ||
581 | |||
582 | static int gk20a_clk_export_set_rate(void *data, unsigned long *rate) | ||
583 | { | ||
584 | u32 old_freq; | ||
585 | int ret = -ENODATA; | ||
586 | struct gk20a *g = data; | ||
587 | struct clk_gk20a *clk = &g->clk; | ||
588 | |||
589 | if (rate) { | ||
590 | mutex_lock(&clk->clk_mutex); | ||
591 | old_freq = clk->gpc_pll.freq; | ||
592 | ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq); | ||
593 | if (!ret && clk->gpc_pll.enabled) | ||
594 | ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq); | ||
595 | if (!ret) | ||
596 | *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
597 | mutex_unlock(&clk->clk_mutex); | ||
598 | } | ||
599 | return ret; | ||
600 | } | ||
601 | |||
602 | static int gk20a_clk_export_enable(void *data) | ||
603 | { | ||
604 | int ret; | ||
605 | struct gk20a *g = data; | ||
606 | struct clk_gk20a *clk = &g->clk; | ||
607 | |||
608 | mutex_lock(&clk->clk_mutex); | ||
609 | ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); | ||
610 | mutex_unlock(&clk->clk_mutex); | ||
611 | return ret; | ||
612 | } | ||
613 | |||
614 | static void gk20a_clk_export_disable(void *data) | ||
615 | { | ||
616 | struct gk20a *g = data; | ||
617 | struct clk_gk20a *clk = &g->clk; | ||
618 | |||
619 | mutex_lock(&clk->clk_mutex); | ||
620 | if (g->clk.clk_hw_on) | ||
621 | clk_disable_gpcpll(g, 1); | ||
622 | mutex_unlock(&clk->clk_mutex); | ||
623 | } | ||
624 | |||
625 | static void gk20a_clk_export_init(void *data, unsigned long *rate, bool *state) | ||
626 | { | ||
627 | struct gk20a *g = data; | ||
628 | struct clk_gk20a *clk = &g->clk; | ||
629 | |||
630 | mutex_lock(&clk->clk_mutex); | ||
631 | if (state) | ||
632 | *state = clk->gpc_pll.enabled; | ||
633 | if (rate) | ||
634 | *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
635 | mutex_unlock(&clk->clk_mutex); | ||
636 | } | ||
637 | |||
638 | static struct tegra_clk_export_ops gk20a_clk_export_ops = { | ||
639 | .init = gk20a_clk_export_init, | ||
640 | .enable = gk20a_clk_export_enable, | ||
641 | .disable = gk20a_clk_export_disable, | ||
642 | .set_rate = gk20a_clk_export_set_rate, | ||
643 | }; | ||
644 | |||
645 | static int gk20a_clk_register_export_ops(struct gk20a *g) | ||
646 | { | ||
647 | int ret; | ||
648 | struct clk *c; | ||
649 | |||
650 | if (gk20a_clk_export_ops.data) | ||
651 | return 0; | ||
652 | |||
653 | gk20a_clk_export_ops.data = (void *)g; | ||
654 | c = g->clk.tegra_clk; | ||
655 | if (!c || !clk_get_parent(c)) | ||
656 | return -ENOSYS; | ||
657 | |||
658 | ret = tegra_clk_register_export_ops(clk_get_parent(c), | ||
659 | &gk20a_clk_export_ops); | ||
660 | |||
661 | return ret; | ||
662 | } | ||
663 | |||
664 | int gk20a_init_clk_support(struct gk20a *g) | ||
665 | { | ||
666 | struct clk_gk20a *clk = &g->clk; | ||
667 | u32 err; | ||
668 | |||
669 | gk20a_dbg_fn(""); | ||
670 | |||
671 | clk->g = g; | ||
672 | |||
673 | err = gk20a_init_clk_reset_enable_hw(g); | ||
674 | if (err) | ||
675 | return err; | ||
676 | |||
677 | err = gk20a_init_clk_setup_sw(g); | ||
678 | if (err) | ||
679 | return err; | ||
680 | |||
681 | mutex_lock(&clk->clk_mutex); | ||
682 | clk->clk_hw_on = true; | ||
683 | |||
684 | err = gk20a_init_clk_setup_hw(g); | ||
685 | mutex_unlock(&clk->clk_mutex); | ||
686 | if (err) | ||
687 | return err; | ||
688 | |||
689 | err = gk20a_clk_register_export_ops(g); | ||
690 | if (err) | ||
691 | return err; | ||
692 | |||
693 | /* FIXME: this effectively prevents host level clock gating */ | ||
694 | err = clk_enable(g->clk.tegra_clk); | ||
695 | if (err) | ||
696 | return err; | ||
697 | |||
698 | /* The prev call may not enable PLL if gbus is unbalanced - force it */ | ||
699 | mutex_lock(&clk->clk_mutex); | ||
700 | err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq); | ||
701 | mutex_unlock(&clk->clk_mutex); | ||
702 | if (err) | ||
703 | return err; | ||
704 | |||
705 | return err; | ||
706 | } | ||
707 | |||
708 | unsigned long gk20a_clk_get_rate(struct gk20a *g) | ||
709 | { | ||
710 | struct clk_gk20a *clk = &g->clk; | ||
711 | return rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
712 | } | ||
713 | |||
/*
 * Round @rate using the parent of the Tegra GPU clock. If the GPU clock
 * cannot be obtained, @rate is returned unchanged.
 */
long gk20a_clk_round_rate(struct gk20a *g, unsigned long rate)
{
	struct clk *gpu_clk = gk20a_clk_get(g);

	/* make sure the clock is available */
	if (!gpu_clk)
		return rate;

	return clk_round_rate(clk_get_parent(gpu_clk), rate);
}
722 | |||
723 | int gk20a_clk_set_rate(struct gk20a *g, unsigned long rate) | ||
724 | { | ||
725 | return clk_set_rate(g->clk.tegra_clk, rate); | ||
726 | } | ||
727 | |||
728 | int gk20a_suspend_clk_support(struct gk20a *g) | ||
729 | { | ||
730 | int ret; | ||
731 | |||
732 | clk_disable(g->clk.tegra_clk); | ||
733 | |||
734 | /* The prev call may not disable PLL if gbus is unbalanced - force it */ | ||
735 | mutex_lock(&g->clk.clk_mutex); | ||
736 | ret = clk_disable_gpcpll(g, 1); | ||
737 | g->clk.clk_hw_on = false; | ||
738 | mutex_unlock(&g->clk.clk_mutex); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | #ifdef CONFIG_DEBUG_FS | ||
743 | |||
744 | static int rate_get(void *data, u64 *val) | ||
745 | { | ||
746 | struct gk20a *g = (struct gk20a *)data; | ||
747 | *val = (u64)gk20a_clk_get_rate(g); | ||
748 | return 0; | ||
749 | } | ||
750 | static int rate_set(void *data, u64 val) | ||
751 | { | ||
752 | struct gk20a *g = (struct gk20a *)data; | ||
753 | return gk20a_clk_set_rate(g, (u32)val); | ||
754 | } | ||
755 | DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); | ||
756 | |||
757 | static int pll_reg_show(struct seq_file *s, void *data) | ||
758 | { | ||
759 | struct gk20a *g = s->private; | ||
760 | u32 reg, m, n, pl, f; | ||
761 | |||
762 | mutex_lock(&g->clk.clk_mutex); | ||
763 | if (!g->clk.clk_hw_on) { | ||
764 | seq_printf(s, "gk20a powered down - no access to registers\n"); | ||
765 | mutex_unlock(&g->clk.clk_mutex); | ||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
770 | seq_printf(s, "cfg = 0x%x : %s : %s\n", reg, | ||
771 | trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled", | ||
772 | trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked"); | ||
773 | |||
774 | reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
775 | m = trim_sys_gpcpll_coeff_mdiv_v(reg); | ||
776 | n = trim_sys_gpcpll_coeff_ndiv_v(reg); | ||
777 | pl = trim_sys_gpcpll_coeff_pldiv_v(reg); | ||
778 | f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]); | ||
779 | seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); | ||
780 | seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2); | ||
781 | mutex_unlock(&g->clk.clk_mutex); | ||
782 | return 0; | ||
783 | } | ||
784 | |||
785 | static int pll_reg_open(struct inode *inode, struct file *file) | ||
786 | { | ||
787 | return single_open(file, pll_reg_show, inode->i_private); | ||
788 | } | ||
789 | |||
790 | static const struct file_operations pll_reg_fops = { | ||
791 | .open = pll_reg_open, | ||
792 | .read = seq_read, | ||
793 | .llseek = seq_lseek, | ||
794 | .release = single_release, | ||
795 | }; | ||
796 | |||
797 | static int monitor_get(void *data, u64 *val) | ||
798 | { | ||
799 | struct gk20a *g = (struct gk20a *)data; | ||
800 | struct clk_gk20a *clk = &g->clk; | ||
801 | int err; | ||
802 | |||
803 | u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */ | ||
804 | u32 clkin = clk->gpc_pll.clk_in; | ||
805 | u32 count1, count2; | ||
806 | |||
807 | err = gk20a_busy(g->dev); | ||
808 | if (err) | ||
809 | return err; | ||
810 | |||
811 | gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), | ||
812 | trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f()); | ||
813 | gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), | ||
814 | trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() | | ||
815 | trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() | | ||
816 | trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle)); | ||
817 | /* start */ | ||
818 | |||
819 | /* It should take about 8us to finish 100 cycle of 12MHz. | ||
820 | But longer than 100us delay is required here. */ | ||
821 | gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0)); | ||
822 | udelay(2000); | ||
823 | |||
824 | count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); | ||
825 | udelay(100); | ||
826 | count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); | ||
827 | *val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle); | ||
828 | gk20a_idle(g->dev); | ||
829 | |||
830 | if (count1 != count2) | ||
831 | return -EBUSY; | ||
832 | return 0; | ||
833 | } | ||
834 | DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); | ||
835 | |||
836 | int clk_gk20a_debugfs_init(struct platform_device *dev) | ||
837 | { | ||
838 | struct dentry *d; | ||
839 | struct gk20a_platform *platform = platform_get_drvdata(dev); | ||
840 | struct gk20a *g = get_gk20a(dev); | ||
841 | |||
842 | d = debugfs_create_file( | ||
843 | "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops); | ||
844 | if (!d) | ||
845 | goto err_out; | ||
846 | |||
847 | d = debugfs_create_file( | ||
848 | "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops); | ||
849 | if (!d) | ||
850 | goto err_out; | ||
851 | |||
852 | d = debugfs_create_file( | ||
853 | "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops); | ||
854 | if (!d) | ||
855 | goto err_out; | ||
856 | |||
857 | return 0; | ||
858 | |||
859 | err_out: | ||
860 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
861 | debugfs_remove_recursive(platform->debugfs); | ||
862 | return -ENOMEM; | ||
863 | } | ||
864 | |||
865 | #endif /* CONFIG_DEBUG_FS */ | ||