Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
28 files changed, 8728 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
new file mode 100644
index 00000000..a39cdf2c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -0,0 +1,1444 @@
1 | /* | ||
2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/types.h> | ||
24 | #include <nvgpu/dma.h> | ||
25 | #include <nvgpu/gmmu.h> | ||
26 | #include <nvgpu/timers.h> | ||
27 | #include <nvgpu/nvgpu_common.h> | ||
28 | #include <nvgpu/kmem.h> | ||
29 | #include <nvgpu/nvgpu_mem.h> | ||
30 | #include <nvgpu/acr/nvgpu_acr.h> | ||
31 | #include <nvgpu/firmware.h> | ||
32 | #include <nvgpu/pmu.h> | ||
33 | #include <nvgpu/falcon.h> | ||
34 | #include <nvgpu/enabled.h> | ||
35 | #include <nvgpu/mm.h> | ||
36 | |||
37 | #include "gk20a/gk20a.h" | ||
38 | #include "gk20a/pmu_gk20a.h" | ||
39 | #include "mm_gm20b.h" | ||
40 | #include "acr_gm20b.h" | ||
41 | |||
42 | #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h> | ||
43 | |||
44 | /*Defines*/ | ||
45 | #define gm20b_dbg_pmu(fmt, arg...) \ | ||
46 | gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) | ||
47 | |||
48 | typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata); | ||
49 | |||
50 | /*Externs*/ | ||
51 | |||
52 | /*Forwards*/ | ||
53 | static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img); | ||
54 | static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img); | ||
55 | static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img); | ||
56 | static int lsfm_discover_ucode_images(struct gk20a *g, | ||
57 | struct ls_flcn_mgr *plsfm); | ||
58 | static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, | ||
59 | struct flcn_ucode_img *ucode_image, u32 falcon_id); | ||
60 | static void lsfm_free_ucode_img_res(struct gk20a *g, | ||
61 | struct flcn_ucode_img *p_img); | ||
62 | static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g, | ||
63 | struct flcn_ucode_img *p_img); | ||
64 | static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); | ||
65 | static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, | ||
66 | struct nvgpu_mem *nonwpr); | ||
67 | static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm); | ||
68 | |||
69 | /*Globals*/ | ||
70 | static get_ucode_details pmu_acr_supp_ucode_list[] = { | ||
71 | pmu_ucode_details, | ||
72 | fecs_ucode_details, | ||
73 | gpccs_ucode_details, | ||
74 | }; | ||
75 | |||
76 | /*Once in LS mode, only cpuctl_alias is accessible*/ | ||
77 | static void start_gm20b_pmu(struct gk20a *g) | ||
78 | { | ||
79 | /*re-enable irqs; they were disabled for HS falcon boot, where we poll for halt*/ | ||
80 | nvgpu_mutex_acquire(&g->pmu.isr_mutex); | ||
81 | pmu_enable_irq(&g->pmu, true); | ||
82 | g->pmu.isr_enabled = true; | ||
83 | nvgpu_mutex_release(&g->pmu.isr_mutex); | ||
84 | gk20a_writel(g, pwr_falcon_cpuctl_alias_r(), | ||
85 | pwr_falcon_cpuctl_startcpu_f(1)); | ||
86 | } | ||
87 | |||
88 | void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf) | ||
89 | { | ||
90 | g->ops.fb.read_wpr_info(g, inf); | ||
91 | } | ||
92 | |||
93 | bool gm20b_is_pmu_supported(struct gk20a *g) | ||
94 | { | ||
95 | return true; | ||
96 | } | ||
97 | |||
98 | static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) | ||
99 | { | ||
100 | struct nvgpu_firmware *pmu_fw, *pmu_desc, *pmu_sig; | ||
101 | struct nvgpu_pmu *pmu = &g->pmu; | ||
102 | struct lsf_ucode_desc *lsf_desc; | ||
103 | int err; | ||
104 | gm20b_dbg_pmu("requesting PMU ucode in GM20B\n"); | ||
105 | pmu_fw = nvgpu_request_firmware(g, GM20B_PMU_UCODE_IMAGE, 0); | ||
106 | if (!pmu_fw) { | ||
107 | nvgpu_err(g, "failed to load pmu ucode!!"); | ||
108 | return -ENOENT; | ||
109 | } | ||
110 | g->acr.pmu_fw = pmu_fw; | ||
111 | gm20b_dbg_pmu("Loaded PMU ucode in for blob preparation"); | ||
112 | |||
113 | gm20b_dbg_pmu("requesting PMU ucode desc in GM20B\n"); | ||
114 | pmu_desc = nvgpu_request_firmware(g, GM20B_PMU_UCODE_DESC, 0); | ||
115 | if (!pmu_desc) { | ||
116 | nvgpu_err(g, "failed to load pmu ucode desc!!"); | ||
117 | err = -ENOENT; | ||
118 | goto release_img_fw; | ||
119 | } | ||
120 | pmu_sig = nvgpu_request_firmware(g, GM20B_PMU_UCODE_SIG, 0); | ||
121 | if (!pmu_sig) { | ||
122 | nvgpu_err(g, "failed to load pmu sig!!"); | ||
123 | err = -ENOENT; | ||
124 | goto release_desc; | ||
125 | } | ||
126 | pmu->desc = (struct pmu_ucode_desc *)pmu_desc->data; | ||
127 | pmu->ucode_image = (u32 *)pmu_fw->data; | ||
128 | g->acr.pmu_desc = pmu_desc; | ||
129 | |||
130 | err = nvgpu_init_pmu_fw_support(pmu); | ||
131 | if (err) { | ||
132 | gm20b_dbg_pmu("failed to set function pointers\n"); | ||
133 | goto release_sig; | ||
134 | } | ||
135 | |||
136 | lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); | ||
137 | if (!lsf_desc) { | ||
138 | err = -ENOMEM; | ||
139 | goto release_sig; | ||
140 | } | ||
141 | memcpy(lsf_desc, (void *)pmu_sig->data, sizeof(struct lsf_ucode_desc)); | ||
142 | lsf_desc->falcon_id = LSF_FALCON_ID_PMU; | ||
143 | |||
144 | p_img->desc = pmu->desc; | ||
145 | p_img->data = pmu->ucode_image; | ||
146 | p_img->data_size = pmu->desc->image_size; | ||
147 | p_img->fw_ver = NULL; | ||
148 | p_img->header = NULL; | ||
149 | p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; | ||
150 | gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n"); | ||
151 | nvgpu_release_firmware(g, pmu_sig); | ||
152 | return 0; | ||
153 | release_sig: | ||
154 | nvgpu_release_firmware(g, pmu_sig); | ||
155 | release_desc: | ||
156 | nvgpu_release_firmware(g, pmu_desc); | ||
157 | g->acr.pmu_desc = NULL; | ||
158 | release_img_fw: | ||
159 | nvgpu_release_firmware(g, pmu_fw); | ||
160 | g->acr.pmu_fw = NULL; | ||
161 | return err; | ||
162 | } | ||
163 | |||
164 | static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) | ||
165 | { | ||
166 | struct lsf_ucode_desc *lsf_desc; | ||
167 | struct nvgpu_firmware *fecs_sig; | ||
168 | int err; | ||
169 | |||
170 | fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0); | ||
171 | if (!fecs_sig) { | ||
172 | nvgpu_err(g, "failed to load fecs sig"); | ||
173 | return -ENOENT; | ||
174 | } | ||
175 | lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); | ||
176 | if (!lsf_desc) { | ||
177 | err = -ENOMEM; | ||
178 | goto rel_sig; | ||
179 | } | ||
180 | memcpy(lsf_desc, (void *)fecs_sig->data, sizeof(struct lsf_ucode_desc)); | ||
181 | lsf_desc->falcon_id = LSF_FALCON_ID_FECS; | ||
182 | |||
183 | p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc)); | ||
184 | if (p_img->desc == NULL) { | ||
185 | err = -ENOMEM; | ||
186 | goto free_lsf_desc; | ||
187 | } | ||
188 | |||
189 | p_img->desc->bootloader_start_offset = | ||
190 | g->ctxsw_ucode_info.fecs.boot.offset; | ||
191 | p_img->desc->bootloader_size = | ||
192 | ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256); | ||
193 | p_img->desc->bootloader_imem_offset = | ||
194 | g->ctxsw_ucode_info.fecs.boot_imem_offset; | ||
195 | p_img->desc->bootloader_entry_point = | ||
196 | g->ctxsw_ucode_info.fecs.boot_entry; | ||
197 | |||
198 | p_img->desc->image_size = | ||
199 | ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) + | ||
200 | ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) + | ||
201 | ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256); | ||
202 | p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) + | ||
203 | ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256); | ||
204 | p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset; | ||
205 | p_img->desc->app_imem_offset = 0; | ||
206 | p_img->desc->app_imem_entry = 0; | ||
207 | p_img->desc->app_dmem_offset = 0; | ||
208 | p_img->desc->app_resident_code_offset = 0; | ||
209 | p_img->desc->app_resident_code_size = | ||
210 | g->ctxsw_ucode_info.fecs.code.size; | ||
211 | p_img->desc->app_resident_data_offset = | ||
212 | g->ctxsw_ucode_info.fecs.data.offset - | ||
213 | g->ctxsw_ucode_info.fecs.code.offset; | ||
214 | p_img->desc->app_resident_data_size = | ||
215 | g->ctxsw_ucode_info.fecs.data.size; | ||
216 | p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va; | ||
217 | p_img->data_size = p_img->desc->image_size; | ||
218 | |||
219 | p_img->fw_ver = NULL; | ||
220 | p_img->header = NULL; | ||
221 | p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; | ||
222 | gm20b_dbg_pmu("fecs fw loaded\n"); | ||
223 | nvgpu_release_firmware(g, fecs_sig); | ||
224 | return 0; | ||
225 | free_lsf_desc: | ||
226 | nvgpu_kfree(g, lsf_desc); | ||
227 | rel_sig: | ||
228 | nvgpu_release_firmware(g, fecs_sig); | ||
229 | return err; | ||
230 | } | ||
231 | static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) | ||
232 | { | ||
233 | struct lsf_ucode_desc *lsf_desc; | ||
234 | struct nvgpu_firmware *gpccs_sig; | ||
235 | int err; | ||
236 | |||
237 | if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) | ||
238 | return -ENOENT; | ||
239 | |||
240 | gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, 0); | ||
241 | if (!gpccs_sig) { | ||
242 | nvgpu_err(g, "failed to load gpccs sig"); | ||
243 | return -ENOENT; | ||
244 | } | ||
245 | lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); | ||
246 | if (!lsf_desc) { | ||
247 | err = -ENOMEM; | ||
248 | goto rel_sig; | ||
249 | } | ||
250 | memcpy(lsf_desc, (void *)gpccs_sig->data, | ||
251 | sizeof(struct lsf_ucode_desc)); | ||
252 | lsf_desc->falcon_id = LSF_FALCON_ID_GPCCS; | ||
253 | |||
254 | p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc)); | ||
255 | if (p_img->desc == NULL) { | ||
256 | err = -ENOMEM; | ||
257 | goto free_lsf_desc; | ||
258 | } | ||
259 | |||
260 | p_img->desc->bootloader_start_offset = | ||
261 | 0; | ||
262 | p_img->desc->bootloader_size = | ||
263 | ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256); | ||
264 | p_img->desc->bootloader_imem_offset = | ||
265 | g->ctxsw_ucode_info.gpccs.boot_imem_offset; | ||
266 | p_img->desc->bootloader_entry_point = | ||
267 | g->ctxsw_ucode_info.gpccs.boot_entry; | ||
268 | |||
269 | p_img->desc->image_size = | ||
270 | ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) + | ||
271 | ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) + | ||
272 | ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256); | ||
273 | p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) | ||
274 | + ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256); | ||
275 | p_img->desc->app_start_offset = p_img->desc->bootloader_size; | ||
276 | p_img->desc->app_imem_offset = 0; | ||
277 | p_img->desc->app_imem_entry = 0; | ||
278 | p_img->desc->app_dmem_offset = 0; | ||
279 | p_img->desc->app_resident_code_offset = 0; | ||
280 | p_img->desc->app_resident_code_size = | ||
281 | ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256); | ||
282 | p_img->desc->app_resident_data_offset = | ||
283 | ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) - | ||
284 | ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256); | ||
285 | p_img->desc->app_resident_data_size = | ||
286 | ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256); | ||
287 | p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va + | ||
288 | g->ctxsw_ucode_info.gpccs.boot.offset); | ||
289 | p_img->data_size = ALIGN(p_img->desc->image_size, 256); | ||
290 | p_img->fw_ver = NULL; | ||
291 | p_img->header = NULL; | ||
292 | p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; | ||
293 | gm20b_dbg_pmu("gpccs fw loaded\n"); | ||
294 | nvgpu_release_firmware(g, gpccs_sig); | ||
295 | return 0; | ||
296 | free_lsf_desc: | ||
297 | nvgpu_kfree(g, lsf_desc); | ||
298 | rel_sig: | ||
299 | nvgpu_release_firmware(g, gpccs_sig); | ||
300 | return err; | ||
301 | } | ||
302 | |||
303 | bool gm20b_is_lazy_bootstrap(u32 falcon_id) | ||
304 | { | ||
305 | bool enable_status = false; | ||
306 | |||
307 | switch (falcon_id) { | ||
308 | case LSF_FALCON_ID_FECS: | ||
309 | enable_status = false; | ||
310 | break; | ||
311 | case LSF_FALCON_ID_GPCCS: | ||
312 | enable_status = false; | ||
313 | break; | ||
314 | default: | ||
315 | break; | ||
316 | } | ||
317 | |||
318 | return enable_status; | ||
319 | } | ||
320 | |||
321 | bool gm20b_is_priv_load(u32 falcon_id) | ||
322 | { | ||
323 | bool enable_status = false; | ||
324 | |||
325 | switch (falcon_id) { | ||
326 | case LSF_FALCON_ID_FECS: | ||
327 | enable_status = false; | ||
328 | break; | ||
329 | case LSF_FALCON_ID_GPCCS: | ||
330 | enable_status = false; | ||
331 | break; | ||
332 | default: | ||
333 | break; | ||
334 | } | ||
335 | |||
336 | return enable_status; | ||
337 | } | ||
338 | |||
339 | int gm20b_alloc_blob_space(struct gk20a *g, | ||
340 | size_t size, struct nvgpu_mem *mem) | ||
341 | { | ||
342 | int err; | ||
343 | |||
344 | err = nvgpu_dma_alloc_sys(g, size, mem); | ||
345 | |||
346 | return err; | ||
347 | } | ||
348 | |||
349 | int prepare_ucode_blob(struct gk20a *g) | ||
350 | { | ||
351 | |||
352 | int err; | ||
353 | struct ls_flcn_mgr lsfm_l, *plsfm; | ||
354 | struct nvgpu_pmu *pmu = &g->pmu; | ||
355 | struct wpr_carveout_info wpr_inf; | ||
356 | |||
357 | if (g->acr.ucode_blob.cpu_va) { | ||
358 | /*Recovery case, we do not need to form the | ||
359 | non-WPR blob of ucodes*/ | ||
360 | err = nvgpu_init_pmu_fw_support(pmu); | ||
361 | if (err) { | ||
362 | gm20b_dbg_pmu("failed to set function pointers\n"); | ||
363 | return err; | ||
364 | } | ||
365 | return 0; | ||
366 | } | ||
367 | plsfm = &lsfm_l; | ||
368 | memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr)); | ||
369 | gm20b_dbg_pmu("fetching GMMU regs\n"); | ||
370 | g->ops.fb.vpr_info_fetch(g); | ||
371 | gr_gk20a_init_ctxsw_ucode(g); | ||
372 | |||
373 | g->ops.pmu.get_wpr(g, &wpr_inf); | ||
374 | gm20b_dbg_pmu("wpr carveout base:%llx\n", wpr_inf.wpr_base); | ||
375 | gm20b_dbg_pmu("wpr carveout size :%llx\n", wpr_inf.size); | ||
376 | |||
377 | /* Discover all managed falcons*/ | ||
378 | err = lsfm_discover_ucode_images(g, plsfm); | ||
379 | gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt); | ||
380 | if (err) | ||
381 | goto free_sgt; | ||
382 | |||
383 | if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) { | ||
384 | /* Generate WPR requirements*/ | ||
385 | err = lsf_gen_wpr_requirements(g, plsfm); | ||
386 | if (err) | ||
387 | goto free_sgt; | ||
388 | |||
389 | /*Alloc memory to hold ucode blob contents*/ | ||
390 | err = g->ops.pmu.alloc_blob_space(g, plsfm->wpr_size | ||
391 | , &g->acr.ucode_blob); | ||
392 | if (err) | ||
393 | goto free_sgt; | ||
394 | |||
395 | gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", | ||
396 | plsfm->managed_flcn_cnt, plsfm->wpr_size); | ||
397 | lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob); | ||
398 | } else { | ||
399 | gm20b_dbg_pmu("LSFM is managing no falcons.\n"); | ||
400 | } | ||
401 | gm20b_dbg_pmu("prepare ucode blob return 0\n"); | ||
402 | free_acr_resources(g, plsfm); | ||
403 | free_sgt: | ||
404 | return err; | ||
405 | } | ||
406 | |||
407 | static u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm, | ||
408 | u32 falcon_id) | ||
409 | { | ||
410 | return (plsfm->disable_mask >> falcon_id) & 0x1; | ||
411 | } | ||
412 | |||
413 | /* Discover all managed falcon ucode images */ | ||
414 | static int lsfm_discover_ucode_images(struct gk20a *g, | ||
415 | struct ls_flcn_mgr *plsfm) | ||
416 | { | ||
417 | struct nvgpu_pmu *pmu = &g->pmu; | ||
418 | struct flcn_ucode_img ucode_img; | ||
419 | u32 falcon_id; | ||
420 | u32 i; | ||
421 | int status; | ||
422 | |||
423 | /* LSFM requires a secure PMU, discover it first.*/ | ||
424 | /* Obtain the PMU ucode image and add it to the list if required*/ | ||
425 | memset(&ucode_img, 0, sizeof(ucode_img)); | ||
426 | status = pmu_ucode_details(g, &ucode_img); | ||
427 | if (status) | ||
428 | return status; | ||
429 | |||
430 | /* The falcon_id is formed by grabbing the static base | ||
431 | * falcon_id from the image and adding the | ||
432 | * engine-designated falcon instance.*/ | ||
433 | pmu->pmu_mode |= PMU_SECURE_MODE; | ||
434 | falcon_id = ucode_img.lsf_desc->falcon_id + | ||
435 | ucode_img.flcn_inst; | ||
436 | |||
437 | if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) { | ||
438 | pmu->falcon_id = falcon_id; | ||
439 | if (lsfm_add_ucode_img(g, plsfm, &ucode_img, | ||
440 | pmu->falcon_id) == 0) | ||
441 | pmu->pmu_mode |= PMU_LSFM_MANAGED; | ||
442 | |||
443 | plsfm->managed_flcn_cnt++; | ||
444 | } else { | ||
445 | gm20b_dbg_pmu("id not managed %d\n", | ||
446 | ucode_img.lsf_desc->falcon_id); | ||
447 | } | ||
448 | |||
449 | /*Free any ucode image resources if not managing this falcon*/ | ||
450 | if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) { | ||
451 | gm20b_dbg_pmu("pmu is not LSFM managed\n"); | ||
452 | lsfm_free_ucode_img_res(g, &ucode_img); | ||
453 | } | ||
454 | |||
455 | /* Enumerate all constructed falcon objects, | ||
456 | as we need the ucode image info and total falcon count.*/ | ||
457 | |||
458 | /*0th index is always PMU, which is already handled in the earlier | ||
459 | if condition*/ | ||
460 | for (i = 1; i < (MAX_SUPPORTED_LSFM); i++) { | ||
461 | memset(&ucode_img, 0, sizeof(ucode_img)); | ||
462 | if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) { | ||
463 | if (ucode_img.lsf_desc != NULL) { | ||
464 | /* We have engine sigs, ensure that this falcon | ||
465 | is aware of the secure mode expectations | ||
466 | (ACR status)*/ | ||
467 | |||
468 | /* falcon_id is formed by grabbing the static | ||
469 | base falcon_id from the image and adding the | ||
470 | engine-designated falcon instance. */ | ||
471 | falcon_id = ucode_img.lsf_desc->falcon_id + | ||
472 | ucode_img.flcn_inst; | ||
473 | |||
474 | if (!lsfm_falcon_disabled(g, plsfm, | ||
475 | falcon_id)) { | ||
476 | /* Do not manage non-FB ucode*/ | ||
477 | if (lsfm_add_ucode_img(g, | ||
478 | plsfm, &ucode_img, falcon_id) | ||
479 | == 0) | ||
480 | plsfm->managed_flcn_cnt++; | ||
481 | } else { | ||
482 | gm20b_dbg_pmu("not managed %d\n", | ||
483 | ucode_img.lsf_desc->falcon_id); | ||
484 | lsfm_free_nonpmu_ucode_img_res(g, | ||
485 | &ucode_img); | ||
486 | } | ||
487 | } | ||
488 | } else { | ||
489 | /* Consumed all available falcon objects */ | ||
490 | gm20b_dbg_pmu("Done checking for ucodes %d\n", i); | ||
491 | break; | ||
492 | } | ||
493 | } | ||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | |||
498 | int gm20b_pmu_populate_loader_cfg(struct gk20a *g, | ||
499 | void *lsfm, u32 *p_bl_gen_desc_size) | ||
500 | { | ||
501 | struct wpr_carveout_info wpr_inf; | ||
502 | struct nvgpu_pmu *pmu = &g->pmu; | ||
503 | struct lsfm_managed_ucode_img *p_lsfm = | ||
504 | (struct lsfm_managed_ucode_img *)lsfm; | ||
505 | struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img); | ||
506 | struct loader_config *ldr_cfg = &(p_lsfm->bl_gen_desc.loader_cfg); | ||
507 | u64 addr_base; | ||
508 | struct pmu_ucode_desc *desc; | ||
509 | u64 addr_code, addr_data; | ||
510 | u32 addr_args; | ||
511 | |||
512 | if (p_img->desc == NULL) /*This means its a header based ucode, | ||
513 | and so we do not fill BL gen desc structure*/ | ||
514 | return -EINVAL; | ||
515 | desc = p_img->desc; | ||
516 | /* | ||
517 | Calculate physical and virtual addresses for various portions of | ||
518 | the PMU ucode image | ||
519 | Calculate the 32-bit addresses for the application code, application | ||
520 | data, and bootloader code. These values are all based on IM_BASE. | ||
521 | The 32-bit addresses will be the upper 32-bits of the virtual or | ||
522 | physical addresses of each respective segment. | ||
523 | */ | ||
524 | addr_base = p_lsfm->lsb_header.ucode_off; | ||
525 | g->ops.pmu.get_wpr(g, &wpr_inf); | ||
526 | addr_base += wpr_inf.wpr_base; | ||
527 | gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base); | ||
528 | /*From linux*/ | ||
529 | addr_code = u64_lo32((addr_base + | ||
530 | desc->app_start_offset + | ||
531 | desc->app_resident_code_offset) >> 8); | ||
532 | gm20b_dbg_pmu("app start %d app res code off %d\n", | ||
533 | desc->app_start_offset, desc->app_resident_code_offset); | ||
534 | addr_data = u64_lo32((addr_base + | ||
535 | desc->app_start_offset + | ||
536 | desc->app_resident_data_offset) >> 8); | ||
537 | gm20b_dbg_pmu("app res data offset%d\n", | ||
538 | desc->app_resident_data_offset); | ||
539 | gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset); | ||
540 | |||
541 | addr_args = ((pwr_falcon_hwcfg_dmem_size_v( | ||
542 | gk20a_readl(g, pwr_falcon_hwcfg_r()))) | ||
543 | << GK20A_PMU_DMEM_BLKSIZE2); | ||
544 | addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); | ||
545 | |||
546 | gm20b_dbg_pmu("addr_args %x\n", addr_args); | ||
547 | |||
548 | /* Populate the loader_config state*/ | ||
549 | ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE; | ||
550 | ldr_cfg->code_dma_base = addr_code; | ||
551 | ldr_cfg->code_dma_base1 = 0x0; | ||
552 | ldr_cfg->code_size_total = desc->app_size; | ||
553 | ldr_cfg->code_size_to_load = desc->app_resident_code_size; | ||
554 | ldr_cfg->code_entry_point = desc->app_imem_entry; | ||
555 | ldr_cfg->data_dma_base = addr_data; | ||
556 | ldr_cfg->data_dma_base1 = 0; | ||
557 | ldr_cfg->data_size = desc->app_resident_data_size; | ||
558 | ldr_cfg->overlay_dma_base = addr_code; | ||
559 | ldr_cfg->overlay_dma_base1 = 0x0; | ||
560 | |||
561 | /* Update the argc/argv members*/ | ||
562 | ldr_cfg->argc = 1; | ||
563 | ldr_cfg->argv = addr_args; | ||
564 | |||
565 | *p_bl_gen_desc_size = sizeof(struct loader_config); | ||
566 | g->acr.pmu_args = addr_args; | ||
567 | return 0; | ||
568 | } | ||
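/*
 * Illustration (hypothetical values, not part of the driver): the loader
 * config code above converts byte offsets inside the WPR carveout into
 * falcon DMA bases by dropping the low 8 bits, since the falcon DMA engine
 * addresses memory in 256-byte blocks. For example:
 *
 *   u64 wpr_base  = 0xC0000000ULL;   (hypothetical carveout base)
 *   u64 ucode_off = 0x1000;          (hypothetical lsb_header.ucode_off)
 *   u64 code_off  = 0x100;           (app_start + resident code offset)
 *   u32 dma_base  = (u32)((wpr_base + ucode_off + code_off) >> 8);
 *
 * dma_base is what lands in loader_config.code_dma_base; the same shift is
 * used for data_dma_base here and in the flcn_bl_dmem_desc variant below.
 */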
569 | |||
570 | int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g, | ||
571 | void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid) | ||
572 | { | ||
573 | struct wpr_carveout_info wpr_inf; | ||
574 | struct lsfm_managed_ucode_img *p_lsfm = | ||
575 | (struct lsfm_managed_ucode_img *)lsfm; | ||
576 | struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img); | ||
577 | struct flcn_bl_dmem_desc *ldr_cfg = | ||
578 | &(p_lsfm->bl_gen_desc.bl_dmem_desc); | ||
579 | u64 addr_base; | ||
580 | struct pmu_ucode_desc *desc; | ||
581 | u64 addr_code, addr_data; | ||
582 | |||
583 | if (p_img->desc == NULL) /*This means its a header based ucode, | ||
584 | and so we do not fill BL gen desc structure*/ | ||
585 | return -EINVAL; | ||
586 | desc = p_img->desc; | ||
587 | |||
588 | /* | ||
589 | Calculate physical and virtual addresses for various portions of | ||
590 | the PMU ucode image | ||
591 | Calculate the 32-bit addresses for the application code, application | ||
592 | data, and bootloader code. These values are all based on IM_BASE. | ||
593 | The 32-bit addresses will be the upper 32-bits of the virtual or | ||
594 | physical addresses of each respective segment. | ||
595 | */ | ||
596 | addr_base = p_lsfm->lsb_header.ucode_off; | ||
597 | g->ops.pmu.get_wpr(g, &wpr_inf); | ||
598 | addr_base += wpr_inf.wpr_base; | ||
599 | |||
600 | gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base, | ||
601 | p_lsfm->wpr_header.falcon_id); | ||
602 | addr_code = u64_lo32((addr_base + | ||
603 | desc->app_start_offset + | ||
604 | desc->app_resident_code_offset) >> 8); | ||
605 | addr_data = u64_lo32((addr_base + | ||
606 | desc->app_start_offset + | ||
607 | desc->app_resident_data_offset) >> 8); | ||
608 | |||
609 | gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n", | ||
610 | (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset, | ||
611 | p_lsfm->wpr_header.falcon_id); | ||
612 | |||
613 | /* Populate the LOADER_CONFIG state */ | ||
614 | memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc)); | ||
615 | ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE; | ||
616 | ldr_cfg->code_dma_base = addr_code; | ||
617 | ldr_cfg->non_sec_code_size = desc->app_resident_code_size; | ||
618 | ldr_cfg->data_dma_base = addr_data; | ||
619 | ldr_cfg->data_size = desc->app_resident_data_size; | ||
620 | ldr_cfg->code_entry_point = desc->app_imem_entry; | ||
621 | *p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc); | ||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | /* Populate falcon boot loader generic desc.*/ | ||
626 | static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, | ||
627 | struct lsfm_managed_ucode_img *pnode) | ||
628 | { | ||
629 | |||
630 | struct nvgpu_pmu *pmu = &g->pmu; | ||
631 | if (pnode->wpr_header.falcon_id != pmu->falcon_id) { | ||
632 | gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n"); | ||
633 | g->ops.pmu.flcn_populate_bl_dmem_desc(g, | ||
634 | pnode, &pnode->bl_gen_desc_size, | ||
635 | pnode->wpr_header.falcon_id); | ||
636 | return 0; | ||
637 | } | ||
638 | |||
639 | if (pmu->pmu_mode & PMU_LSFM_MANAGED) { | ||
640 | gm20b_dbg_pmu("pmu write flcn bl gen desc\n"); | ||
641 | if (pnode->wpr_header.falcon_id == pmu->falcon_id) | ||
642 | return g->ops.pmu.pmu_populate_loader_cfg(g, pnode, | ||
643 | &pnode->bl_gen_desc_size); | ||
644 | } | ||
645 | |||
646 | /* Failed to find the falcon requested. */ | ||
647 | return -ENOENT; | ||
648 | } | ||
649 | |||
650 | /* Initialize WPR contents */ | ||
651 | static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, | ||
652 | struct nvgpu_mem *ucode) | ||
653 | { | ||
654 | struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; | ||
655 | struct lsf_wpr_header last_wpr_hdr; | ||
656 | u32 i; | ||
657 | |||
658 | /* The WPR array is at the base of the WPR */ | ||
659 | pnode = plsfm->ucode_img_list; | ||
660 | memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header)); | ||
661 | i = 0; | ||
662 | |||
663 | /* | ||
664 | * Walk the managed falcons, flush WPR and LSB headers to FB. | ||
665 | * flush any bl args to the storage area relative to the | ||
666 | * ucode image (appended on the end as a DMEM area). | ||
667 | */ | ||
668 | while (pnode) { | ||
669 | /* Flush WPR header to memory*/ | ||
670 | nvgpu_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header), | ||
671 | &pnode->wpr_header, sizeof(pnode->wpr_header)); | ||
672 | |||
673 | gm20b_dbg_pmu("wpr header"); | ||
674 | gm20b_dbg_pmu("falconid :%d", | ||
675 | pnode->wpr_header.falcon_id); | ||
676 | gm20b_dbg_pmu("lsb_offset :%x", | ||
677 | pnode->wpr_header.lsb_offset); | ||
678 | gm20b_dbg_pmu("bootstrap_owner :%d", | ||
679 | pnode->wpr_header.bootstrap_owner); | ||
680 | gm20b_dbg_pmu("lazy_bootstrap :%d", | ||
681 | pnode->wpr_header.lazy_bootstrap); | ||
682 | gm20b_dbg_pmu("status :%d", | ||
683 | pnode->wpr_header.status); | ||
684 | |||
685 | /*Flush LSB header to memory*/ | ||
686 | nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, | ||
687 | &pnode->lsb_header, sizeof(pnode->lsb_header)); | ||
688 | |||
689 | gm20b_dbg_pmu("lsb header"); | ||
690 | gm20b_dbg_pmu("ucode_off :%x", | ||
691 | pnode->lsb_header.ucode_off); | ||
692 | gm20b_dbg_pmu("ucode_size :%x", | ||
693 | pnode->lsb_header.ucode_size); | ||
694 | gm20b_dbg_pmu("data_size :%x", | ||
695 | pnode->lsb_header.data_size); | ||
696 | gm20b_dbg_pmu("bl_code_size :%x", | ||
697 | pnode->lsb_header.bl_code_size); | ||
698 | gm20b_dbg_pmu("bl_imem_off :%x", | ||
699 | pnode->lsb_header.bl_imem_off); | ||
700 | gm20b_dbg_pmu("bl_data_off :%x", | ||
701 | pnode->lsb_header.bl_data_off); | ||
702 | gm20b_dbg_pmu("bl_data_size :%x", | ||
703 | pnode->lsb_header.bl_data_size); | ||
704 | gm20b_dbg_pmu("app_code_off :%x", | ||
705 | pnode->lsb_header.app_code_off); | ||
706 | gm20b_dbg_pmu("app_code_size :%x", | ||
707 | pnode->lsb_header.app_code_size); | ||
708 | gm20b_dbg_pmu("app_data_off :%x", | ||
709 | pnode->lsb_header.app_data_off); | ||
710 | gm20b_dbg_pmu("app_data_size :%x", | ||
711 | pnode->lsb_header.app_data_size); | ||
712 | gm20b_dbg_pmu("flags :%x", | ||
713 | pnode->lsb_header.flags); | ||
714 | |||
715 | /*If this falcon has a boot loader and related args, | ||
716 | * flush them.*/ | ||
717 | if (!pnode->ucode_img.header) { | ||
718 | /*Populate gen bl and flush to memory*/ | ||
719 | lsfm_fill_flcn_bl_gen_desc(g, pnode); | ||
720 | nvgpu_mem_wr_n(g, ucode, | ||
721 | pnode->lsb_header.bl_data_off, | ||
722 | &pnode->bl_gen_desc, | ||
723 | pnode->bl_gen_desc_size); | ||
724 | } | ||
725 | /*Copying of ucode*/ | ||
726 | nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, | ||
727 | pnode->ucode_img.data, | ||
728 | pnode->ucode_img.data_size); | ||
729 | pnode = pnode->next; | ||
730 | i++; | ||
731 | } | ||
732 | |||
733 | /* Tag the terminator WPR header with an invalid falcon ID. */ | ||
734 | last_wpr_hdr.falcon_id = LSF_FALCON_ID_INVALID; | ||
735 | nvgpu_mem_wr_n(g, ucode, | ||
736 | plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header), | ||
737 | &last_wpr_hdr, | ||
738 | sizeof(struct lsf_wpr_header)); | ||
739 | } | ||
740 | |||
741 | /*! | ||
742 | * lsfm_parse_no_loader_ucode: parses UCODE header of falcon | ||
743 | * | ||
744 | * @param[in] p_ucodehdr : UCODE header | ||
745 | * @param[out] lsb_hdr : updates values in LSB header | ||
746 | * | ||
747 | * @return 0 | ||
748 | */ | ||
749 | static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr, | ||
750 | struct lsf_lsb_header *lsb_hdr) | ||
751 | { | ||
752 | |||
753 | u32 code_size = 0; | ||
754 | u32 data_size = 0; | ||
755 | u32 i = 0; | ||
756 | u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND]; | ||
757 | |||
758 | /* Let's calculate the code size */ | ||
759 | code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND]; | ||
760 | for (i = 0; i < total_apps; i++) { | ||
761 | code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND | ||
762 | (total_apps, i)]; | ||
763 | } | ||
764 | code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)]; | ||
765 | |||
766 | /* Calculate data size*/ | ||
767 | data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND]; | ||
768 | for (i = 0; i < total_apps; i++) { | ||
769 | data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND | ||
770 | (total_apps, i)]; | ||
771 | } | ||
772 | |||
773 | lsb_hdr->ucode_size = code_size; | ||
774 | lsb_hdr->data_size = data_size; | ||
775 | lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND]; | ||
776 | lsb_hdr->bl_imem_off = 0; | ||
777 | lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND]; | ||
778 | lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND]; | ||
779 | return 0; | ||
780 | } | ||
781 | |||
782 | /*! | ||
783 | * @brief lsfm_fill_static_lsb_hdr_info | ||
784 | * Populate static LSB header information using the provided ucode image | ||
785 | */ | ||
786 | static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, | ||
787 | u32 falcon_id, struct lsfm_managed_ucode_img *pnode) | ||
788 | { | ||
789 | |||
790 | struct nvgpu_pmu *pmu = &g->pmu; | ||
791 | u32 full_app_size = 0; | ||
792 | u32 data = 0; | ||
793 | |||
794 | if (pnode->ucode_img.lsf_desc) | ||
795 | memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc, | ||
796 | sizeof(struct lsf_ucode_desc)); | ||
797 | pnode->lsb_header.ucode_size = pnode->ucode_img.data_size; | ||
798 | |||
799 | /* The remainder of the LSB depends on the loader usage */ | ||
800 | if (pnode->ucode_img.header) { | ||
801 | /* Does not use a loader */ | ||
802 | pnode->lsb_header.data_size = 0; | ||
803 | pnode->lsb_header.bl_code_size = 0; | ||
804 | pnode->lsb_header.bl_data_off = 0; | ||
805 | pnode->lsb_header.bl_data_size = 0; | ||
806 | |||
807 | lsfm_parse_no_loader_ucode(pnode->ucode_img.header, | ||
808 | &(pnode->lsb_header)); | ||
809 | |||
810 | /* Load the first 256 bytes of IMEM. */ | ||
811 | /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX. | ||
812 | True for all method based falcons */ | ||
813 | data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE | | ||
814 | NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; | ||
815 | pnode->lsb_header.flags = data; | ||
816 | } else { | ||
817 | /* Uses a loader, i.e. it has a desc */ | ||
818 | pnode->lsb_header.data_size = 0; | ||
819 | |||
820 | /* The loader code size is already aligned (padded) such that | ||
821 | the code following it is aligned, but the size in the image | ||
822 | desc is not; round it up to a 256-byte alignment. */ | ||
823 | pnode->lsb_header.bl_code_size = ALIGN( | ||
824 | pnode->ucode_img.desc->bootloader_size, | ||
825 | LSF_BL_CODE_SIZE_ALIGNMENT); | ||
826 | full_app_size = ALIGN(pnode->ucode_img.desc->app_size, | ||
827 | LSF_BL_CODE_SIZE_ALIGNMENT) + | ||
828 | pnode->lsb_header.bl_code_size; | ||
829 | pnode->lsb_header.ucode_size = ALIGN( | ||
830 | pnode->ucode_img.desc->app_resident_data_offset, | ||
831 | LSF_BL_CODE_SIZE_ALIGNMENT) + | ||
832 | pnode->lsb_header.bl_code_size; | ||
833 | pnode->lsb_header.data_size = full_app_size - | ||
834 | pnode->lsb_header.ucode_size; | ||
835 | /* Though the BL is located at 0th offset of the image, the VA | ||
836 | is different to make sure that it doesn't collide with the actual OS | ||
837 | VA range */ | ||
838 | pnode->lsb_header.bl_imem_off = | ||
839 | pnode->ucode_img.desc->bootloader_imem_offset; | ||
840 | |||
841 | /* TODO: OBJFLCN should export properties using which the below | ||
842 | flags should be populated.*/ | ||
843 | pnode->lsb_header.flags = 0; | ||
844 | |||
845 | if (falcon_id == pmu->falcon_id) { | ||
846 | data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; | ||
847 | pnode->lsb_header.flags = data; | ||
848 | } | ||
849 | |||
850 | if (g->ops.pmu.is_priv_load(falcon_id)) { | ||
851 | pnode->lsb_header.flags |= | ||
852 | NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; | ||
853 | } | ||
854 | } | ||
855 | } | ||
856 | |||
857 | /* Adds a ucode image to the list of managed ucode images. */ | ||
858 | static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, | ||
859 | struct flcn_ucode_img *ucode_image, u32 falcon_id) | ||
860 | { | ||
861 | |||
862 | struct lsfm_managed_ucode_img *pnode; | ||
863 | pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img)); | ||
864 | if (pnode == NULL) | ||
865 | return -ENOMEM; | ||
866 | |||
867 | /* Keep a copy of the ucode image info locally */ | ||
868 | memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img)); | ||
869 | |||
870 | /* Fill in static WPR header info*/ | ||
871 | pnode->wpr_header.falcon_id = falcon_id; | ||
872 | pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; | ||
873 | pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY; | ||
874 | |||
875 | pnode->wpr_header.lazy_bootstrap = | ||
876 | g->ops.pmu.is_lazy_bootstrap(falcon_id); | ||
877 | |||
878 | /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by | ||
879 | Android */ | ||
880 | /* Fill in static LSB header info elsewhere */ | ||
881 | lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode); | ||
882 | pnode->next = plsfm->ucode_img_list; | ||
883 | plsfm->ucode_img_list = pnode; | ||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | /* Free any ucode image structure resources. */ | ||
888 | static void lsfm_free_ucode_img_res(struct gk20a *g, | ||
889 | struct flcn_ucode_img *p_img) | ||
890 | { | ||
891 | if (p_img->lsf_desc != NULL) { | ||
892 | nvgpu_kfree(g, p_img->lsf_desc); | ||
893 | p_img->lsf_desc = NULL; | ||
894 | } | ||
895 | } | ||
896 | |||
897 | /* Free any ucode image structure resources. */ | ||
898 | static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g, | ||
899 | struct flcn_ucode_img *p_img) | ||
900 | { | ||
901 | if (p_img->lsf_desc != NULL) { | ||
902 | nvgpu_kfree(g, p_img->lsf_desc); | ||
903 | p_img->lsf_desc = NULL; | ||
904 | } | ||
905 | if (p_img->desc != NULL) { | ||
906 | nvgpu_kfree(g, p_img->desc); | ||
907 | p_img->desc = NULL; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm) | ||
912 | { | ||
913 | u32 cnt = plsfm->managed_flcn_cnt; | ||
914 | struct lsfm_managed_ucode_img *mg_ucode_img; | ||
915 | while (cnt) { | ||
916 | mg_ucode_img = plsfm->ucode_img_list; | ||
917 | if (mg_ucode_img->ucode_img.lsf_desc->falcon_id == | ||
918 | LSF_FALCON_ID_PMU) | ||
919 | lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img); | ||
920 | else | ||
921 | lsfm_free_nonpmu_ucode_img_res(g, | ||
922 | &mg_ucode_img->ucode_img); | ||
923 | plsfm->ucode_img_list = mg_ucode_img->next; | ||
924 | nvgpu_kfree(g, mg_ucode_img); | ||
925 | cnt--; | ||
926 | } | ||
927 | } | ||
928 | |||
929 | /* Generate WPR requirements for ACR allocation request */ | ||
930 | static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm) | ||
931 | { | ||
932 | struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; | ||
933 | u32 wpr_offset; | ||
934 | |||
935 | /* Calculate WPR size required */ | ||
936 | |||
937 | /* Start with an array of WPR headers at the base of the WPR. | ||
938 | The expectation here is that the secure falcon will do a single DMA | ||
939 | read of this array and cache it internally so it's OK to pack these. | ||
940 | Also, we add 1 to the falcon count to indicate the end of the array.*/ | ||
941 | wpr_offset = sizeof(struct lsf_wpr_header) * | ||
942 | (plsfm->managed_flcn_cnt+1); | ||
943 | |||
944 | /* Walk the managed falcons, accounting for the LSB structs | ||
945 | as well as the ucode images. */ | ||
946 | while (pnode) { | ||
947 | /* Align, save off, and include an LSB header size */ | ||
948 | wpr_offset = ALIGN(wpr_offset, | ||
949 | LSF_LSB_HEADER_ALIGNMENT); | ||
950 | pnode->wpr_header.lsb_offset = wpr_offset; | ||
951 | wpr_offset += sizeof(struct lsf_lsb_header); | ||
952 | |||
953 | /* Align, save off, and include the original (static) | ||
954 | ucode image size */ | ||
955 | wpr_offset = ALIGN(wpr_offset, | ||
956 | LSF_UCODE_DATA_ALIGNMENT); | ||
957 | pnode->lsb_header.ucode_off = wpr_offset; | ||
958 | wpr_offset += pnode->ucode_img.data_size; | ||
959 | |||
960 | /* For falcons that use a boot loader (BL), we append a loader | ||
961 | desc structure on the end of the ucode image and consider this | ||
962 | the boot loader data. The host will then copy the loader desc | ||
963 | args to this space within the WPR region (before locking down) | ||
964 | and the HS bin will then copy them to DMEM 0 for the loader. */ | ||
965 | if (!pnode->ucode_img.header) { | ||
966 | /* Track the size for LSB details filled in later | ||
967 | Note that at this point we don't know what kind of | ||
968 | boot loader desc it is, so we just take the size of the | ||
969 | generic one, which is the largest it will ever be. | ||
970 | */ | ||
971 | /* Align (size bloat) and save off generic | ||
972 | descriptor size*/ | ||
973 | pnode->lsb_header.bl_data_size = ALIGN( | ||
974 | sizeof(pnode->bl_gen_desc), | ||
975 | LSF_BL_DATA_SIZE_ALIGNMENT); | ||
976 | |||
977 | /*Align, save off, and include the additional BL data*/ | ||
978 | wpr_offset = ALIGN(wpr_offset, | ||
979 | LSF_BL_DATA_ALIGNMENT); | ||
980 | pnode->lsb_header.bl_data_off = wpr_offset; | ||
981 | wpr_offset += pnode->lsb_header.bl_data_size; | ||
982 | } else { | ||
983 | /* bl_data_off is already assigned in static | ||
984 | information. But that is from start of the image */ | ||
985 | pnode->lsb_header.bl_data_off += | ||
986 | (wpr_offset - pnode->ucode_img.data_size); | ||
987 | } | ||
988 | |||
989 | /* Finally, update ucode surface size to include updates */ | ||
990 | pnode->full_ucode_size = wpr_offset - | ||
991 | pnode->lsb_header.ucode_off; | ||
992 | if (pnode->wpr_header.falcon_id != LSF_FALCON_ID_PMU) { | ||
993 | pnode->lsb_header.app_code_off = | ||
994 | pnode->lsb_header.bl_code_size; | ||
995 | pnode->lsb_header.app_code_size = | ||
996 | pnode->lsb_header.ucode_size - | ||
997 | pnode->lsb_header.bl_code_size; | ||
998 | pnode->lsb_header.app_data_off = | ||
999 | pnode->lsb_header.ucode_size; | ||
1000 | pnode->lsb_header.app_data_size = | ||
1001 | pnode->lsb_header.data_size; | ||
1002 | } | ||
1003 | pnode = pnode->next; | ||
1004 | } | ||
1005 | plsfm->wpr_size = wpr_offset; | ||
1006 | return 0; | ||
1007 | } | ||
1008 | |||
1009 | /*Loads ACR bin to FB mem and bootstraps PMU with bootloader code | ||
1010 | * start and end are addresses of ucode blob in non-WPR region*/ | ||
1011 | int gm20b_bootstrap_hs_flcn(struct gk20a *g) | ||
1012 | { | ||
1013 | struct mm_gk20a *mm = &g->mm; | ||
1014 | struct vm_gk20a *vm = mm->pmu.vm; | ||
1015 | int err = 0; | ||
1016 | u64 *acr_dmem; | ||
1017 | u32 img_size_in_bytes = 0; | ||
1018 | u32 status, size; | ||
1019 | u64 start; | ||
1020 | struct acr_desc *acr = &g->acr; | ||
1021 | struct nvgpu_firmware *acr_fw = acr->acr_fw; | ||
1022 | struct flcn_bl_dmem_desc *bl_dmem_desc = &acr->bl_dmem_desc; | ||
1023 | u32 *acr_ucode_header_t210_load; | ||
1024 | u32 *acr_ucode_data_t210_load; | ||
1025 | |||
1026 | start = nvgpu_mem_get_addr(g, &acr->ucode_blob); | ||
1027 | size = acr->ucode_blob.size; | ||
1028 | |||
1029 | gm20b_dbg_pmu(""); | ||
1030 | |||
1031 | if (!acr_fw) { | ||
1032 | /*First time init case*/ | ||
1033 | acr_fw = nvgpu_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE, 0); | ||
1034 | if (!acr_fw) { | ||
1035 | nvgpu_err(g, "pmu ucode get fail"); | ||
1036 | return -ENOENT; | ||
1037 | } | ||
1038 | acr->acr_fw = acr_fw; | ||
1039 | acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data; | ||
1040 | acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data + | ||
1041 | acr->hsbin_hdr->header_offset); | ||
1042 | acr_ucode_data_t210_load = (u32 *)(acr_fw->data + | ||
1043 | acr->hsbin_hdr->data_offset); | ||
1044 | acr_ucode_header_t210_load = (u32 *)(acr_fw->data + | ||
1045 | acr->fw_hdr->hdr_offset); | ||
1046 | img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256); | ||
1047 | |||
1048 | /* Let's patch the signatures first. */ | ||
1049 | if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load, | ||
1050 | (u32 *)(acr_fw->data + | ||
1051 | acr->fw_hdr->sig_prod_offset), | ||
1052 | (u32 *)(acr_fw->data + | ||
1053 | acr->fw_hdr->sig_dbg_offset), | ||
1054 | (u32 *)(acr_fw->data + | ||
1055 | acr->fw_hdr->patch_loc), | ||
1056 | (u32 *)(acr_fw->data + | ||
1057 | acr->fw_hdr->patch_sig)) < 0) { | ||
1058 | nvgpu_err(g, "patch signatures fail"); | ||
1059 | err = -1; | ||
1060 | goto err_release_acr_fw; | ||
1061 | } | ||
1062 | err = nvgpu_dma_alloc_map_sys(vm, img_size_in_bytes, | ||
1063 | &acr->acr_ucode); | ||
1064 | if (err) { | ||
1065 | err = -ENOMEM; | ||
1066 | goto err_release_acr_fw; | ||
1067 | } | ||
1068 | |||
1069 | acr_dmem = (u64 *) | ||
1070 | &(((u8 *)acr_ucode_data_t210_load)[ | ||
1071 | acr_ucode_header_t210_load[2]]); | ||
1072 | acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( | ||
1073 | acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]); | ||
1074 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = | ||
1075 | start; | ||
1076 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = | ||
1077 | size; | ||
1078 | ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; | ||
1079 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; | ||
1080 | |||
1081 | nvgpu_mem_wr_n(g, &acr->acr_ucode, 0, | ||
1082 | acr_ucode_data_t210_load, img_size_in_bytes); | ||
1083 | /* | ||
1084 | * In order to execute this binary, we will be using | ||
1085 | * a bootloader which will load this image into PMU IMEM/DMEM. | ||
1086 | * Fill up the bootloader descriptor for the PMU HAL to use. | ||
1087 | * TODO: Use the standard descriptor with which the generic bootloader | ||
1088 | * is checked in. | ||
1089 | */ | ||
1090 | |||
1091 | bl_dmem_desc->signature[0] = 0; | ||
1092 | bl_dmem_desc->signature[1] = 0; | ||
1093 | bl_dmem_desc->signature[2] = 0; | ||
1094 | bl_dmem_desc->signature[3] = 0; | ||
1095 | bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; | ||
1096 | bl_dmem_desc->code_dma_base = | ||
1097 | (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8)); | ||
1098 | bl_dmem_desc->code_dma_base1 = 0x0; | ||
1099 | bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; | ||
1100 | bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; | ||
1101 | bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; | ||
1102 | bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6]; | ||
1103 | bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */ | ||
1104 | bl_dmem_desc->data_dma_base = | ||
1105 | bl_dmem_desc->code_dma_base + | ||
1106 | ((acr_ucode_header_t210_load[2]) >> 8); | ||
1107 | bl_dmem_desc->data_dma_base1 = 0x0; | ||
1108 | bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; | ||
1109 | } else | ||
1110 | acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; | ||
1111 | status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); | ||
1112 | if (status != 0) { | ||
1113 | err = status; | ||
1114 | goto err_free_ucode_map; | ||
1115 | } | ||
1116 | return 0; | ||
1117 | err_free_ucode_map: | ||
1118 | nvgpu_dma_unmap_free(vm, &acr->acr_ucode); | ||
1119 | err_release_acr_fw: | ||
1120 | nvgpu_release_firmware(g, acr_fw); | ||
1121 | acr->acr_fw = NULL; | ||
1122 | return err; | ||
1123 | } | ||
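/*
 * Note, inferred from the assignments above rather than from any ACR header
 * spec: the acr_ucode_header_t210_load[] words are consumed as
 *   [0] non-secure code offset    [1] non-secure code size
 *   [2] data (DMEM image) offset  [3] data size
 *   [5] secure code offset        [6] secure code size
 * Word [2] is also used as the offset of struct flcn_acr_desc within the HS
 * image, which is why the descriptor is patched at that offset above.
 */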
1124 | |||
1125 | static u8 pmu_is_debug_mode_en(struct gk20a *g) | ||
1126 | { | ||
1127 | u32 ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r()); | ||
1128 | return pwr_pmu_scpctl_stat_debug_mode_v(ctl_stat); | ||
1129 | } | ||
1130 | |||
1131 | /* | ||
1132 | * @brief Patch signatures into ucode image | ||
1133 | */ | ||
1134 | int acr_ucode_patch_sig(struct gk20a *g, | ||
1135 | unsigned int *p_img, | ||
1136 | unsigned int *p_prod_sig, | ||
1137 | unsigned int *p_dbg_sig, | ||
1138 | unsigned int *p_patch_loc, | ||
1139 | unsigned int *p_patch_ind) | ||
1140 | { | ||
1141 | unsigned int i, *p_sig; | ||
1142 | gm20b_dbg_pmu(""); | ||
1143 | |||
1144 | if (!pmu_is_debug_mode_en(g)) { | ||
1145 | p_sig = p_prod_sig; | ||
1146 | gm20b_dbg_pmu("PRODUCTION MODE\n"); | ||
1147 | } else { | ||
1148 | p_sig = p_dbg_sig; | ||
1149 | gm20b_dbg_pmu("DEBUG MODE\n"); | ||
1150 | } | ||
1151 | |||
1152 | /* Patching logic:*/ | ||
1153 | for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) { | ||
1154 | p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)]; | ||
1155 | p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1]; | ||
1156 | p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2]; | ||
1157 | p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3]; | ||
1158 | } | ||
1159 | return 0; | ||
1160 | } | ||
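/*
 * Reading of the patch loop above (no functional change implied):
 * sizeof(*p_patch_loc) is 4, so the loop body runs exactly once.
 * p_patch_loc[i] is a byte offset into the image (hence the >> 2 to index
 * u32 words) and p_patch_ind[i] selects a 4-word (16-byte) signature block
 * (hence the << 2) from the production or debug signature array.
 */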
1161 | |||
1162 | static int bl_bootstrap(struct nvgpu_pmu *pmu, | ||
1163 | struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz) | ||
1164 | { | ||
1165 | struct gk20a *g = gk20a_from_pmu(pmu); | ||
1166 | struct acr_desc *acr = &g->acr; | ||
1167 | struct mm_gk20a *mm = &g->mm; | ||
1168 | u32 virt_addr = 0; | ||
1169 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc; | ||
1170 | u32 dst; | ||
1171 | |||
1172 | gk20a_dbg_fn(""); | ||
1173 | gk20a_writel(g, pwr_falcon_itfen_r(), | ||
1174 | gk20a_readl(g, pwr_falcon_itfen_r()) | | ||
1175 | pwr_falcon_itfen_ctxen_enable_f()); | ||
1176 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | ||
1177 | pwr_pmu_new_instblk_ptr_f( | ||
1178 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | ||
1179 | pwr_pmu_new_instblk_valid_f(1) | | ||
1180 | pwr_pmu_new_instblk_target_sys_coh_f()); | ||
1181 | |||
1182 | /*copy bootloader interface structure to dmem*/ | ||
1183 | nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, | ||
1184 | sizeof(struct flcn_bl_dmem_desc), 0); | ||
1185 | |||
1186 | /* copy bootloader to TOP of IMEM */ | ||
1187 | dst = (pwr_falcon_hwcfg_imem_size_v( | ||
1188 | gk20a_readl(g, pwr_falcon_hwcfg_r())) << 8) - bl_sz; | ||
1189 | |||
1190 | nvgpu_flcn_copy_to_imem(pmu->flcn, dst, | ||
1191 | (u8 *)(acr->hsbl_ucode.cpu_va), bl_sz, 0, 0, | ||
1192 | pmu_bl_gm10x_desc->bl_start_tag); | ||
1193 | |||
1194 | gm20b_dbg_pmu("Before starting falcon with BL\n"); | ||
1195 | |||
1196 | virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; | ||
1197 | |||
1198 | nvgpu_flcn_bootstrap(pmu->flcn, virt_addr); | ||
1199 | |||
1200 | return 0; | ||
1201 | } | ||
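/*
 * Note on the IMEM copy above: the << 8 suggests that
 * pwr_falcon_hwcfg_imem_size_v() reports IMEM size in 256-byte blocks, so
 * dst = (imem_blocks << 8) - bl_sz places the HS bootloader at the very top
 * of IMEM, tagged with bl_start_tag; virt_addr = bl_start_tag << 8 is the
 * matching virtual entry address passed to nvgpu_flcn_bootstrap().
 */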
1202 | |||
1203 | int gm20b_init_nspmu_setup_hw1(struct gk20a *g) | ||
1204 | { | ||
1205 | struct nvgpu_pmu *pmu = &g->pmu; | ||
1206 | int err = 0; | ||
1207 | |||
1208 | gk20a_dbg_fn(""); | ||
1209 | |||
1210 | nvgpu_mutex_acquire(&pmu->isr_mutex); | ||
1211 | nvgpu_flcn_reset(pmu->flcn); | ||
1212 | pmu->isr_enabled = true; | ||
1213 | nvgpu_mutex_release(&pmu->isr_mutex); | ||
1214 | |||
1215 | /* setup apertures - virtual */ | ||
1216 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), | ||
1217 | pwr_fbif_transcfg_mem_type_virtual_f()); | ||
1218 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), | ||
1219 | pwr_fbif_transcfg_mem_type_virtual_f()); | ||
1220 | /* setup apertures - physical */ | ||
1221 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), | ||
1222 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1223 | pwr_fbif_transcfg_target_local_fb_f()); | ||
1224 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), | ||
1225 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1226 | pwr_fbif_transcfg_target_coherent_sysmem_f()); | ||
1227 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), | ||
1228 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1229 | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); | ||
1230 | |||
1231 | err = g->ops.pmu.pmu_nsbootstrap(pmu); | ||
1232 | |||
1233 | return err; | ||
1234 | } | ||
1235 | |||
1236 | int gm20b_init_pmu_setup_hw1(struct gk20a *g, | ||
1237 | void *desc, u32 bl_sz) | ||
1238 | { | ||
1239 | |||
1240 | struct nvgpu_pmu *pmu = &g->pmu; | ||
1241 | int err; | ||
1242 | |||
1243 | gk20a_dbg_fn(""); | ||
1244 | |||
1245 | nvgpu_mutex_acquire(&pmu->isr_mutex); | ||
1246 | nvgpu_flcn_reset(pmu->flcn); | ||
1247 | pmu->isr_enabled = true; | ||
1248 | nvgpu_mutex_release(&pmu->isr_mutex); | ||
1249 | |||
1250 | /* setup apertures - virtual */ | ||
1251 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), | ||
1252 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1253 | pwr_fbif_transcfg_target_local_fb_f()); | ||
1254 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), | ||
1255 | pwr_fbif_transcfg_mem_type_virtual_f()); | ||
1256 | /* setup apertures - physical */ | ||
1257 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), | ||
1258 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1259 | pwr_fbif_transcfg_target_local_fb_f()); | ||
1260 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), | ||
1261 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1262 | pwr_fbif_transcfg_target_coherent_sysmem_f()); | ||
1263 | gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), | ||
1264 | pwr_fbif_transcfg_mem_type_physical_f() | | ||
1265 | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); | ||
1266 | |||
1267 | /*Copying pmu cmdline args*/ | ||
1268 | g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, | ||
1269 | g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); | ||
1270 | g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1); | ||
1271 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( | ||
1272 | pmu, GK20A_PMU_TRACE_BUFSIZE); | ||
1273 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu); | ||
1274 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( | ||
1275 | pmu, GK20A_PMU_DMAIDX_VIRT); | ||
1276 | nvgpu_flcn_copy_to_dmem(pmu->flcn, g->acr.pmu_args, | ||
1277 | (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), | ||
1278 | g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); | ||
1279 | /*disable irqs for hs falcon booting as we will poll for halt*/ | ||
1280 | nvgpu_mutex_acquire(&pmu->isr_mutex); | ||
1281 | pmu_enable_irq(pmu, false); | ||
1282 | pmu->isr_enabled = false; | ||
1283 | nvgpu_mutex_release(&pmu->isr_mutex); | ||
1284 | /*Clearing mailbox register used to reflect capabilities*/ | ||
1285 | gk20a_writel(g, pwr_falcon_mailbox1_r(), 0); | ||
1286 | err = bl_bootstrap(pmu, desc, bl_sz); | ||
1287 | if (err) | ||
1288 | return err; | ||
1289 | return 0; | ||
1290 | } | ||
1291 | |||
1292 | /* | ||
1293 | * Executes a generic bootloader and waits for the PMU to halt. | ||
1294 | * This BL will be used for those binaries that are loaded | ||
1295 | * and executed at times other than RM PMU Binary execution. | ||
1296 | * | ||
1297 | * @param[in] g gk20a pointer | ||
1298 | * @param[in] desc Bootloader descriptor | ||
1299 | * @param[in] dma_idx DMA Index | ||
1300 | * @param[in] b_wait_for_halt Wait for PMU to HALT | ||
1301 | */ | ||
1302 | int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | ||
1303 | { | ||
1304 | struct mm_gk20a *mm = &g->mm; | ||
1305 | struct vm_gk20a *vm = mm->pmu.vm; | ||
1306 | int err = 0; | ||
1307 | u32 bl_sz; | ||
1308 | struct acr_desc *acr = &g->acr; | ||
1309 | struct nvgpu_firmware *hsbl_fw = acr->hsbl_fw; | ||
1310 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc; | ||
1311 | u32 *pmu_bl_gm10x = NULL; | ||
1312 | gm20b_dbg_pmu(""); | ||
1313 | |||
1314 | if (!hsbl_fw) { | ||
1315 | hsbl_fw = nvgpu_request_firmware(g, | ||
1316 | GM20B_HSBIN_PMU_BL_UCODE_IMAGE, 0); | ||
1317 | if (!hsbl_fw) { | ||
1318 | nvgpu_err(g, "pmu ucode load fail"); | ||
1319 | return -ENOENT; | ||
1320 | } | ||
1321 | acr->hsbl_fw = hsbl_fw; | ||
1322 | acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data; | ||
1323 | acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data + | ||
1324 | acr->bl_bin_hdr->header_offset); | ||
1325 | pmu_bl_gm10x_desc = acr->pmu_hsbl_desc; | ||
1326 | pmu_bl_gm10x = (u32 *)(hsbl_fw->data + | ||
1327 | acr->bl_bin_hdr->data_offset); | ||
1328 | bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size, | ||
1329 | 256); | ||
1330 | acr->hsbl_ucode.size = bl_sz; | ||
1331 | gm20b_dbg_pmu("Executing Generic Bootloader\n"); | ||
1332 | |||
1333 | /*TODO in code: verify that PMU enable is done, | ||
1334 | scrubbing etc. is done*/ | ||
1335 | /*TODO in code verify that gmmu vm init is done*/ | ||
1336 | err = nvgpu_dma_alloc_flags_sys(g, | ||
1337 | NVGPU_DMA_READ_ONLY, bl_sz, &acr->hsbl_ucode); | ||
1338 | if (err) { | ||
1339 | nvgpu_err(g, "failed to allocate memory"); | ||
1340 | goto err_done; | ||
1341 | } | ||
1342 | |||
1343 | acr->hsbl_ucode.gpu_va = nvgpu_gmmu_map(vm, | ||
1344 | &acr->hsbl_ucode, | ||
1345 | bl_sz, | ||
1346 | 0, /* flags */ | ||
1347 | gk20a_mem_flag_read_only, false, | ||
1348 | acr->hsbl_ucode.aperture); | ||
1349 | if (!acr->hsbl_ucode.gpu_va) { | ||
1350 | nvgpu_err(g, "failed to map pmu ucode memory!!"); | ||
1351 | goto err_free_ucode; | ||
1352 | } | ||
1353 | |||
1354 | nvgpu_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz); | ||
1355 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); | ||
1356 | } | ||
1357 | /* | ||
1358 | * Disable interrupts to avoid kernel hitting breakpoint due | ||
1359 | * to PMU halt | ||
1360 | */ | ||
1361 | |||
1362 | if (g->ops.pmu.falcon_clear_halt_interrupt_status(g, | ||
1363 | gk20a_get_gr_idle_timeout(g))) | ||
1364 | goto err_unmap_bl; | ||
1365 | |||
1366 | gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g, | ||
1367 | pwr_falcon_mmu_phys_sec_r())); | ||
1368 | gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r())); | ||
1369 | |||
1370 | g->ops.pmu.init_falcon_setup_hw(g, desc, acr->hsbl_ucode.size); | ||
1371 | |||
1372 | /* Poll for HALT */ | ||
1373 | if (b_wait_for_halt) { | ||
1374 | err = g->ops.pmu.falcon_wait_for_halt(g, | ||
1375 | ACR_COMPLETION_TIMEOUT_MS); | ||
1376 | if (err == 0) { | ||
1377 | /* Clear the HALT interrupt */ | ||
1378 | if (g->ops.pmu.falcon_clear_halt_interrupt_status(g, | ||
1379 | gk20a_get_gr_idle_timeout(g))) | ||
1380 | goto err_unmap_bl; | ||
1381 | } | ||
1382 | else | ||
1383 | goto err_unmap_bl; | ||
1384 | } | ||
1385 | gm20b_dbg_pmu("after waiting for halt, err %x\n", err); | ||
1386 | gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g, | ||
1387 | pwr_falcon_mmu_phys_sec_r())); | ||
1388 | gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r())); | ||
1389 | start_gm20b_pmu(g); | ||
1390 | return 0; | ||
1391 | err_unmap_bl: | ||
1392 | nvgpu_gmmu_unmap(vm, &acr->hsbl_ucode, acr->hsbl_ucode.gpu_va); | ||
1393 | err_free_ucode: | ||
1394 | nvgpu_dma_free(g, &acr->hsbl_ucode); | ||
1395 | err_done: | ||
1396 | nvgpu_release_firmware(g, hsbl_fw); | ||
1397 | return err; | ||
1398 | } | ||
1399 | |||
1400 | /*! | ||
1401 | * Wait for PMU to halt | ||
1402 | * @param[in] g GPU object pointer | ||
1403 | * @param[in] timeout_ms Timeout in msec for PMU to halt | ||
1404 | * @return '0' if PMU halts | ||
1405 | */ | ||
1406 | int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms) | ||
1407 | { | ||
1408 | struct nvgpu_pmu *pmu = &g->pmu; | ||
1409 | u32 data = 0; | ||
1410 | int ret = -EBUSY; | ||
1411 | |||
1412 | ret = nvgpu_flcn_wait_for_halt(pmu->flcn, timeout_ms); | ||
1413 | if (ret) { | ||
1414 | nvgpu_err(g, "ACR boot timed out"); | ||
1415 | return ret; | ||
1416 | } | ||
1417 | |||
1418 | g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r()); | ||
1419 | gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities); | ||
1420 | data = gk20a_readl(g, pwr_falcon_mailbox0_r()); | ||
1421 | if (data) { | ||
1422 | nvgpu_err(g, "ACR boot failed, err %x", data); | ||
1423 | ret = -EAGAIN; | ||
1424 | } | ||
1425 | |||
1426 | return ret; | ||
1427 | } | ||
1428 | |||
1429 | /*! | ||
1430 | * Wait for PMU halt interrupt status to be cleared | ||
1431 | * @param[in] g GPU object pointer | ||
1432 | * @param[in] timeout_ms Timeout in msec for halt to clear | ||
1433 | * @return '0' if PMU halt irq status is clear | ||
1434 | */ | ||
1435 | int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms) | ||
1436 | { | ||
1437 | struct nvgpu_pmu *pmu = &g->pmu; | ||
1438 | int status = 0; | ||
1439 | |||
1440 | if (nvgpu_flcn_clear_halt_intr_status(pmu->flcn, timeout_ms)) | ||
1441 | status = -EBUSY; | ||
1442 | |||
1443 | return status; | ||
1444 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h new file mode 100644 index 00000000..9d261aae --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h | |||
@@ -0,0 +1,60 @@ | |||
1 | /* | ||
2 | * GM20B ACR | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef __ACR_GM20B_H_ | ||
26 | #define __ACR_GM20B_H_ | ||
27 | |||
28 | #define GM20B_PMU_UCODE_IMAGE "gpmu_ucode_image.bin" | ||
29 | #define GM20B_PMU_UCODE_DESC "gpmu_ucode_desc.bin" | ||
30 | #define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin" | ||
31 | #define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin" | ||
32 | #define GM20B_PMU_UCODE_SIG "pmu_sig.bin" | ||
33 | #define GM20B_FECS_UCODE_SIG "fecs_sig.bin" | ||
34 | #define T18x_GPCCS_UCODE_SIG "gpccs_sig.bin" | ||
35 | |||
36 | bool gm20b_is_pmu_supported(struct gk20a *g); | ||
37 | int prepare_ucode_blob(struct gk20a *g); | ||
38 | int gm20b_bootstrap_hs_flcn(struct gk20a *g); | ||
39 | bool gm20b_is_lazy_bootstrap(u32 falcon_id); | ||
40 | bool gm20b_is_priv_load(u32 falcon_id); | ||
41 | void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf); | ||
42 | int gm20b_alloc_blob_space(struct gk20a *g, size_t size, struct nvgpu_mem *mem); | ||
43 | int gm20b_pmu_populate_loader_cfg(struct gk20a *g, | ||
44 | void *lsfm, u32 *p_bl_gen_desc_size); | ||
45 | int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g, | ||
46 | void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid); | ||
47 | int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms); | ||
48 | int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms); | ||
49 | int gm20b_init_pmu_setup_hw1(struct gk20a *g, void *desc, u32 bl_sz); | ||
50 | |||
51 | int gm20b_pmu_setup_sw(struct gk20a *g); | ||
52 | int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt); | ||
53 | int gm20b_init_nspmu_setup_hw1(struct gk20a *g); | ||
54 | int acr_ucode_patch_sig(struct gk20a *g, | ||
55 | unsigned int *p_img, | ||
56 | unsigned int *p_prod_sig, | ||
57 | unsigned int *p_dbg_sig, | ||
58 | unsigned int *p_patch_loc, | ||
59 | unsigned int *p_patch_ind); | ||
60 | #endif /*__ACR_GM20B_H_*/ | ||
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c new file mode 100644 index 00000000..34c8d4b7 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * GM20B BUS | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/timers.h> | ||
26 | #include <nvgpu/bus.h> | ||
27 | #include <nvgpu/mm.h> | ||
28 | |||
29 | #include "bus_gm20b.h" | ||
30 | #include "gk20a/gk20a.h" | ||
31 | #include "gk20a/bus_gk20a.h" | ||
32 | |||
33 | #include <nvgpu/hw/gm20b/hw_bus_gm20b.h> | ||
34 | |||
35 | int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | ||
36 | { | ||
37 | struct nvgpu_timeout timeout; | ||
38 | int err = 0; | ||
39 | u64 iova = nvgpu_inst_block_addr(g, bar1_inst); | ||
40 | u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); | ||
41 | |||
42 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | ||
43 | |||
44 | gk20a_writel(g, bus_bar1_block_r(), | ||
45 | nvgpu_aperture_mask(g, bar1_inst, | ||
46 | bus_bar1_block_target_sys_mem_ncoh_f(), | ||
47 | bus_bar1_block_target_vid_mem_f()) | | ||
48 | bus_bar1_block_mode_virtual_f() | | ||
49 | bus_bar1_block_ptr_f(ptr_v)); | ||
50 | nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); | ||
51 | do { | ||
52 | u32 val = gk20a_readl(g, bus_bind_status_r()); | ||
53 | u32 pending = bus_bind_status_bar1_pending_v(val); | ||
54 | u32 outstanding = bus_bind_status_bar1_outstanding_v(val); | ||
55 | if (!pending && !outstanding) | ||
56 | break; | ||
57 | |||
58 | nvgpu_udelay(5); | ||
59 | } while (!nvgpu_timeout_expired(&timeout)); | ||
60 | |||
61 | if (nvgpu_timeout_peek_expired(&timeout)) | ||
62 | err = -EINVAL; | ||
63 | |||
64 | return err; | ||
65 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.h b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h new file mode 100644 index 00000000..961b906a --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * GM20B BUS | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVGPU_GM20B_BUS | ||
26 | #define _NVGPU_GM20B_BUS | ||
27 | |||
28 | struct gk20a; | ||
29 | struct nvgpu_mem; | ||
30 | |||
31 | int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst); | ||
32 | |||
33 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c new file mode 100644 index 00000000..61d3b6f5 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c | |||
@@ -0,0 +1,1605 @@ | |||
1 | /* | ||
2 | * GM20B Clocks | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "clk_gm20b.h" | ||
27 | |||
28 | #include <nvgpu/soc.h> | ||
29 | #include <nvgpu/fuse.h> | ||
30 | #include <nvgpu/bug.h> | ||
31 | |||
32 | #include <nvgpu/hw/gm20b/hw_trim_gm20b.h> | ||
33 | #include <nvgpu/hw/gm20b/hw_timer_gm20b.h> | ||
34 | #include <nvgpu/hw/gm20b/hw_therm_gm20b.h> | ||
35 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
36 | |||
37 | #define gk20a_dbg_clk(fmt, arg...) \ | ||
38 | gk20a_dbg(gpu_dbg_clk, fmt, ##arg) | ||
39 | |||
40 | #define DFS_DET_RANGE 6 /* -2^6 ... 2^6-1 */ | ||
41 | #define SDM_DIN_RANGE 12 /* -2^12 ... 2^12-1 */ | ||
42 | #define DFS_TESTOUT_DET BIT(0) | ||
43 | #define DFS_EXT_CAL_EN BIT(9) | ||
44 | #define DFS_EXT_STROBE BIT(16) | ||
45 | |||
46 | #define BOOT_GPU_UV_B1 1000000 /* gpu rail boot voltage 1.0V */ | ||
47 | #define BOOT_GPU_UV_C1 800000 /* gpu rail boot voltage 0.8V */ | ||
48 | #define ADC_SLOPE_UV 10000 /* default ADC detection slope 10mV */ | ||
49 | |||
50 | #define DVFS_SAFE_MARGIN 10 /* 10% */ | ||
51 | |||
52 | static struct pll_parms gpc_pll_params_b1 = { | ||
53 | 128000, 2600000, /* freq */ | ||
54 | 1300000, 2600000, /* vco */ | ||
55 | 12000, 38400, /* u */ | ||
56 | 1, 255, /* M */ | ||
57 | 8, 255, /* N */ | ||
58 | 1, 31, /* PL */ | ||
59 | -165230, 214007, /* DFS_COEFF */ | ||
60 | 0, 0, /* ADC char coeff - to be read from fuses */ | ||
61 | 0x7 << 3, /* vco control in NA mode */ | ||
62 | 500, /* Locking and ramping timeout */ | ||
63 | 40, /* Lock delay in NA mode */ | ||
64 | 5, /* IDDQ mode exit delay */ | ||
65 | }; | ||
66 | |||
67 | static struct pll_parms gpc_pll_params_c1 = { | ||
68 | 76800, 2600000, /* freq */ | ||
69 | 1300000, 2600000, /* vco */ | ||
70 | 19200, 38400, /* u */ | ||
71 | 1, 255, /* M */ | ||
72 | 8, 255, /* N */ | ||
73 | 1, 31, /* PL */ | ||
74 | -172550, 195374, /* DFS_COEFF */ | ||
75 | 0, 0, /* ADC char coeff - to be read from fuses */ | ||
76 | (0x1 << 3) | 0x7, /* vco control in NA mode */ | ||
77 | 500, /* Locking and ramping timeout */ | ||
78 | 40, /* Lock delay in NA mode */ | ||
79 | 5, /* IDDQ mode exit delay */ | ||
80 | 0x3 << 10, /* DFS control settings */ | ||
81 | }; | ||
82 | |||
83 | static struct pll_parms gpc_pll_params; | ||
84 | |||
85 | static void clk_setup_slide(struct gk20a *g, u32 clk_u); | ||
86 | |||
87 | #define DUMP_REG(addr_func) \ | ||
88 | do { \ | ||
89 | addr = trim_sys_##addr_func##_r(); \ | ||
90 | data = gk20a_readl(g, addr); \ | ||
91 | pr_info(#addr_func "[0x%x] = 0x%x\n", addr, data); \ | ||
92 | } while (0) | ||
93 | |||
94 | static void dump_gpc_pll(struct gk20a *g, struct pll *gpll, u32 last_cfg) | ||
95 | { | ||
96 | u32 addr, data; | ||
97 | |||
98 | pr_info("**** GPCPLL DUMP ****"); | ||
99 | pr_info("gpcpll s/w M=%u N=%u P=%u\n", gpll->M, gpll->N, gpll->PL); | ||
100 | pr_info("gpcpll_cfg_last = 0x%x\n", last_cfg); | ||
101 | DUMP_REG(gpcpll_cfg); | ||
102 | DUMP_REG(gpcpll_coeff); | ||
103 | DUMP_REG(sel_vco); | ||
104 | pr_info("\n"); | ||
105 | } | ||
106 | |||
107 | #define PLDIV_GLITCHLESS 1 | ||
108 | |||
109 | #if PLDIV_GLITCHLESS | ||
110 | /* | ||
111 | * Post divider transition is glitchless only if there is a common "1" in the | ||
112 | * binary representation of old and new settings. | ||
113 | */ | ||
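| /* | ||
| * Illustrative example (hypothetical divider masks, not taken from real | ||
| * settings): old_pl = 0b0010 and new_pl = 0b0100 share no common "1" bit, | ||
| * so an interim value is needed; old_pl | lowest bit of new_pl and | ||
| * new_pl | lowest bit of old_pl both give 0b0110, and the minimum of the | ||
| * two, 0b0110, is used as the glitch-free intermediate setting. | ||
| */ | ||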
114 | static u32 get_interim_pldiv(struct gk20a *g, u32 old_pl, u32 new_pl) | ||
115 | { | ||
116 | u32 pl; | ||
117 | |||
118 | if ((g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) || (old_pl & new_pl)) | ||
119 | return 0; | ||
120 | |||
121 | pl = old_pl | BIT(ffs(new_pl) - 1); /* pl never 0 */ | ||
122 | new_pl |= BIT(ffs(old_pl) - 1); | ||
123 | |||
124 | return min(pl, new_pl); | ||
125 | } | ||
126 | #endif | ||
127 | |||
128 | /* Calculate and update M/N/PL as well as pll->freq | ||
129 | ref_clk_f = clk_in_f; | ||
130 | u_f = ref_clk_f / M; | ||
131 | vco_f = u_f * N = ref_clk_f * N / M; | ||
132 | PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL); | ||
133 | gpcclk = gpc2clk / 2; */ | ||
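| /* For illustration only (assumed values, not a real operating point): with | ||
| ref_clk_f = 38400 kHz, M = 1 and N = 64 the VCO runs at 2457600 kHz; a post | ||
| divider of 2 then gives gpc2clk = 1228800 kHz and gpcclk = 614400 kHz. */ | ||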
134 | static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll, | ||
135 | struct pll_parms *pll_params, u32 *target_freq, bool best_fit) | ||
136 | { | ||
137 | u32 min_vco_f, max_vco_f; | ||
138 | u32 best_M, best_N; | ||
139 | u32 low_PL, high_PL, best_PL; | ||
140 | u32 m, n, n2; | ||
141 | u32 target_vco_f, vco_f; | ||
142 | u32 ref_clk_f, target_clk_f, u_f; | ||
143 | u32 delta, lwv, best_delta = ~0; | ||
144 | u32 pl; | ||
145 | |||
146 | BUG_ON(target_freq == NULL); | ||
147 | |||
148 | gk20a_dbg_fn("request target freq %d kHz", *target_freq); | ||
149 | |||
150 | ref_clk_f = pll->clk_in; | ||
151 | target_clk_f = *target_freq; | ||
152 | max_vco_f = pll_params->max_vco; | ||
153 | min_vco_f = pll_params->min_vco; | ||
154 | best_M = pll_params->max_M; | ||
155 | best_N = pll_params->min_N; | ||
156 | best_PL = pll_params->min_PL; | ||
157 | |||
158 | target_vco_f = target_clk_f + target_clk_f / 50; | ||
159 | if (max_vco_f < target_vco_f) | ||
160 | max_vco_f = target_vco_f; | ||
161 | |||
162 | /* Set PL search boundaries. */ | ||
163 | high_PL = nvgpu_div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f); | ||
164 | high_PL = min(high_PL, pll_params->max_PL); | ||
165 | high_PL = max(high_PL, pll_params->min_PL); | ||
166 | |||
167 | low_PL = nvgpu_div_to_pl(min_vco_f / target_vco_f); | ||
168 | low_PL = min(low_PL, pll_params->max_PL); | ||
169 | low_PL = max(low_PL, pll_params->min_PL); | ||
170 | |||
171 | gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)", | ||
172 | low_PL, nvgpu_pl_to_div(low_PL), high_PL, nvgpu_pl_to_div(high_PL)); | ||
173 | |||
174 | for (pl = low_PL; pl <= high_PL; pl++) { | ||
175 | target_vco_f = target_clk_f * nvgpu_pl_to_div(pl); | ||
176 | |||
177 | for (m = pll_params->min_M; m <= pll_params->max_M; m++) { | ||
178 | u_f = ref_clk_f / m; | ||
179 | |||
180 | if (u_f < pll_params->min_u) | ||
181 | break; | ||
182 | if (u_f > pll_params->max_u) | ||
183 | continue; | ||
184 | |||
185 | n = (target_vco_f * m) / ref_clk_f; | ||
186 | n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; | ||
187 | |||
188 | if (n > pll_params->max_N) | ||
189 | break; | ||
190 | |||
191 | for (; n <= n2; n++) { | ||
192 | if (n < pll_params->min_N) | ||
193 | continue; | ||
194 | if (n > pll_params->max_N) | ||
195 | break; | ||
196 | |||
197 | vco_f = ref_clk_f * n / m; | ||
198 | |||
199 | if (vco_f >= min_vco_f && vco_f <= max_vco_f) { | ||
200 | lwv = (vco_f + (nvgpu_pl_to_div(pl) / 2)) | ||
201 | / nvgpu_pl_to_div(pl); | ||
202 | delta = abs(lwv - target_clk_f); | ||
203 | |||
204 | if (delta < best_delta) { | ||
205 | best_delta = delta; | ||
206 | best_M = m; | ||
207 | best_N = n; | ||
208 | best_PL = pl; | ||
209 | |||
210 | if (best_delta == 0 || | ||
211 | /* 0.45% for non best fit */ | ||
212 | (!best_fit && (vco_f / best_delta > 218))) { | ||
213 | goto found_match; | ||
214 | } | ||
215 | |||
216 | gk20a_dbg_info("delta %d @ M %d, N %d, PL %d", | ||
217 | delta, m, n, pl); | ||
218 | } | ||
219 | } | ||
220 | } | ||
221 | } | ||
222 | } | ||
223 | |||
224 | found_match: | ||
225 | BUG_ON(best_delta == ~0U); | ||
226 | |||
227 | if (best_fit && best_delta != 0) | ||
228 | gk20a_dbg_clk("no best match for target @ %d kHz on gpc_pll", | ||
229 | target_clk_f); | ||
230 | |||
231 | pll->M = best_M; | ||
232 | pll->N = best_N; | ||
233 | pll->PL = best_PL; | ||
234 | |||
235 | /* save current frequency */ | ||
236 | pll->freq = ref_clk_f * pll->N / (pll->M * nvgpu_pl_to_div(pll->PL)); | ||
237 | |||
238 | *target_freq = pll->freq; | ||
239 | |||
240 | gk20a_dbg_clk("actual target freq %d kHz, M %d, N %d, PL %d(div%d)", | ||
241 | *target_freq, pll->M, pll->N, pll->PL, nvgpu_pl_to_div(pll->PL)); | ||
242 | |||
243 | gk20a_dbg_fn("done"); | ||
244 | |||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | /* GPCPLL NA/DVFS mode methods */ | ||
249 | |||
250 | static inline int fuse_get_gpcpll_adc_rev(u32 val) | ||
251 | { | ||
252 | return (val >> 30) & 0x3; | ||
253 | } | ||
254 | |||
255 | static inline int fuse_get_gpcpll_adc_slope_uv(u32 val) | ||
256 | { | ||
257 | /* Integer part in mV * 1000 + fractional part in uV */ | ||
258 | return ((val >> 24) & 0x3f) * 1000 + ((val >> 14) & 0x3ff); | ||
259 | } | ||
260 | |||
261 | static inline int fuse_get_gpcpll_adc_intercept_uv(u32 val) | ||
262 | { | ||
263 | /* Integer part in mV * 1000 + fractional part in 100uV */ | ||
264 | return ((val >> 4) & 0x3ff) * 1000 + ((val >> 0) & 0xf) * 100; | ||
265 | } | ||
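| /* | ||
| * Decoding example with an assumed fuse value (not real calibration data): | ||
| * val = 0x4A002580 yields rev = 1, slope = 10 mV + 0 uV = 10000 uV and | ||
| * intercept = 600 mV + 0 * 100 uV = 600000 uV per the helpers above. | ||
| */ | ||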
266 | |||
267 | static int nvgpu_fuse_calib_gpcpll_get_adc(struct gk20a *g, | ||
268 | int *slope_uv, int *intercept_uv) | ||
269 | { | ||
270 | u32 val; | ||
271 | int ret; | ||
272 | |||
273 | ret = nvgpu_tegra_fuse_read_reserved_calib(g, &val); | ||
274 | if (ret) | ||
275 | return ret; | ||
276 | |||
277 | if (!fuse_get_gpcpll_adc_rev(val)) | ||
278 | return -EINVAL; | ||
279 | |||
280 | *slope_uv = fuse_get_gpcpll_adc_slope_uv(val); | ||
281 | *intercept_uv = fuse_get_gpcpll_adc_intercept_uv(val); | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | #ifdef CONFIG_TEGRA_USE_NA_GPCPLL | ||
286 | static bool nvgpu_fuse_can_use_na_gpcpll(struct gk20a *g) | ||
287 | { | ||
288 | return nvgpu_tegra_get_gpu_speedo_id(g); | ||
289 | } | ||
290 | #endif | ||
291 | |||
292 | /* | ||
293 | * Read ADC characteristic parameters from fuses. | ||
294 | * Determine calibration settings. | ||
295 | */ | ||
296 | static int clk_config_calibration_params(struct gk20a *g) | ||
297 | { | ||
298 | int slope, offs; | ||
299 | struct pll_parms *p = &gpc_pll_params; | ||
300 | |||
301 | if (!nvgpu_fuse_calib_gpcpll_get_adc(g, &slope, &offs)) { | ||
302 | p->uvdet_slope = slope; | ||
303 | p->uvdet_offs = offs; | ||
304 | } | ||
305 | |||
306 | if (!p->uvdet_slope || !p->uvdet_offs) { | ||
307 | /* | ||
308 | * If ADC conversion slope/offset parameters are not fused | ||
309 | * (non-production config), report an error, but allow using the | ||
310 | * boot-time internal calibration with the default slope. | ||
311 | */ | ||
312 | nvgpu_err(g, "ADC coeff are not fused"); | ||
313 | return -EINVAL; | ||
314 | } | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Determine DFS_COEFF for the requested voltage. Always select external | ||
320 | * calibration override equal to the voltage, and set maximum detection | ||
321 | * limit "0" (to make sure that PLL output remains under F/V curve when | ||
322 | * voltage increases). | ||
323 | */ | ||
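| /* | ||
| * Worked example (voltage value assumed, B1 DFS_COEFF slope/offset from the | ||
| * table above): at mv = 1000, coeff = round(1000 * -165230 / 1000) + 214007 | ||
| * = 48777, and round(48777 / 1000) = 49 is programmed, subject to the | ||
| * hardware field maximum. | ||
| */ | ||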
324 | static void clk_config_dvfs_detection(int mv, struct na_dvfs *d) | ||
325 | { | ||
326 | u32 coeff, coeff_max; | ||
327 | struct pll_parms *p = &gpc_pll_params; | ||
328 | |||
329 | coeff_max = trim_sys_gpcpll_dvfs0_dfs_coeff_v( | ||
330 | trim_sys_gpcpll_dvfs0_dfs_coeff_m()); | ||
331 | coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs; | ||
332 | coeff = DIV_ROUND_CLOSEST(coeff, 1000); | ||
333 | coeff = min(coeff, coeff_max); | ||
334 | d->dfs_coeff = coeff; | ||
335 | |||
336 | d->dfs_ext_cal = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs, | ||
337 | p->uvdet_slope); | ||
338 | BUG_ON(abs(d->dfs_ext_cal) >= (1 << DFS_DET_RANGE)); | ||
339 | d->uv_cal = p->uvdet_offs + d->dfs_ext_cal * p->uvdet_slope; | ||
340 | d->dfs_det_max = 0; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Solve equation for integer and fractional part of the effective NDIV: | ||
345 | * | ||
346 | * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) + | ||
347 | * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE | ||
348 | * | ||
349 | * The SDM_DIN LSB is finally shifted out, since it is not accessible by s/w. | ||
350 | */ | ||
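| /* | ||
| * Example under the assumption det_delta = 0 (voltage exactly at the | ||
| * calibration point): for n_eff = 64, n = 64 << DFS_DET_RANGE, so | ||
| * n_int = 64 and the remainder is 0; sdm_din then evaluates to | ||
| * -2^SDM_DIN_RANGE, whose retained byte is 0xF0, i.e. a -1/2 fractional | ||
| * NDIV that cancels the +1/2 term in the equation above. | ||
| */ | ||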
351 | static void clk_config_dvfs_ndiv(int mv, u32 n_eff, struct na_dvfs *d) | ||
352 | { | ||
353 | int n, det_delta; | ||
354 | u32 rem, rem_range; | ||
355 | struct pll_parms *p = &gpc_pll_params; | ||
356 | |||
357 | det_delta = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs, | ||
358 | p->uvdet_slope); | ||
359 | det_delta -= d->dfs_ext_cal; | ||
360 | det_delta = min(det_delta, d->dfs_det_max); | ||
361 | det_delta = det_delta * d->dfs_coeff; | ||
362 | |||
363 | n = (int)(n_eff << DFS_DET_RANGE) - det_delta; | ||
364 | BUG_ON((n < 0) || (n > (int)(p->max_N << DFS_DET_RANGE))); | ||
365 | d->n_int = ((u32)n) >> DFS_DET_RANGE; | ||
366 | |||
367 | rem = ((u32)n) & ((1 << DFS_DET_RANGE) - 1); | ||
368 | rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE; | ||
369 | d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE); | ||
370 | d->sdm_din = (d->sdm_din >> BITS_PER_BYTE) & 0xff; | ||
371 | } | ||
372 | |||
373 | /* Voltage dependent configuration */ | ||
374 | static void clk_config_dvfs(struct gk20a *g, struct pll *gpll) | ||
375 | { | ||
376 | struct na_dvfs *d = &gpll->dvfs; | ||
377 | |||
378 | d->mv = g->ops.clk.predict_mv_at_hz_cur_tfloor(&g->clk, | ||
379 | rate_gpc2clk_to_gpu(gpll->freq)); | ||
380 | |||
381 | clk_config_dvfs_detection(d->mv, d); | ||
382 | clk_config_dvfs_ndiv(d->mv, gpll->N, d); | ||
383 | } | ||
384 | |||
385 | /* Update DVFS detection settings in flight */ | ||
386 | static void clk_set_dfs_coeff(struct gk20a *g, u32 dfs_coeff) | ||
387 | { | ||
388 | u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
389 | data |= DFS_EXT_STROBE; | ||
390 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
391 | |||
392 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r()); | ||
393 | data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(), | ||
394 | trim_sys_gpcpll_dvfs0_dfs_coeff_f(dfs_coeff)); | ||
395 | gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data); | ||
396 | |||
397 | data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
398 | nvgpu_udelay(1); | ||
399 | data &= ~DFS_EXT_STROBE; | ||
400 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
401 | } | ||
402 | |||
403 | static void __maybe_unused clk_set_dfs_det_max(struct gk20a *g, u32 dfs_det_max) | ||
404 | { | ||
405 | u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
406 | data |= DFS_EXT_STROBE; | ||
407 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
408 | |||
409 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r()); | ||
410 | data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(), | ||
411 | trim_sys_gpcpll_dvfs0_dfs_det_max_f(dfs_det_max)); | ||
412 | gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data); | ||
413 | |||
414 | data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
415 | nvgpu_udelay(1); | ||
416 | data &= ~DFS_EXT_STROBE; | ||
417 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
418 | } | ||
419 | |||
420 | static void clk_set_dfs_ext_cal(struct gk20a *g, u32 dfs_det_cal) | ||
421 | { | ||
422 | u32 data, ctrl; | ||
423 | |||
424 | data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
425 | data &= ~(BIT(DFS_DET_RANGE + 1) - 1); | ||
426 | data |= dfs_det_cal & (BIT(DFS_DET_RANGE + 1) - 1); | ||
427 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
428 | |||
429 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
430 | nvgpu_udelay(1); | ||
431 | ctrl = trim_sys_gpcpll_dvfs1_dfs_ctrl_v(data); | ||
432 | if (~ctrl & DFS_EXT_CAL_EN) { | ||
433 | data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(), | ||
434 | trim_sys_gpcpll_dvfs1_dfs_ctrl_f( | ||
435 | ctrl | DFS_EXT_CAL_EN | DFS_TESTOUT_DET)); | ||
436 | gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | static void clk_setup_dvfs_detection(struct gk20a *g, struct pll *gpll) | ||
441 | { | ||
442 | struct na_dvfs *d = &gpll->dvfs; | ||
443 | |||
444 | u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
445 | data |= DFS_EXT_STROBE; | ||
446 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
447 | |||
448 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r()); | ||
449 | data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(), | ||
450 | trim_sys_gpcpll_dvfs0_dfs_coeff_f(d->dfs_coeff)); | ||
451 | data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(), | ||
452 | trim_sys_gpcpll_dvfs0_dfs_det_max_f(d->dfs_det_max)); | ||
453 | gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data); | ||
454 | |||
455 | data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r()); | ||
456 | nvgpu_udelay(1); | ||
457 | data &= ~DFS_EXT_STROBE; | ||
458 | gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data); | ||
459 | |||
460 | clk_set_dfs_ext_cal(g, d->dfs_ext_cal); | ||
461 | } | ||
462 | |||
463 | /* Enable NA/DVFS mode */ | ||
464 | static int clk_enbale_pll_dvfs(struct gk20a *g) | ||
465 | { | ||
466 | u32 data, cfg = 0; | ||
467 | int delay = gpc_pll_params.iddq_exit_delay; /* iddq & calib delay */ | ||
468 | struct pll_parms *p = &gpc_pll_params; | ||
469 | bool calibrated = p->uvdet_slope && p->uvdet_offs; | ||
470 | |||
471 | /* Enable NA DVFS */ | ||
472 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
473 | data |= trim_sys_gpcpll_dvfs1_en_dfs_m(); | ||
474 | gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data); | ||
475 | |||
476 | /* Set VCO_CTRL */ | ||
477 | if (p->vco_ctrl) { | ||
478 | data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); | ||
479 | data = set_field(data, trim_sys_gpcpll_cfg3_vco_ctrl_m(), | ||
480 | trim_sys_gpcpll_cfg3_vco_ctrl_f(p->vco_ctrl)); | ||
481 | gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data); | ||
482 | } | ||
483 | |||
484 | /* Set NA mode DFS control */ | ||
485 | if (p->dfs_ctrl) { | ||
486 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
487 | data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(), | ||
488 | trim_sys_gpcpll_dvfs1_dfs_ctrl_f(p->dfs_ctrl)); | ||
489 | gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data); | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * If calibration parameters are known (either from fuses, or from | ||
494 | * internal calibration on boot) - use them. Internal calibration is | ||
495 | * started anyway; it will complete, but results will not be used. | ||
496 | */ | ||
497 | if (calibrated) { | ||
498 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
499 | data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m(); | ||
500 | gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data); | ||
501 | } | ||
502 | |||
503 | /* Exit IDDQ mode */ | ||
504 | data = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
505 | data = set_field(data, trim_sys_gpcpll_cfg_iddq_m(), | ||
506 | trim_sys_gpcpll_cfg_iddq_power_on_v()); | ||
507 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), data); | ||
508 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
509 | nvgpu_udelay(delay); | ||
510 | |||
511 | /* | ||
512 | * Dynamic ramp setup based on update rate, which in DVFS mode on GM20b | ||
513 | * is always 38.4 MHz, the same as reference clock rate. | ||
514 | */ | ||
515 | clk_setup_slide(g, g->clk.gpc_pll.clk_in); | ||
516 | |||
517 | if (calibrated) | ||
518 | return 0; | ||
519 | |||
520 | /* | ||
521 | * If calibration parameters are not fused, start internal calibration, | ||
522 | * wait for completion, and use results along with default slope to | ||
523 | * calculate ADC offset during boot. | ||
524 | */ | ||
525 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
526 | data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m(); | ||
527 | gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data); | ||
528 | |||
529 | /* C1 PLL must be enabled to read internal calibration results */ | ||
530 | if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) { | ||
531 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
532 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
533 | trim_sys_gpcpll_cfg_enable_yes_f()); | ||
534 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
535 | } | ||
536 | |||
537 | /* Wait for internal calibration done (spec < 2us). */ | ||
538 | do { | ||
539 | data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r()); | ||
540 | if (trim_sys_gpcpll_dvfs1_dfs_cal_done_v(data)) | ||
541 | break; | ||
542 | nvgpu_udelay(1); | ||
543 | delay--; | ||
544 | } while (delay > 0); | ||
545 | |||
546 | /* Read calibration results */ | ||
547 | data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); | ||
548 | data = trim_sys_gpcpll_cfg3_dfs_testout_v(data); | ||
549 | |||
550 | if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) { | ||
551 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
552 | trim_sys_gpcpll_cfg_enable_no_f()); | ||
553 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
554 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
555 | } | ||
556 | |||
557 | if (delay <= 0) { | ||
558 | nvgpu_err(g, "GPCPLL calibration timeout"); | ||
559 | return -ETIMEDOUT; | ||
560 | } | ||
561 | |||
562 | p->uvdet_offs = g->clk.pll_poweron_uv - data * ADC_SLOPE_UV; | ||
563 | p->uvdet_slope = ADC_SLOPE_UV; | ||
564 | return 0; | ||
565 | } | ||
566 | |||
567 | /* GPCPLL slide methods */ | ||
568 | static void clk_setup_slide(struct gk20a *g, u32 clk_u) | ||
569 | { | ||
570 | u32 data, step_a, step_b; | ||
571 | |||
572 | switch (clk_u) { | ||
573 | case 12000: | ||
574 | case 12800: | ||
575 | case 13000: /* only on FPGA */ | ||
576 | step_a = 0x2B; | ||
577 | step_b = 0x0B; | ||
578 | break; | ||
579 | case 19200: | ||
580 | step_a = 0x12; | ||
581 | step_b = 0x08; | ||
582 | break; | ||
583 | case 38400: | ||
584 | step_a = 0x04; | ||
585 | step_b = 0x05; | ||
586 | break; | ||
587 | default: | ||
588 | nvgpu_err(g, "Unexpected reference rate %u kHz", clk_u); | ||
589 | BUG(); | ||
590 | } | ||
591 | |||
592 | /* setup */ | ||
593 | data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
594 | data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(), | ||
595 | trim_sys_gpcpll_cfg2_pll_stepa_f(step_a)); | ||
596 | gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data); | ||
597 | data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); | ||
598 | data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(), | ||
599 | trim_sys_gpcpll_cfg3_pll_stepb_f(step_b)); | ||
600 | gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data); | ||
601 | } | ||
602 | |||
603 | static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll) | ||
604 | { | ||
605 | u32 data, coeff; | ||
606 | u32 nold, sdm_old; | ||
607 | int ramp_timeout = gpc_pll_params.lock_timeout; | ||
608 | |||
609 | /* get old coefficients */ | ||
610 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
611 | nold = trim_sys_gpcpll_coeff_ndiv_v(coeff); | ||
612 | |||
613 | /* do nothing if NDIV is same */ | ||
614 | if (gpll->mode == GPC_PLL_MODE_DVFS) { | ||
615 | /* in DVFS mode check both integer and fraction */ | ||
616 | coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
617 | sdm_old = trim_sys_gpcpll_cfg2_sdm_din_v(coeff); | ||
618 | if ((gpll->dvfs.n_int == nold) && | ||
619 | (gpll->dvfs.sdm_din == sdm_old)) | ||
620 | return 0; | ||
621 | } else { | ||
622 | if (gpll->N == nold) | ||
623 | return 0; | ||
624 | |||
625 | /* dynamic ramp setup based on update rate */ | ||
626 | clk_setup_slide(g, gpll->clk_in / gpll->M); | ||
627 | } | ||
628 | |||
629 | /* pll slowdown mode */ | ||
630 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
631 | data = set_field(data, | ||
632 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), | ||
633 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f()); | ||
634 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
635 | |||
636 | /* new ndiv ready for ramp */ | ||
637 | if (gpll->mode == GPC_PLL_MODE_DVFS) { | ||
638 | /* in DVFS mode SDM is updated via "new" field */ | ||
639 | coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
640 | coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_new_m(), | ||
641 | trim_sys_gpcpll_cfg2_sdm_din_new_f(gpll->dvfs.sdm_din)); | ||
642 | gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff); | ||
643 | |||
644 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
645 | coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(), | ||
646 | trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int)); | ||
647 | nvgpu_udelay(1); | ||
648 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
649 | } else { | ||
650 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
651 | coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(), | ||
652 | trim_sys_gpcpll_coeff_ndiv_f(gpll->N)); | ||
653 | nvgpu_udelay(1); | ||
654 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
655 | } | ||
656 | |||
657 | /* dynamic ramp to new ndiv */ | ||
658 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
659 | data = set_field(data, | ||
660 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), | ||
661 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f()); | ||
662 | nvgpu_udelay(1); | ||
663 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
664 | |||
665 | do { | ||
666 | nvgpu_udelay(1); | ||
667 | ramp_timeout--; | ||
668 | data = gk20a_readl( | ||
669 | g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r()); | ||
670 | if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data)) | ||
671 | break; | ||
672 | } while (ramp_timeout > 0); | ||
673 | |||
674 | if ((gpll->mode == GPC_PLL_MODE_DVFS) && (ramp_timeout > 0)) { | ||
675 | /* in DVFS mode complete SDM update */ | ||
676 | coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
677 | coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(), | ||
678 | trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din)); | ||
679 | gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff); | ||
680 | } | ||
681 | |||
682 | /* exit slowdown mode */ | ||
683 | data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
684 | data = set_field(data, | ||
685 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(), | ||
686 | trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f()); | ||
687 | data = set_field(data, | ||
688 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(), | ||
689 | trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f()); | ||
690 | gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data); | ||
691 | gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r()); | ||
692 | |||
693 | if (ramp_timeout <= 0) { | ||
694 | nvgpu_err(g, "gpcpll dynamic ramp timeout"); | ||
695 | return -ETIMEDOUT; | ||
696 | } | ||
697 | return 0; | ||
698 | } | ||
699 | |||
700 | /* GPCPLL bypass methods */ | ||
701 | static int clk_change_pldiv_under_bypass(struct gk20a *g, struct pll *gpll) | ||
702 | { | ||
703 | u32 data, coeff; | ||
704 | |||
705 | /* put PLL in bypass before programming it */ | ||
706 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
707 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
708 | trim_sys_sel_vco_gpc2clk_out_bypass_f()); | ||
709 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
710 | |||
711 | /* change PLDIV */ | ||
712 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
713 | nvgpu_udelay(1); | ||
714 | coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(), | ||
715 | trim_sys_gpcpll_coeff_pldiv_f(gpll->PL)); | ||
716 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
717 | |||
718 | /* put PLL back on vco */ | ||
719 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
720 | nvgpu_udelay(1); | ||
721 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
722 | trim_sys_sel_vco_gpc2clk_out_vco_f()); | ||
723 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
724 | |||
725 | return 0; | ||
726 | } | ||
727 | |||
728 | static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll) | ||
729 | { | ||
730 | u32 data, cfg, coeff, timeout; | ||
731 | |||
732 | /* put PLL in bypass before programming it */ | ||
733 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
734 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
735 | trim_sys_sel_vco_gpc2clk_out_bypass_f()); | ||
736 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
737 | |||
738 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
739 | nvgpu_udelay(1); | ||
740 | if (trim_sys_gpcpll_cfg_iddq_v(cfg)) { | ||
741 | /* get out from IDDQ (1st power up) */ | ||
742 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(), | ||
743 | trim_sys_gpcpll_cfg_iddq_power_on_v()); | ||
744 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
745 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
746 | nvgpu_udelay(gpc_pll_params.iddq_exit_delay); | ||
747 | } else { | ||
748 | /* clear SYNC_MODE before disabling PLL */ | ||
749 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(), | ||
750 | trim_sys_gpcpll_cfg_sync_mode_disable_f()); | ||
751 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
752 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
753 | |||
754 | /* disable running PLL before changing coefficients */ | ||
755 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
756 | trim_sys_gpcpll_cfg_enable_no_f()); | ||
757 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
758 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
759 | } | ||
760 | |||
761 | /* change coefficients */ | ||
762 | if (gpll->mode == GPC_PLL_MODE_DVFS) { | ||
763 | clk_setup_dvfs_detection(g, gpll); | ||
764 | |||
765 | coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r()); | ||
766 | coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(), | ||
767 | trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din)); | ||
768 | gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff); | ||
769 | |||
770 | coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) | | ||
771 | trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int) | | ||
772 | trim_sys_gpcpll_coeff_pldiv_f(gpll->PL); | ||
773 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
774 | } else { | ||
775 | coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) | | ||
776 | trim_sys_gpcpll_coeff_ndiv_f(gpll->N) | | ||
777 | trim_sys_gpcpll_coeff_pldiv_f(gpll->PL); | ||
778 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
779 | } | ||
780 | |||
781 | /* enable PLL after changing coefficients */ | ||
782 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
783 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
784 | trim_sys_gpcpll_cfg_enable_yes_f()); | ||
785 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
786 | |||
787 | /* just delay in DVFS mode (lock cannot be used) */ | ||
788 | if (gpll->mode == GPC_PLL_MODE_DVFS) { | ||
789 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
790 | nvgpu_udelay(gpc_pll_params.na_lock_delay); | ||
791 | gk20a_dbg_clk("NA config_pll under bypass: %u (%u) kHz %d mV", | ||
792 | gpll->freq, gpll->freq / 2, | ||
793 | (trim_sys_gpcpll_cfg3_dfs_testout_v( | ||
794 | gk20a_readl(g, trim_sys_gpcpll_cfg3_r())) | ||
795 | * gpc_pll_params.uvdet_slope | ||
796 | + gpc_pll_params.uvdet_offs) / 1000); | ||
797 | goto pll_locked; | ||
798 | } | ||
799 | |||
800 | /* lock pll */ | ||
801 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
802 | if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()) { | ||
803 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(), | ||
804 | trim_sys_gpcpll_cfg_enb_lckdet_power_on_f()); | ||
805 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
806 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
807 | } | ||
808 | |||
809 | /* wait pll lock */ | ||
810 | timeout = gpc_pll_params.lock_timeout + 1; | ||
811 | do { | ||
812 | nvgpu_udelay(1); | ||
813 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
814 | if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f()) | ||
815 | goto pll_locked; | ||
816 | } while (--timeout > 0); | ||
817 | |||
818 | /* PLL is messed up. What can we do here? */ | ||
819 | dump_gpc_pll(g, gpll, cfg); | ||
820 | BUG(); | ||
821 | return -EBUSY; | ||
822 | |||
823 | pll_locked: | ||
824 | gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x", | ||
825 | trim_sys_gpcpll_cfg_r(), cfg); | ||
826 | |||
827 | /* set SYNC_MODE for glitchless switch out of bypass */ | ||
828 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(), | ||
829 | trim_sys_gpcpll_cfg_sync_mode_enable_f()); | ||
830 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
831 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
832 | |||
833 | /* put PLL back on vco */ | ||
834 | data = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
835 | data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(), | ||
836 | trim_sys_sel_vco_gpc2clk_out_vco_f()); | ||
837 | gk20a_writel(g, trim_sys_sel_vco_r(), data); | ||
838 | |||
839 | return 0; | ||
840 | } | ||
841 | |||
842 | /* | ||
843 | * Change GPCPLL frequency: | ||
844 | * - in legacy (non-DVFS) mode | ||
845 | * - in DVFS mode at constant DVFS detection settings, matching current/lower | ||
846 | * voltage; the same procedure can be used in this case, since maximum DVFS | ||
847 | * detection limit makes sure that PLL output remains under F/V curve when | ||
848 | * voltage increases arbitrarily. | ||
849 | */ | ||
850 | static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new, | ||
851 | int allow_slide) | ||
852 | { | ||
853 | u32 cfg, coeff, data; | ||
854 | bool can_slide, pldiv_only; | ||
855 | struct pll gpll; | ||
856 | |||
857 | gk20a_dbg_fn(""); | ||
858 | |||
859 | if (!nvgpu_platform_is_silicon(g)) | ||
860 | return 0; | ||
861 | |||
862 | /* get old coefficients */ | ||
863 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
864 | gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff); | ||
865 | gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff); | ||
866 | gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff); | ||
867 | gpll.clk_in = gpll_new->clk_in; | ||
868 | |||
869 | /* combine target dvfs with old coefficients */ | ||
870 | gpll.dvfs = gpll_new->dvfs; | ||
871 | gpll.mode = gpll_new->mode; | ||
872 | |||
873 | /* do NDIV slide if there is no change in M and PL */ | ||
874 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
875 | can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg); | ||
876 | |||
877 | if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL)) | ||
878 | return clk_slide_gpc_pll(g, gpll_new); | ||
879 | |||
880 | /* slide down to NDIV_LO */ | ||
881 | if (can_slide) { | ||
882 | int ret; | ||
883 | gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco, | ||
884 | gpll.clk_in); | ||
885 | if (gpll.mode == GPC_PLL_MODE_DVFS) | ||
886 | clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs); | ||
887 | ret = clk_slide_gpc_pll(g, &gpll); | ||
888 | if (ret) | ||
889 | return ret; | ||
890 | } | ||
891 | pldiv_only = can_slide && (gpll_new->M == gpll.M); | ||
892 | |||
893 | /* | ||
894 | * Split FO-to-bypass jump in halves by setting out divider 1:2. | ||
895 | * (needed even if PLDIV_GLITCHLESS is set, since 1:1 <=> 1:2 direct | ||
896 | * transition is not really glitch-less - see get_interim_pldiv | ||
897 | * function header). | ||
898 | */ | ||
899 | if ((gpll_new->PL < 2) || (gpll.PL < 2)) { | ||
900 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
901 | data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), | ||
902 | trim_sys_gpc2clk_out_vcodiv_f(2)); | ||
903 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
904 | /* Intentional 2nd write to assure linear divider operation */ | ||
905 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
906 | gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
907 | nvgpu_udelay(2); | ||
908 | } | ||
909 | |||
910 | #if PLDIV_GLITCHLESS | ||
911 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
912 | if (pldiv_only) { | ||
913 | /* Insert interim PLDIV state if necessary */ | ||
914 | u32 interim_pl = get_interim_pldiv(g, gpll_new->PL, gpll.PL); | ||
915 | if (interim_pl) { | ||
916 | coeff = set_field(coeff, | ||
917 | trim_sys_gpcpll_coeff_pldiv_m(), | ||
918 | trim_sys_gpcpll_coeff_pldiv_f(interim_pl)); | ||
919 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
920 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
921 | } | ||
922 | goto set_pldiv; /* path A: no need to bypass */ | ||
923 | } | ||
924 | |||
925 | /* path B: bypass if either M changes or PLL is disabled */ | ||
926 | #endif | ||
927 | /* | ||
928 | * Program and lock pll under bypass. On exit PLL is out of bypass, | ||
929 | * enabled, and locked. VCO is at vco_min if sliding is allowed. | ||
930 | * Otherwise it is at VCO target (and therefore last slide call below | ||
931 | * is effectively NOP). PL is set to target. Output divider is engaged | ||
932 | * at 1:2 if either entry, or exit PL setting is 1:1. | ||
933 | */ | ||
934 | gpll = *gpll_new; | ||
935 | if (allow_slide) { | ||
936 | gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco, | ||
937 | gpll_new->clk_in); | ||
938 | if (gpll.mode == GPC_PLL_MODE_DVFS) | ||
939 | clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs); | ||
940 | } | ||
941 | if (pldiv_only) | ||
942 | clk_change_pldiv_under_bypass(g, &gpll); | ||
943 | else | ||
944 | clk_lock_gpc_pll_under_bypass(g, &gpll); | ||
945 | |||
946 | #if PLDIV_GLITCHLESS | ||
947 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
948 | |||
949 | set_pldiv: | ||
950 | /* coeff must be current from either path A or B */ | ||
951 | if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) { | ||
952 | coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(), | ||
953 | trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL)); | ||
954 | gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff); | ||
955 | } | ||
956 | #endif | ||
957 | /* restore out divider 1:1 */ | ||
958 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
959 | if ((data & trim_sys_gpc2clk_out_vcodiv_m()) != | ||
960 | trim_sys_gpc2clk_out_vcodiv_by1_f()) { | ||
961 | data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(), | ||
962 | trim_sys_gpc2clk_out_vcodiv_by1_f()); | ||
963 | nvgpu_udelay(2); | ||
964 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
965 | /* Intentional 2nd write to assure linear divider operation */ | ||
966 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
967 | gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
968 | } | ||
969 | |||
970 | /* slide up to target NDIV */ | ||
971 | return clk_slide_gpc_pll(g, gpll_new); | ||
972 | } | ||
973 | |||
974 | /* Find GPCPLL config safe at DVFS coefficient = 0, matching target frequency */ | ||
975 | static void clk_config_pll_safe_dvfs(struct gk20a *g, struct pll *gpll) | ||
976 | { | ||
977 | u32 nsafe, nmin; | ||
978 | |||
979 | if (gpll->freq > g->clk.dvfs_safe_max_freq) | ||
980 | gpll->freq = gpll->freq * (100 - DVFS_SAFE_MARGIN) / 100; | ||
981 | |||
982 | nmin = DIV_ROUND_UP(gpll->M * gpc_pll_params.min_vco, gpll->clk_in); | ||
983 | nsafe = gpll->M * gpll->freq / gpll->clk_in; | ||
984 | |||
985 | /* | ||
986 | * If safe frequency is above VCOmin, it can be used in safe PLL config | ||
987 | * as is. Since safe frequency is below both old and new frequencies, | ||
988 | * in this case all three configurations have the same post divider 1:1, and | ||
989 | * direct old=>safe=>new n-sliding will be used for transitions. | ||
990 | * | ||
991 | * Otherwise, if safe frequency is below VCO min, post-divider in safe | ||
992 | * configuration (and possibly in old and/or new configurations) is | ||
993 | * above 1:1, and each old=>safe and safe=>new transitions includes | ||
994 | * sliding to/from VCOmin, as well as divider changes. To avoid extra | ||
995 | * dynamic ramps from VCOmin during old=>safe transition and to VCOmin | ||
996 | * during safe=>new transition, select nmin as safe NDIV, and set safe | ||
997 | * post divider to assure PLL output is below the safe frequency. | ||
998 | */ | ||
999 | if (nsafe < nmin) { | ||
1000 | gpll->PL = DIV_ROUND_UP(nmin * gpll->clk_in, | ||
1001 | gpll->M * gpll->freq); | ||
1002 | nsafe = nmin; | ||
1003 | } | ||
1004 | gpll->N = nsafe; | ||
1005 | clk_config_dvfs_ndiv(gpll->dvfs.mv, gpll->N, &gpll->dvfs); | ||
1006 | |||
1007 | gk20a_dbg_clk("safe freq %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d", | ||
1008 | gpll->freq, gpll->M, gpll->N, gpll->PL, nvgpu_pl_to_div(gpll->PL), | ||
1009 | gpll->dvfs.mv, gpll->dvfs.uv_cal / 1000, gpll->dvfs.dfs_coeff); | ||
1010 | } | ||
1011 | |||
1012 | /* Change GPCPLL frequency and DVFS detection settings in DVFS mode */ | ||
1013 | static int clk_program_na_gpc_pll(struct gk20a *g, struct pll *gpll_new, | ||
1014 | int allow_slide) | ||
1015 | { | ||
1016 | int ret; | ||
1017 | struct pll gpll_safe; | ||
1018 | struct pll *gpll_old = &g->clk.gpc_pll_last; | ||
1019 | |||
1020 | BUG_ON(gpll_new->M != 1); /* the only MDIV in NA mode */ | ||
1021 | clk_config_dvfs(g, gpll_new); | ||
1022 | |||
1023 | /* | ||
1024 | * In cases below no intermediate steps in PLL DVFS configuration are | ||
1025 | * necessary because either | ||
1026 | * - PLL DVFS will be configured under bypass directly to target, or | ||
1027 | * - voltage is not changing, so DVFS detection settings are the same | ||
1028 | */ | ||
1029 | if (!allow_slide || !gpll_new->enabled || | ||
1030 | (gpll_old->dvfs.mv == gpll_new->dvfs.mv)) | ||
1031 | return clk_program_gpc_pll(g, gpll_new, allow_slide); | ||
1032 | |||
1033 | /* | ||
1034 | * Interim step for changing DVFS detection settings: low enough | ||
1035 | * frequency to be safe at DVFS coeff = 0. | ||
1036 | * | ||
1037 | * 1. If voltage is increasing: | ||
1038 | * - safe frequency target matches the lowest - old - frequency | ||
1039 | * - DVFS settings are still old | ||
1040 | * - Voltage already increased to new level by tegra DVFS, but maximum | ||
1041 | * detection limit assures PLL output remains under F/V curve | ||
1042 | * | ||
1043 | * 2. If voltage is decreasing: | ||
1044 | * - safe frequency target matches the lowest - new - frequency | ||
1045 | * - DVFS settings are still old | ||
1046 | * - Voltage is also old, it will be lowered by tegra DVFS afterwards | ||
1047 | * | ||
1048 | * Interim step can be skipped if old frequency is below safe minimum, | ||
1049 | * i.e., it is low enough to be safe at any voltage in operating range | ||
1050 | * with zero DVFS coefficient. | ||
1051 | */ | ||
1052 | if (gpll_old->freq > g->clk.dvfs_safe_max_freq) { | ||
1053 | if (gpll_old->dvfs.mv < gpll_new->dvfs.mv) { | ||
1054 | gpll_safe = *gpll_old; | ||
1055 | gpll_safe.dvfs.mv = gpll_new->dvfs.mv; | ||
1056 | } else { | ||
1057 | gpll_safe = *gpll_new; | ||
1058 | gpll_safe.dvfs = gpll_old->dvfs; | ||
1059 | } | ||
1060 | clk_config_pll_safe_dvfs(g, &gpll_safe); | ||
1061 | |||
1062 | ret = clk_program_gpc_pll(g, &gpll_safe, 1); | ||
1063 | if (ret) { | ||
1064 | nvgpu_err(g, "Safe dvfs program fail"); | ||
1065 | return ret; | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | /* | ||
1070 | * DVFS detection settings transition: | ||
1071 | * - Set DVFS coefficient zero (safe, since already at frequency safe | ||
1072 | * at DVFS coeff = 0 for the lowest of the old/new end-points) | ||
1073 | * - Set calibration level to new voltage (safe, since DVFS coeff = 0) | ||
1074 | * - Set DVFS coefficient to match new voltage (safe, since already at | ||
1075 | * frequency safe at DVFS coeff = 0 for the lowest of the old/new | ||
1076 | * end-points). | ||
1077 | */ | ||
1078 | clk_set_dfs_coeff(g, 0); | ||
1079 | clk_set_dfs_ext_cal(g, gpll_new->dvfs.dfs_ext_cal); | ||
1080 | clk_set_dfs_coeff(g, gpll_new->dvfs.dfs_coeff); | ||
1081 | |||
1082 | gk20a_dbg_clk("config_pll %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d", | ||
1083 | gpll_new->freq, gpll_new->M, gpll_new->N, gpll_new->PL, | ||
1084 | nvgpu_pl_to_div(gpll_new->PL), | ||
1085 | max(gpll_new->dvfs.mv, gpll_old->dvfs.mv), | ||
1086 | gpll_new->dvfs.uv_cal / 1000, gpll_new->dvfs.dfs_coeff); | ||
1087 | |||
1088 | /* Finally set target rate (with DVFS detection settings already new) */ | ||
1089 | return clk_program_gpc_pll(g, gpll_new, 1); | ||
1090 | } | ||
1091 | |||
1092 | static int clk_disable_gpcpll(struct gk20a *g, int allow_slide) | ||
1093 | { | ||
1094 | u32 cfg, coeff; | ||
1095 | struct clk_gk20a *clk = &g->clk; | ||
1096 | struct pll gpll = clk->gpc_pll; | ||
1097 | |||
1098 | /* slide to VCO min */ | ||
1099 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
1100 | if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) { | ||
1101 | coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
1102 | gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff); | ||
1103 | gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco, | ||
1104 | gpll.clk_in); | ||
1105 | if (gpll.mode == GPC_PLL_MODE_DVFS) | ||
1106 | clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs); | ||
1107 | clk_slide_gpc_pll(g, &gpll); | ||
1108 | } | ||
1109 | |||
1110 | /* put PLL in bypass before disabling it */ | ||
1111 | cfg = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
1112 | cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(), | ||
1113 | trim_sys_sel_vco_gpc2clk_out_bypass_f()); | ||
1114 | gk20a_writel(g, trim_sys_sel_vco_r(), cfg); | ||
1115 | |||
1116 | /* clear SYNC_MODE before disabling PLL */ | ||
1117 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
1118 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(), | ||
1119 | trim_sys_gpcpll_cfg_sync_mode_disable_f()); | ||
1120 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
1121 | |||
1122 | /* disable PLL */ | ||
1123 | cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
1124 | cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(), | ||
1125 | trim_sys_gpcpll_cfg_enable_no_f()); | ||
1126 | gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg); | ||
1127 | gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
1128 | |||
1129 | clk->gpc_pll.enabled = false; | ||
1130 | clk->gpc_pll_last.enabled = false; | ||
1131 | return 0; | ||
1132 | } | ||
1133 | |||
1134 | struct pll_parms *gm20b_get_gpc_pll_parms(void) | ||
1135 | { | ||
1136 | return &gpc_pll_params; | ||
1137 | } | ||
1138 | |||
1139 | int gm20b_init_clk_setup_sw(struct gk20a *g) | ||
1140 | { | ||
1141 | struct clk_gk20a *clk = &g->clk; | ||
1142 | unsigned long safe_rate; | ||
1143 | int err; | ||
1144 | |||
1145 | gk20a_dbg_fn(""); | ||
1146 | |||
1147 | err = nvgpu_mutex_init(&clk->clk_mutex); | ||
1148 | if (err) | ||
1149 | return err; | ||
1150 | |||
1151 | if (clk->sw_ready) { | ||
1152 | gk20a_dbg_fn("skip init"); | ||
1153 | return 0; | ||
1154 | } | ||
1155 | |||
1156 | if (clk->gpc_pll.id == GM20B_GPC_PLL_C1) { | ||
1157 | gpc_pll_params = gpc_pll_params_c1; | ||
1158 | if (!clk->pll_poweron_uv) | ||
1159 | clk->pll_poweron_uv = BOOT_GPU_UV_C1; | ||
1160 | } else { | ||
1161 | gpc_pll_params = gpc_pll_params_b1; | ||
1162 | if (!clk->pll_poweron_uv) | ||
1163 | clk->pll_poweron_uv = BOOT_GPU_UV_B1; | ||
1164 | } | ||
1165 | |||
1166 | clk->gpc_pll.clk_in = g->ops.clk.get_ref_clock_rate(g) / KHZ; | ||
1167 | if (clk->gpc_pll.clk_in == 0) { | ||
1168 | nvgpu_err(g, "GPCPLL reference clock is zero"); | ||
1169 | err = -EINVAL; | ||
1170 | goto fail; | ||
1171 | } | ||
1172 | |||
1173 | safe_rate = g->ops.clk.get_fmax_at_vmin_safe(clk); | ||
1174 | safe_rate = safe_rate * (100 - DVFS_SAFE_MARGIN) / 100; | ||
1175 | clk->dvfs_safe_max_freq = rate_gpu_to_gpc2clk(safe_rate); | ||
1176 | clk->gpc_pll.PL = (clk->dvfs_safe_max_freq == 0) ? 0 : | ||
1177 | DIV_ROUND_UP(gpc_pll_params.min_vco, clk->dvfs_safe_max_freq); | ||
1178 | |||
1179 | /* Initial freq: low enough to be safe at Vmin (default 1/3 VCO min) */ | ||
1180 | clk->gpc_pll.M = 1; | ||
1181 | clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco, | ||
1182 | clk->gpc_pll.clk_in); | ||
1183 | clk->gpc_pll.PL = max(clk->gpc_pll.PL, 3U); | ||
1184 | clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N; | ||
1185 | clk->gpc_pll.freq /= nvgpu_pl_to_div(clk->gpc_pll.PL); | ||
1186 | |||
1187 | /* | ||
1188 | * All production parts should have ADC fuses burnt. Therefore, always | ||
1189 | * check the ADC fuses, regardless of whether NA mode is selected; and if | ||
1190 | * NA mode is indeed selected, and the part can support it, switch to NA | ||
1191 | * mode even when ADC calibration is not fused; less accurate s/w | ||
1192 | * self-calibration will be used for those parts. | ||
1193 | */ | ||
1194 | clk_config_calibration_params(g); | ||
1195 | #ifdef CONFIG_TEGRA_USE_NA_GPCPLL | ||
1196 | if (nvgpu_fuse_can_use_na_gpcpll(g)) { | ||
1197 | /* NA mode is supported only at max update rate 38.4 MHz */ | ||
1198 | BUG_ON(clk->gpc_pll.clk_in != gpc_pll_params.max_u); | ||
1199 | clk->gpc_pll.mode = GPC_PLL_MODE_DVFS; | ||
1200 | gpc_pll_params.min_u = gpc_pll_params.max_u; | ||
1201 | } | ||
1202 | #endif | ||
1203 | |||
1204 | clk->sw_ready = true; | ||
1205 | |||
1206 | gk20a_dbg_fn("done"); | ||
1207 | nvgpu_info(g, | ||
1208 | "GPCPLL initial settings:%s M=%u, N=%u, P=%u (id = %u)", | ||
1209 | clk->gpc_pll.mode == GPC_PLL_MODE_DVFS ? " NA mode," : "", | ||
1210 | clk->gpc_pll.M, clk->gpc_pll.N, clk->gpc_pll.PL, | ||
1211 | clk->gpc_pll.id); | ||
1212 | return 0; | ||
1213 | |||
1214 | fail: | ||
1215 | nvgpu_mutex_destroy(&clk->clk_mutex); | ||
1216 | return err; | ||
1217 | } | ||
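For reference, the initial-frequency setup above reduces to freq = clk_in * N / div(PL) with M = 1, where the PL-to-divider mapping is 1:1 on gm20b (see nvgpu_pl_to_div() in clk_gm20b.h). A small standalone arithmetic check, using an assumed 38400 kHz reference and an assumed 1300 MHz minimum VCO (illustrative values, and assuming the PL derived from the safe frequency does not exceed the floor of 3):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* Illustrative values only (kHz); the real ones come from gpc_pll_params. */
	unsigned int clk_in = 38400;		/* reference clock          */
	unsigned int min_vco = 1300000;		/* minimum VCO frequency    */
	unsigned int PL = 3;			/* PL 3 <=> divide-by-3 here */
	unsigned int N = DIV_ROUND_UP(min_vco, clk_in);	/* smallest N reaching VCO min */
	unsigned int freq = clk_in * N / PL;	/* initial gpc2clk, kHz      */

	/* Prints: N=34 gpc2clk=435200 kHz */
	printf("N=%u gpc2clk=%u kHz\n", N, freq);
	return 0;
}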
1218 | |||
1219 | |||
1220 | static int set_pll_freq(struct gk20a *g, int allow_slide); | ||
1221 | static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq); | ||
1222 | |||
1223 | int gm20b_clk_prepare(struct clk_gk20a *clk) | ||
1224 | { | ||
1225 | int ret = 0; | ||
1226 | |||
1227 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1228 | if (!clk->gpc_pll.enabled && clk->clk_hw_on) | ||
1229 | ret = set_pll_freq(clk->g, 1); | ||
1230 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1231 | return ret; | ||
1232 | } | ||
1233 | |||
1234 | void gm20b_clk_unprepare(struct clk_gk20a *clk) | ||
1235 | { | ||
1236 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1237 | if (clk->gpc_pll.enabled && clk->clk_hw_on) | ||
1238 | clk_disable_gpcpll(clk->g, 1); | ||
1239 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1240 | } | ||
1241 | |||
1242 | int gm20b_clk_is_prepared(struct clk_gk20a *clk) | ||
1243 | { | ||
1244 | return clk->gpc_pll.enabled && clk->clk_hw_on; | ||
1245 | } | ||
1246 | |||
1247 | unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate) | ||
1248 | { | ||
1249 | return rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
1250 | } | ||
1251 | |||
1252 | int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate, | ||
1253 | unsigned long parent_rate) | ||
1254 | { | ||
1255 | u32 old_freq; | ||
1256 | int ret = -ENODATA; | ||
1257 | |||
1258 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1259 | old_freq = clk->gpc_pll.freq; | ||
1260 | ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq); | ||
1261 | if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on) | ||
1262 | ret = set_pll_freq(clk->g, 1); | ||
1263 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1264 | |||
1265 | return ret; | ||
1266 | } | ||
1267 | |||
1268 | long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate, | ||
1269 | unsigned long *parent_rate) | ||
1270 | { | ||
1271 | u32 freq; | ||
1272 | struct pll tmp_pll; | ||
1273 | unsigned long maxrate; | ||
1274 | struct gk20a *g = clk->g; | ||
1275 | |||
1276 | maxrate = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
1277 | if (rate > maxrate) | ||
1278 | rate = maxrate; | ||
1279 | |||
1280 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1281 | freq = rate_gpu_to_gpc2clk(rate); | ||
1282 | if (freq > gpc_pll_params.max_freq) | ||
1283 | freq = gpc_pll_params.max_freq; | ||
1284 | else if (freq < gpc_pll_params.min_freq) | ||
1285 | freq = gpc_pll_params.min_freq; | ||
1286 | |||
1287 | tmp_pll = clk->gpc_pll; | ||
1288 | clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true); | ||
1289 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1290 | |||
1291 | return rate_gpc2clk_to_gpu(tmp_pll.freq); | ||
1292 | } | ||
1293 | |||
1294 | static int gm20b_init_clk_setup_hw(struct gk20a *g) | ||
1295 | { | ||
1296 | u32 data; | ||
1297 | |||
1298 | gk20a_dbg_fn(""); | ||
1299 | |||
1300 | /* LDIV: Div4 mode (required); both bypass and vco ratios 1:1 */ | ||
1301 | data = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
1302 | data = set_field(data, | ||
1303 | trim_sys_gpc2clk_out_sdiv14_m() | | ||
1304 | trim_sys_gpc2clk_out_vcodiv_m() | | ||
1305 | trim_sys_gpc2clk_out_bypdiv_m(), | ||
1306 | trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() | | ||
1307 | trim_sys_gpc2clk_out_vcodiv_by1_f() | | ||
1308 | trim_sys_gpc2clk_out_bypdiv_f(0)); | ||
1309 | gk20a_writel(g, trim_sys_gpc2clk_out_r(), data); | ||
1310 | |||
1311 | /* | ||
1312 | * Clear global bypass control; PLL is still under bypass, since SEL_VCO | ||
1313 | * is cleared by default. | ||
1314 | */ | ||
1315 | data = gk20a_readl(g, trim_sys_bypassctrl_r()); | ||
1316 | data = set_field(data, trim_sys_bypassctrl_gpcpll_m(), | ||
1317 | trim_sys_bypassctrl_gpcpll_vco_f()); | ||
1318 | gk20a_writel(g, trim_sys_bypassctrl_r(), data); | ||
1319 | |||
1320 | /* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */ | ||
1321 | data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_r()); | ||
1322 | if (!fuse_ctrl_opt_ram_svop_pdp_data_v(data)) { | ||
1323 | data = set_field(data, fuse_ctrl_opt_ram_svop_pdp_data_m(), | ||
1324 | fuse_ctrl_opt_ram_svop_pdp_data_f(0x2)); | ||
1325 | gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_r(), data); | ||
1326 | data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_override_r()); | ||
1327 | data = set_field(data, | ||
1328 | fuse_ctrl_opt_ram_svop_pdp_override_data_m(), | ||
1329 | fuse_ctrl_opt_ram_svop_pdp_override_data_yes_f()); | ||
1330 | gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_override_r(), data); | ||
1331 | } | ||
1332 | |||
1333 | /* Disable idle slow down */ | ||
1334 | data = gk20a_readl(g, therm_clk_slowdown_r(0)); | ||
1335 | data = set_field(data, therm_clk_slowdown_idle_factor_m(), | ||
1336 | therm_clk_slowdown_idle_factor_disabled_f()); | ||
1337 | gk20a_writel(g, therm_clk_slowdown_r(0), data); | ||
1338 | gk20a_readl(g, therm_clk_slowdown_r(0)); | ||
1339 | |||
1340 | if (g->clk.gpc_pll.mode == GPC_PLL_MODE_DVFS) | ||
1341 | return clk_enbale_pll_dvfs(g); | ||
1342 | |||
1343 | return 0; | ||
1344 | } | ||
1345 | |||
1346 | static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq) | ||
1347 | { | ||
1348 | struct clk_gk20a *clk = &g->clk; | ||
1349 | |||
1350 | if (freq > gpc_pll_params.max_freq) | ||
1351 | freq = gpc_pll_params.max_freq; | ||
1352 | else if (freq < gpc_pll_params.min_freq) | ||
1353 | freq = gpc_pll_params.min_freq; | ||
1354 | |||
1355 | if (freq != old_freq) { | ||
1356 | /* gpc_pll.freq is changed to new value here */ | ||
1357 | if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params, | ||
1358 | &freq, true)) { | ||
1359 | nvgpu_err(g, "failed to set pll target for %d", freq); | ||
1360 | return -EINVAL; | ||
1361 | } | ||
1362 | } | ||
1363 | return 0; | ||
1364 | } | ||
1365 | |||
1366 | static int set_pll_freq(struct gk20a *g, int allow_slide) | ||
1367 | { | ||
1368 | struct clk_gk20a *clk = &g->clk; | ||
1369 | int err = 0; | ||
1370 | |||
1371 | gk20a_dbg_fn("last freq: %dMHz, target freq %dMHz", | ||
1372 | clk->gpc_pll_last.freq, clk->gpc_pll.freq); | ||
1373 | |||
1374 | /* If programming with dynamic sliding failed, re-try under bypass */ | ||
1375 | if (clk->gpc_pll.mode == GPC_PLL_MODE_DVFS) { | ||
1376 | err = clk_program_na_gpc_pll(g, &clk->gpc_pll, allow_slide); | ||
1377 | if (err && allow_slide) | ||
1378 | err = clk_program_na_gpc_pll(g, &clk->gpc_pll, 0); | ||
1379 | } else { | ||
1380 | err = clk_program_gpc_pll(g, &clk->gpc_pll, allow_slide); | ||
1381 | if (err && allow_slide) | ||
1382 | err = clk_program_gpc_pll(g, &clk->gpc_pll, 0); | ||
1383 | } | ||
1384 | |||
1385 | if (!err) { | ||
1386 | clk->gpc_pll.enabled = true; | ||
1387 | clk->gpc_pll_last = clk->gpc_pll; | ||
1388 | return 0; | ||
1389 | } | ||
1390 | |||
1391 | /* | ||
1392 | * Just report the error but do not restore the PLL, since DVFS could have | ||
1393 | * already changed the voltage even though programming failed. | ||
1394 | */ | ||
1395 | nvgpu_err(g, "failed to set pll to %d", clk->gpc_pll.freq); | ||
1396 | return err; | ||
1397 | } | ||
1398 | |||
1399 | int gm20b_init_clk_support(struct gk20a *g) | ||
1400 | { | ||
1401 | struct clk_gk20a *clk = &g->clk; | ||
1402 | int err; | ||
1403 | |||
1404 | gk20a_dbg_fn(""); | ||
1405 | |||
1406 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1407 | clk->clk_hw_on = true; | ||
1408 | |||
1409 | err = gm20b_init_clk_setup_hw(g); | ||
1410 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1411 | if (err) | ||
1412 | return err; | ||
1413 | |||
1414 | /* FIXME: this effectively prevents host level clock gating */ | ||
1415 | err = g->ops.clk.prepare_enable(&g->clk); | ||
1416 | if (err) | ||
1417 | return err; | ||
1418 | |||
1419 | /* The prev call may not enable PLL if gbus is unbalanced - force it */ | ||
1420 | nvgpu_mutex_acquire(&clk->clk_mutex); | ||
1421 | if (!clk->gpc_pll.enabled) | ||
1422 | err = set_pll_freq(g, 1); | ||
1423 | nvgpu_mutex_release(&clk->clk_mutex); | ||
1424 | if (err) | ||
1425 | return err; | ||
1426 | |||
1427 | if (!clk->debugfs_set && g->ops.clk.init_debugfs) { | ||
1428 | err = g->ops.clk.init_debugfs(g); | ||
1429 | if (err) | ||
1430 | return err; | ||
1431 | clk->debugfs_set = true; | ||
1432 | } | ||
1433 | |||
1434 | return err; | ||
1435 | } | ||
1436 | |||
1437 | int gm20b_suspend_clk_support(struct gk20a *g) | ||
1438 | { | ||
1439 | int ret = 0; | ||
1440 | |||
1441 | g->ops.clk.disable_unprepare(&g->clk); | ||
1442 | |||
1443 | /* The prev call may not disable PLL if gbus is unbalanced - force it */ | ||
1444 | nvgpu_mutex_acquire(&g->clk.clk_mutex); | ||
1445 | if (g->clk.gpc_pll.enabled) | ||
1446 | ret = clk_disable_gpcpll(g, 1); | ||
1447 | g->clk.clk_hw_on = false; | ||
1448 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1449 | |||
1450 | nvgpu_mutex_destroy(&g->clk.clk_mutex); | ||
1451 | |||
1452 | return ret; | ||
1453 | } | ||
1454 | |||
1455 | int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val) | ||
1456 | { | ||
1457 | struct gk20a *g = clk->g; | ||
1458 | struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); | ||
1459 | u32 det_out; | ||
1460 | int err; | ||
1461 | |||
1462 | if (clk->gpc_pll.mode != GPC_PLL_MODE_DVFS) | ||
1463 | return -ENOSYS; | ||
1464 | |||
1465 | err = gk20a_busy(g); | ||
1466 | if (err) | ||
1467 | return err; | ||
1468 | |||
1469 | nvgpu_mutex_acquire(&g->clk.clk_mutex); | ||
1470 | |||
1471 | det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r()); | ||
1472 | det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out); | ||
1473 | *val = div64_u64((u64)det_out * gpc_pll_params->uvdet_slope + | ||
1474 | gpc_pll_params->uvdet_offs, 1000ULL); | ||
1475 | |||
1476 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1477 | |||
1478 | gk20a_idle(g); | ||
1479 | return 0; | ||
1480 | } | ||
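The readback above converts the DFS_TESTOUT detector value to millivolts as (det_out * uvdet_slope + uvdet_offs) / 1000, with slope and offset expressed in microvolts. A standalone arithmetic check using made-up calibration values (the real ones are fused per part, or derived by the s/w self-calibration mentioned earlier):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical calibration values in uV; illustrative only. */
	uint64_t uvdet_slope = 10000;	/* uV per detector step */
	uint64_t uvdet_offs  = 600000;	/* uV offset            */
	uint64_t det_out     = 25;	/* DFS_TESTOUT reading  */

	/* Same conversion as above: uV -> mV by dividing by 1000. */
	uint64_t mv = (det_out * uvdet_slope + uvdet_offs) / 1000ULL;

	/* Prints: 850 mV */
	printf("%llu mV\n", (unsigned long long)mv);
	return 0;
}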
1481 | |||
1482 | int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val) | ||
1483 | { | ||
1484 | struct gk20a *g = clk->g; | ||
1485 | u32 clk_slowdown, clk_slowdown_save; | ||
1486 | int err; | ||
1487 | |||
1488 | u32 ncycle = 800; /* count GPCCLK for ncycle cycles of clk_in */ | ||
1489 | u64 freq = clk->gpc_pll.clk_in; | ||
1490 | u32 count1, count2; | ||
1491 | |||
1492 | err = gk20a_busy(g); | ||
1493 | if (err) | ||
1494 | return err; | ||
1495 | |||
1496 | nvgpu_mutex_acquire(&g->clk.clk_mutex); | ||
1497 | |||
1498 | /* Disable clock slowdown during measurements */ | ||
1499 | clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0)); | ||
1500 | clk_slowdown = set_field(clk_slowdown_save, | ||
1501 | therm_clk_slowdown_idle_factor_m(), | ||
1502 | therm_clk_slowdown_idle_factor_disabled_f()); | ||
1503 | gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown); | ||
1504 | gk20a_readl(g, therm_clk_slowdown_r(0)); | ||
1505 | |||
1506 | gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), | ||
1507 | trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f()); | ||
1508 | gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), | ||
1509 | trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() | | ||
1510 | trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() | | ||
1511 | trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle)); | ||
1512 | /* start */ | ||
1513 | |||
1514 | /* It should take less than 25us to finish 800 cycles of 38.4MHz clk_in, | ||
1515 | * but a delay longer than 100us is required here. | ||
1516 | */ | ||
1517 | gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0)); | ||
1518 | nvgpu_udelay(200); | ||
1519 | |||
1520 | count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); | ||
1521 | nvgpu_udelay(100); | ||
1522 | count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); | ||
1523 | freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2); | ||
1524 | do_div(freq, ncycle); | ||
1525 | *val = freq; | ||
1526 | |||
1527 | /* Restore clock slowdown */ | ||
1528 | gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save); | ||
1529 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1530 | |||
1531 | gk20a_idle(g); | ||
1532 | |||
1533 | if (count1 != count2) | ||
1534 | return -EBUSY; | ||
1535 | |||
1536 | return 0; | ||
1537 | } | ||
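The counter math above scales the reference clock by the ratio of counted GPCCLK edges to reference cycles: freq = clk_in * count / ncycle. A standalone check with made-up readings (the real clk_in and count come from hardware):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative numbers only. */
	uint64_t clk_in_khz = 38400;	/* reference clock, kHz        */
	uint64_t ncycle     = 800;	/* reference cycles counted    */
	uint64_t count      = 16000;	/* GPCCLK edges seen meanwhile */

	/* Same scaling as above: freq = clk_in * count / ncycle. */
	uint64_t gpcclk_khz = clk_in_khz * count / ncycle;

	/* Prints: 768000 kHz (i.e. 768 MHz) */
	printf("%llu kHz\n", (unsigned long long)gpcclk_khz);
	return 0;
}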
1538 | |||
1539 | int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val) | ||
1540 | { | ||
1541 | if (((reg < trim_sys_gpcpll_cfg_r()) || | ||
1542 | (reg > trim_sys_gpcpll_dvfs2_r())) && | ||
1543 | (reg != trim_sys_sel_vco_r()) && | ||
1544 | (reg != trim_sys_gpc2clk_out_r()) && | ||
1545 | (reg != trim_sys_bypassctrl_r())) | ||
1546 | return -EPERM; | ||
1547 | |||
1548 | if (reg == trim_sys_gpcpll_dvfs2_r()) | ||
1549 | reg = trim_gpc_bcast_gpcpll_dvfs2_r(); | ||
1550 | |||
1551 | nvgpu_mutex_acquire(&g->clk.clk_mutex); | ||
1552 | if (!g->clk.clk_hw_on) { | ||
1553 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1554 | return -EINVAL; | ||
1555 | } | ||
1556 | gk20a_writel(g, reg, val); | ||
1557 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1558 | |||
1559 | return 0; | ||
1560 | } | ||
1561 | |||
1562 | int gm20b_clk_get_pll_debug_data(struct gk20a *g, | ||
1563 | struct nvgpu_clk_pll_debug_data *d) | ||
1564 | { | ||
1565 | u32 reg; | ||
1566 | |||
1567 | nvgpu_mutex_acquire(&g->clk.clk_mutex); | ||
1568 | if (!g->clk.clk_hw_on) { | ||
1569 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1570 | return -EINVAL; | ||
1571 | } | ||
1572 | |||
1573 | d->trim_sys_bypassctrl_reg = trim_sys_bypassctrl_r(); | ||
1574 | d->trim_sys_bypassctrl_val = gk20a_readl(g, trim_sys_bypassctrl_r()); | ||
1575 | d->trim_sys_sel_vco_reg = trim_sys_sel_vco_r(); | ||
1576 | d->trim_sys_sel_vco_val = gk20a_readl(g, trim_sys_sel_vco_r()); | ||
1577 | d->trim_sys_gpc2clk_out_reg = trim_sys_gpc2clk_out_r(); | ||
1578 | d->trim_sys_gpc2clk_out_val = gk20a_readl(g, trim_sys_gpc2clk_out_r()); | ||
1579 | d->trim_sys_gpcpll_cfg_reg = trim_sys_gpcpll_cfg_r(); | ||
1580 | d->trim_sys_gpcpll_dvfs2_reg = trim_gpc_bcast_gpcpll_dvfs2_r(); | ||
1581 | |||
1582 | reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r()); | ||
1583 | d->trim_sys_gpcpll_cfg_val = reg; | ||
1584 | d->trim_sys_gpcpll_cfg_enabled = trim_sys_gpcpll_cfg_enable_v(reg); | ||
1585 | d->trim_sys_gpcpll_cfg_locked = trim_sys_gpcpll_cfg_pll_lock_v(reg); | ||
1586 | d->trim_sys_gpcpll_cfg_sync_on = trim_sys_gpcpll_cfg_sync_mode_v(reg); | ||
1587 | |||
1588 | reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); | ||
1589 | d->trim_sys_gpcpll_coeff_val = reg; | ||
1590 | d->trim_sys_gpcpll_coeff_mdiv = trim_sys_gpcpll_coeff_mdiv_v(reg); | ||
1591 | d->trim_sys_gpcpll_coeff_ndiv = trim_sys_gpcpll_coeff_ndiv_v(reg); | ||
1592 | d->trim_sys_gpcpll_coeff_pldiv = trim_sys_gpcpll_coeff_pldiv_v(reg); | ||
1593 | |||
1594 | reg = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r()); | ||
1595 | d->trim_sys_gpcpll_dvfs0_val = reg; | ||
1596 | d->trim_sys_gpcpll_dvfs0_dfs_coeff = | ||
1597 | trim_sys_gpcpll_dvfs0_dfs_coeff_v(reg); | ||
1598 | d->trim_sys_gpcpll_dvfs0_dfs_det_max = | ||
1599 | trim_sys_gpcpll_dvfs0_dfs_det_max_v(reg); | ||
1600 | d->trim_sys_gpcpll_dvfs0_dfs_dc_offset = | ||
1601 | trim_sys_gpcpll_dvfs0_dfs_dc_offset_v(reg); | ||
1602 | |||
1603 | nvgpu_mutex_release(&g->clk.clk_mutex); | ||
1604 | return 0; | ||
1605 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h new file mode 100644 index 00000000..e814ac70 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * GM20B Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | #ifndef _NVHOST_CLK_GM20B_H_ | ||
25 | #define _NVHOST_CLK_GM20B_H_ | ||
26 | |||
27 | #include <nvgpu/lock.h> | ||
28 | |||
29 | struct gk20a; | ||
30 | struct clk_gk20a; | ||
31 | |||
32 | struct nvgpu_clk_pll_debug_data { | ||
33 | u32 trim_sys_sel_vco_reg; | ||
34 | u32 trim_sys_sel_vco_val; | ||
35 | |||
36 | u32 trim_sys_gpc2clk_out_reg; | ||
37 | u32 trim_sys_gpc2clk_out_val; | ||
38 | |||
39 | u32 trim_sys_bypassctrl_reg; | ||
40 | u32 trim_sys_bypassctrl_val; | ||
41 | |||
42 | u32 trim_sys_gpcpll_cfg_reg; | ||
43 | u32 trim_sys_gpcpll_dvfs2_reg; | ||
44 | |||
45 | u32 trim_sys_gpcpll_cfg_val; | ||
46 | bool trim_sys_gpcpll_cfg_enabled; | ||
47 | bool trim_sys_gpcpll_cfg_locked; | ||
48 | bool trim_sys_gpcpll_cfg_sync_on; | ||
49 | |||
50 | u32 trim_sys_gpcpll_coeff_val; | ||
51 | u32 trim_sys_gpcpll_coeff_mdiv; | ||
52 | u32 trim_sys_gpcpll_coeff_ndiv; | ||
53 | u32 trim_sys_gpcpll_coeff_pldiv; | ||
54 | |||
55 | u32 trim_sys_gpcpll_dvfs0_val; | ||
56 | u32 trim_sys_gpcpll_dvfs0_dfs_coeff; | ||
57 | u32 trim_sys_gpcpll_dvfs0_dfs_det_max; | ||
58 | u32 trim_sys_gpcpll_dvfs0_dfs_dc_offset; | ||
59 | }; | ||
60 | |||
61 | int gm20b_init_clk_setup_sw(struct gk20a *g); | ||
62 | |||
63 | int gm20b_clk_prepare(struct clk_gk20a *clk); | ||
64 | void gm20b_clk_unprepare(struct clk_gk20a *clk); | ||
65 | int gm20b_clk_is_prepared(struct clk_gk20a *clk); | ||
66 | unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate); | ||
67 | int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate, | ||
68 | unsigned long parent_rate); | ||
69 | long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate, | ||
70 | unsigned long *parent_rate); | ||
71 | struct pll_parms *gm20b_get_gpc_pll_parms(void); | ||
72 | #ifdef CONFIG_DEBUG_FS | ||
73 | int gm20b_clk_init_debugfs(struct gk20a *g); | ||
74 | #endif | ||
75 | |||
76 | int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val); | ||
77 | int gm20b_init_clk_support(struct gk20a *g); | ||
78 | int gm20b_suspend_clk_support(struct gk20a *g); | ||
79 | int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val); | ||
80 | int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val); | ||
81 | int gm20b_clk_get_pll_debug_data(struct gk20a *g, | ||
82 | struct nvgpu_clk_pll_debug_data *d); | ||
83 | |||
84 | /* 1:1 match between post divider settings and divisor value */ | ||
85 | static inline u32 nvgpu_pl_to_div(u32 pl) | ||
86 | { | ||
87 | return pl; | ||
88 | } | ||
89 | |||
90 | static inline u32 nvgpu_div_to_pl(u32 div) | ||
91 | { | ||
92 | return div; | ||
93 | } | ||
94 | |||
95 | #endif /* _NVHOST_CLK_GM20B_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c new file mode 100644 index 00000000..1f8cc326 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c | |||
@@ -0,0 +1,195 @@ | |||
1 | /* | ||
2 | * GM20B GPC MMU | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gk20a/fb_gk20a.h" | ||
27 | #include "gm20b/fb_gm20b.h" | ||
28 | |||
29 | #include <nvgpu/hw/gm20b/hw_fb_gm20b.h> | ||
30 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
31 | #include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h> | ||
32 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | ||
33 | |||
34 | #define VPR_INFO_FETCH_WAIT (5) | ||
35 | #define WPR_INFO_ADDR_ALIGNMENT 0x0000000c | ||
36 | |||
37 | void fb_gm20b_init_fs_state(struct gk20a *g) | ||
38 | { | ||
39 | gk20a_dbg_info("initialize gm20b fb"); | ||
40 | |||
41 | gk20a_writel(g, fb_fbhub_num_active_ltcs_r(), | ||
42 | g->ltc_count); | ||
43 | } | ||
44 | |||
45 | void gm20b_fb_set_mmu_page_size(struct gk20a *g) | ||
46 | { | ||
47 | /* set large page size in fb */ | ||
48 | u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
49 | fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f(); | ||
50 | gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); | ||
51 | } | ||
52 | |||
53 | bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g) | ||
54 | { | ||
55 | /* enable use of full comp tag lines in fb */ | ||
56 | u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
57 | fb_mmu_ctrl |= fb_mmu_ctrl_use_full_comp_tag_line_true_f(); | ||
58 | gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); | ||
59 | |||
60 | return true; | ||
61 | } | ||
62 | |||
63 | unsigned int gm20b_fb_compression_page_size(struct gk20a *g) | ||
64 | { | ||
65 | return SZ_128K; | ||
66 | } | ||
67 | |||
68 | unsigned int gm20b_fb_compressible_page_size(struct gk20a *g) | ||
69 | { | ||
70 | return SZ_64K; | ||
71 | } | ||
72 | |||
73 | void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g) | ||
74 | { | ||
75 | u32 val; | ||
76 | |||
77 | /* print vpr and wpr info */ | ||
78 | val = gk20a_readl(g, fb_mmu_vpr_info_r()); | ||
79 | val &= ~0x3; | ||
80 | val |= fb_mmu_vpr_info_index_addr_lo_v(); | ||
81 | gk20a_writel(g, fb_mmu_vpr_info_r(), val); | ||
82 | nvgpu_err(g, "VPR: %08x %08x %08x %08x", | ||
83 | gk20a_readl(g, fb_mmu_vpr_info_r()), | ||
84 | gk20a_readl(g, fb_mmu_vpr_info_r()), | ||
85 | gk20a_readl(g, fb_mmu_vpr_info_r()), | ||
86 | gk20a_readl(g, fb_mmu_vpr_info_r())); | ||
87 | |||
88 | val = gk20a_readl(g, fb_mmu_wpr_info_r()); | ||
89 | val &= ~0xf; | ||
90 | val |= (fb_mmu_wpr_info_index_allow_read_v()); | ||
91 | gk20a_writel(g, fb_mmu_wpr_info_r(), val); | ||
92 | nvgpu_err(g, "WPR: %08x %08x %08x %08x %08x %08x", | ||
93 | gk20a_readl(g, fb_mmu_wpr_info_r()), | ||
94 | gk20a_readl(g, fb_mmu_wpr_info_r()), | ||
95 | gk20a_readl(g, fb_mmu_wpr_info_r()), | ||
96 | gk20a_readl(g, fb_mmu_wpr_info_r()), | ||
97 | gk20a_readl(g, fb_mmu_wpr_info_r()), | ||
98 | gk20a_readl(g, fb_mmu_wpr_info_r())); | ||
99 | |||
100 | } | ||
101 | |||
102 | static int gm20b_fb_vpr_info_fetch_wait(struct gk20a *g, | ||
103 | unsigned int msec) | ||
104 | { | ||
105 | struct nvgpu_timeout timeout; | ||
106 | |||
107 | nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER); | ||
108 | |||
109 | do { | ||
110 | u32 val; | ||
111 | |||
112 | val = gk20a_readl(g, fb_mmu_vpr_info_r()); | ||
113 | if (fb_mmu_vpr_info_fetch_v(val) == | ||
114 | fb_mmu_vpr_info_fetch_false_v()) | ||
115 | return 0; | ||
116 | |||
117 | } while (!nvgpu_timeout_expired(&timeout)); | ||
118 | |||
119 | return -ETIMEDOUT; | ||
120 | } | ||
121 | |||
122 | int gm20b_fb_vpr_info_fetch(struct gk20a *g) | ||
123 | { | ||
124 | if (gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) { | ||
125 | return -ETIME; | ||
126 | } | ||
127 | |||
128 | gk20a_writel(g, fb_mmu_vpr_info_r(), | ||
129 | fb_mmu_vpr_info_fetch_true_v()); | ||
130 | |||
131 | return gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT); | ||
132 | } | ||
133 | |||
134 | void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf) | ||
135 | { | ||
136 | u32 val = 0; | ||
137 | u64 wpr_start = 0; | ||
138 | u64 wpr_end = 0; | ||
139 | |||
140 | val = gk20a_readl(g, fb_mmu_wpr_info_r()); | ||
141 | val &= ~0xF; | ||
142 | val |= fb_mmu_wpr_info_index_wpr1_addr_lo_v(); | ||
143 | gk20a_writel(g, fb_mmu_wpr_info_r(), val); | ||
144 | |||
145 | val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4; | ||
146 | wpr_start = hi32_lo32_to_u64( | ||
147 | (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)), | ||
148 | (val << WPR_INFO_ADDR_ALIGNMENT)); | ||
149 | |||
150 | val = gk20a_readl(g, fb_mmu_wpr_info_r()); | ||
151 | val &= ~0xF; | ||
152 | val |= fb_mmu_wpr_info_index_wpr1_addr_hi_v(); | ||
153 | gk20a_writel(g, fb_mmu_wpr_info_r(), val); | ||
154 | |||
155 | val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4; | ||
156 | wpr_end = hi32_lo32_to_u64( | ||
157 | (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)), | ||
158 | (val << WPR_INFO_ADDR_ALIGNMENT)); | ||
159 | |||
160 | inf->wpr_base = wpr_start; | ||
161 | inf->nonwpr_base = 0; | ||
162 | inf->size = (wpr_end - wpr_start); | ||
163 | } | ||
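The two shifts fed into hi32_lo32_to_u64() above reconstruct a byte address from a field the hardware appears to store in 4 KiB units; together they are equivalent to a single 64-bit left shift by WPR_INFO_ADDR_ALIGNMENT (12). A standalone illustration with a made-up field value (the helper below is a local stand-in, not the driver's):

#include <stdio.h>
#include <stdint.h>

#define WPR_INFO_ADDR_ALIGNMENT 0x0000000c	/* 12: 4 KiB units */

static uint64_t hi32_lo32_to_u64(uint32_t hi, uint32_t lo)
{
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* Hypothetical WPR_INFO address field, i.e. the value after the >> 4. */
	uint32_t field = 0x000ff000;

	uint64_t addr = hi32_lo32_to_u64(field >> (32 - WPR_INFO_ADDR_ALIGNMENT),
					 field << WPR_INFO_ADDR_ALIGNMENT);

	/* Prints: 0xff000000 == 0xff000000 -- the hi/lo split is just a 64-bit << 12. */
	printf("0x%llx == 0x%llx\n", (unsigned long long)addr,
	       (unsigned long long)((uint64_t)field << 12));
	return 0;
}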
164 | |||
165 | bool gm20b_fb_debug_mode_enabled(struct gk20a *g) | ||
166 | { | ||
167 | u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r()); | ||
168 | return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) == | ||
169 | gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v(); | ||
170 | } | ||
171 | |||
172 | void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable) | ||
173 | { | ||
174 | u32 reg_val, fb_debug_ctrl, gpc_debug_ctrl; | ||
175 | |||
176 | if (enable) { | ||
177 | fb_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f(); | ||
178 | gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f(); | ||
179 | g->mmu_debug_ctrl = true; | ||
180 | } else { | ||
181 | fb_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f(); | ||
182 | gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f(); | ||
183 | g->mmu_debug_ctrl = false; | ||
184 | } | ||
185 | |||
186 | reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r()); | ||
187 | reg_val = set_field(reg_val, | ||
188 | fb_mmu_debug_ctrl_debug_m(), fb_debug_ctrl); | ||
189 | gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val); | ||
190 | |||
191 | reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r()); | ||
192 | reg_val = set_field(reg_val, | ||
193 | gr_gpcs_pri_mmu_debug_ctrl_debug_m(), gpc_debug_ctrl); | ||
194 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val); | ||
195 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h new file mode 100644 index 00000000..32d36f57 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * GM20B FB | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_FB | ||
26 | #define _NVHOST_GM20B_FB | ||
27 | struct gk20a; | ||
28 | |||
29 | void fb_gm20b_init_fs_state(struct gk20a *g); | ||
30 | void gm20b_fb_set_mmu_page_size(struct gk20a *g); | ||
31 | bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g); | ||
32 | unsigned int gm20b_fb_compression_page_size(struct gk20a *g); | ||
33 | unsigned int gm20b_fb_compressible_page_size(struct gk20a *g); | ||
34 | void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g); | ||
35 | void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf); | ||
36 | int gm20b_fb_vpr_info_fetch(struct gk20a *g); | ||
37 | bool gm20b_fb_debug_mode_enabled(struct gk20a *g); | ||
38 | void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable); | ||
39 | |||
40 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c new file mode 100644 index 00000000..0762e8bd --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -0,0 +1,223 @@ | |||
1 | /* | ||
2 | * GM20B Fifo | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gk20a/fifo_gk20a.h" | ||
27 | |||
28 | #include "fifo_gm20b.h" | ||
29 | |||
30 | #include <nvgpu/timers.h> | ||
31 | #include <nvgpu/log.h> | ||
32 | #include <nvgpu/atomic.h> | ||
33 | #include <nvgpu/barrier.h> | ||
34 | #include <nvgpu/mm.h> | ||
35 | |||
36 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> | ||
37 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | ||
38 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | ||
39 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
40 | #include <nvgpu/hw/gm20b/hw_pbdma_gm20b.h> | ||
41 | |||
42 | void channel_gm20b_bind(struct channel_gk20a *c) | ||
43 | { | ||
44 | struct gk20a *g = c->g; | ||
45 | |||
46 | u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) | ||
47 | >> ram_in_base_shift_v(); | ||
48 | |||
49 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | ||
50 | c->chid, inst_ptr); | ||
51 | |||
52 | |||
53 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), | ||
54 | ccsr_channel_inst_ptr_f(inst_ptr) | | ||
55 | nvgpu_aperture_mask(g, &c->inst_block, | ||
56 | ccsr_channel_inst_target_sys_mem_ncoh_f(), | ||
57 | ccsr_channel_inst_target_vid_mem_f()) | | ||
58 | ccsr_channel_inst_bind_true_f()); | ||
59 | |||
60 | gk20a_writel(g, ccsr_channel_r(c->chid), | ||
61 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | ||
62 | ~ccsr_channel_enable_set_f(~0)) | | ||
63 | ccsr_channel_enable_set_true_f()); | ||
64 | nvgpu_smp_wmb(); | ||
65 | nvgpu_atomic_set(&c->bound, true); | ||
66 | } | ||
67 | |||
68 | static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id) | ||
69 | { | ||
70 | u32 fault_id = ~0; | ||
71 | struct fifo_engine_info_gk20a *engine_info; | ||
72 | |||
73 | engine_info = gk20a_fifo_get_engine_info(g, engine_id); | ||
74 | |||
75 | if (engine_info) { | ||
76 | fault_id = engine_info->fault_id; | ||
77 | } else { | ||
78 | nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id); | ||
79 | } | ||
80 | return fault_id; | ||
81 | } | ||
82 | |||
83 | void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, | ||
84 | unsigned long engine_ids) | ||
85 | { | ||
86 | unsigned long delay = GR_IDLE_CHECK_DEFAULT; | ||
87 | unsigned long engine_id; | ||
88 | int ret = -EBUSY; | ||
89 | struct nvgpu_timeout timeout; | ||
90 | |||
91 | /* trigger faults for all bad engines */ | ||
92 | for_each_set_bit(engine_id, &engine_ids, 32) { | ||
93 | if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) { | ||
94 | nvgpu_err(g, "faulting unknown engine %ld", engine_id); | ||
95 | } else { | ||
96 | u32 mmu_id = gm20b_engine_id_to_mmu_id(g, | ||
97 | engine_id); | ||
98 | if (mmu_id != (u32)~0) | ||
99 | gk20a_writel(g, fifo_trigger_mmu_fault_r(mmu_id), | ||
100 | fifo_trigger_mmu_fault_enable_f(1)); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | ||
105 | NVGPU_TIMER_CPU_TIMER); | ||
106 | |||
107 | /* Wait for MMU fault to trigger */ | ||
108 | do { | ||
109 | if (gk20a_readl(g, fifo_intr_0_r()) & | ||
110 | fifo_intr_0_mmu_fault_pending_f()) { | ||
111 | ret = 0; | ||
112 | break; | ||
113 | } | ||
114 | |||
115 | nvgpu_usleep_range(delay, delay * 2); | ||
116 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
117 | } while (!nvgpu_timeout_expired(&timeout)); | ||
118 | |||
119 | if (ret) | ||
120 | nvgpu_err(g, "mmu fault timeout"); | ||
121 | |||
122 | /* release mmu fault trigger */ | ||
123 | for_each_set_bit(engine_id, &engine_ids, 32) | ||
124 | gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0); | ||
125 | } | ||
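The wait loop above uses the common poll-with-exponential-backoff pattern: sleep for a short interval, double it on each pass up to a cap, and stop on success or timeout. A generic, driver-independent sketch of the same pattern in user-space C (poll_with_backoff() and ready() are made-up names, not driver APIs):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int polls;
static bool ready(void *arg) { (void)arg; return ++polls >= 4; }

/* Poll cond() until true or roughly timeout_us elapses, doubling the interval. */
static int poll_with_backoff(bool (*cond)(void *), void *arg,
			     unsigned long timeout_us, unsigned long max_delay_us)
{
	unsigned long delay = 10, waited = 0;	/* initial poll interval, us */

	while (waited < timeout_us) {
		if (cond(arg))
			return 0;
		usleep(delay);
		waited += delay;
		delay = delay * 2 > max_delay_us ? max_delay_us : delay * 2;
	}
	return cond(arg) ? 0 : -1;	/* one final check, then give up */
}

int main(void)
{
	/* Prints: 0 after 4 polls */
	printf("%d after %d polls\n",
	       poll_with_backoff(ready, NULL, 100000, 5000), polls);
	return 0;
}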
126 | |||
127 | u32 gm20b_fifo_get_num_fifos(struct gk20a *g) | ||
128 | { | ||
129 | return ccsr_channel__size_1_v(); | ||
130 | } | ||
131 | |||
132 | void gm20b_device_info_data_parse(struct gk20a *g, | ||
133 | u32 table_entry, u32 *inst_id, | ||
134 | u32 *pri_base, u32 *fault_id) | ||
135 | { | ||
136 | if (top_device_info_data_type_v(table_entry) == | ||
137 | top_device_info_data_type_enum2_v()) { | ||
138 | if (pri_base) { | ||
139 | *pri_base = | ||
140 | (top_device_info_data_pri_base_v(table_entry) | ||
141 | << top_device_info_data_pri_base_align_v()); | ||
142 | } | ||
143 | if (fault_id && (top_device_info_data_fault_id_v(table_entry) == | ||
144 | top_device_info_data_fault_id_valid_v())) { | ||
145 | *fault_id = | ||
146 | top_device_info_data_fault_id_enum_v(table_entry); | ||
147 | } | ||
148 | } else | ||
149 | nvgpu_err(g, "unknown device_info_data %d", | ||
150 | top_device_info_data_type_v(table_entry)); | ||
151 | } | ||
152 | |||
153 | void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f) | ||
154 | { | ||
155 | /* | ||
156 | * These are all errors which indicate that something has gone | ||
157 | * seriously wrong in the device. | ||
158 | */ | ||
159 | f->intr.pbdma.device_fatal_0 = | ||
160 | pbdma_intr_0_memreq_pending_f() | | ||
161 | pbdma_intr_0_memack_timeout_pending_f() | | ||
162 | pbdma_intr_0_memack_extra_pending_f() | | ||
163 | pbdma_intr_0_memdat_timeout_pending_f() | | ||
164 | pbdma_intr_0_memdat_extra_pending_f() | | ||
165 | pbdma_intr_0_memflush_pending_f() | | ||
166 | pbdma_intr_0_memop_pending_f() | | ||
167 | pbdma_intr_0_lbconnect_pending_f() | | ||
168 | pbdma_intr_0_lback_timeout_pending_f() | | ||
169 | pbdma_intr_0_lback_extra_pending_f() | | ||
170 | pbdma_intr_0_lbdat_timeout_pending_f() | | ||
171 | pbdma_intr_0_lbdat_extra_pending_f() | | ||
172 | pbdma_intr_0_pri_pending_f(); | ||
173 | |||
174 | /* | ||
175 | * These are data parsing or framing errors, or others, which can be | ||
176 | * recovered from with intervention... or by just resetting the | ||
177 | * channel. | ||
178 | */ | ||
179 | f->intr.pbdma.channel_fatal_0 = | ||
180 | pbdma_intr_0_gpfifo_pending_f() | | ||
181 | pbdma_intr_0_gpptr_pending_f() | | ||
182 | pbdma_intr_0_gpentry_pending_f() | | ||
183 | pbdma_intr_0_gpcrc_pending_f() | | ||
184 | pbdma_intr_0_pbptr_pending_f() | | ||
185 | pbdma_intr_0_pbentry_pending_f() | | ||
186 | pbdma_intr_0_pbcrc_pending_f() | | ||
187 | pbdma_intr_0_method_pending_f() | | ||
188 | pbdma_intr_0_methodcrc_pending_f() | | ||
189 | pbdma_intr_0_pbseg_pending_f() | | ||
190 | pbdma_intr_0_signature_pending_f(); | ||
191 | |||
192 | /* Can be used for sw-methods, or represents a recoverable timeout. */ | ||
193 | f->intr.pbdma.restartable_0 = | ||
194 | pbdma_intr_0_device_pending_f(); | ||
195 | } | ||
196 | |||
197 | static void gm20b_fifo_set_ctx_reload(struct channel_gk20a *ch) | ||
198 | { | ||
199 | struct gk20a *g = ch->g; | ||
200 | u32 channel = gk20a_readl(g, ccsr_channel_r(ch->chid)); | ||
201 | |||
202 | gk20a_writel(g, ccsr_channel_r(ch->chid), | ||
203 | channel | ccsr_channel_force_ctx_reload_true_f()); | ||
204 | } | ||
205 | |||
206 | void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch) | ||
207 | { | ||
208 | struct gk20a *g = ch->g; | ||
209 | struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; | ||
210 | struct channel_gk20a *temp_ch; | ||
211 | |||
212 | /* If CTX_RELOAD is set on a channel, move it to some other channel */ | ||
213 | if (gk20a_fifo_channel_status_is_ctx_reload(ch->g, ch->chid)) { | ||
214 | nvgpu_rwsem_down_read(&tsg->ch_list_lock); | ||
215 | nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, channel_gk20a, ch_entry) { | ||
216 | if (temp_ch->chid != ch->chid) { | ||
217 | gm20b_fifo_set_ctx_reload(temp_ch); | ||
218 | break; | ||
219 | } | ||
220 | } | ||
221 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | ||
222 | } | ||
223 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h new file mode 100644 index 00000000..8d487358 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * GM20B Fifo | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_FIFO | ||
26 | #define _NVHOST_GM20B_FIFO | ||
27 | struct gk20a; | ||
28 | |||
29 | void channel_gm20b_bind(struct channel_gk20a *c); | ||
30 | void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, | ||
31 | unsigned long engine_ids); | ||
32 | u32 gm20b_fifo_get_num_fifos(struct gk20a *g); | ||
33 | void gm20b_device_info_data_parse(struct gk20a *g, | ||
34 | u32 table_entry, u32 *inst_id, | ||
35 | u32 *pri_base, u32 *fault_id); | ||
36 | void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f); | ||
37 | void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch); | ||
38 | |||
39 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c new file mode 100644 index 00000000..0ebb2d0d --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c | |||
@@ -0,0 +1,731 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * This file is autogenerated. Do not edit. | ||
23 | */ | ||
24 | |||
25 | #ifndef __gm20b_gating_reglist_h__ | ||
26 | #define __gm20b_gating_reglist_h__ | ||
27 | |||
28 | #include "gm20b_gating_reglist.h" | ||
29 | #include <nvgpu/enabled.h> | ||
30 | |||
31 | struct gating_desc { | ||
32 | u32 addr; | ||
33 | u32 prod; | ||
34 | u32 disable; | ||
35 | }; | ||
36 | /* slcg bus */ | ||
37 | static const struct gating_desc gm20b_slcg_bus[] = { | ||
38 | {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe}, | ||
39 | }; | ||
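Tables like the one above are typically consumed by a small loader that writes either the .prod or the .disable value to each .addr, depending on whether clock gating is being put into its production state. The driver's own load functions live elsewhere in this file; a hedged, standalone sketch of the idea, with a stubbed register write and made-up names (reg_write, load_gating_prod, demo_slcg), is:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct gating_desc { uint32_t addr; uint32_t prod; uint32_t disable; };

static const struct gating_desc demo_slcg[] = {
	/* Values copied from the bus entry above, for illustration only. */
	{ .addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe },
};

/* Stand-in for the driver's register write helper. */
static void reg_write(uint32_t addr, uint32_t val)
{
	printf("write 0x%08x <- 0x%08x\n", addr, val);
}

/* Write production (gating enabled) or disable values for every entry. */
static void load_gating_prod(const struct gating_desc *descs, size_t n, int prod)
{
	size_t i;

	for (i = 0; i < n; i++)
		reg_write(descs[i].addr, prod ? descs[i].prod : descs[i].disable);
}

int main(void)
{
	load_gating_prod(demo_slcg, sizeof(demo_slcg) / sizeof(demo_slcg[0]), 1);
	return 0;
}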
40 | |||
41 | /* slcg ce2 */ | ||
42 | static const struct gating_desc gm20b_slcg_ce2[] = { | ||
43 | {.addr = 0x00106f28, .prod = 0x00000000, .disable = 0x000007fe}, | ||
44 | }; | ||
45 | |||
46 | /* slcg chiplet */ | ||
47 | static const struct gating_desc gm20b_slcg_chiplet[] = { | ||
48 | {.addr = 0x0010c07c, .prod = 0x00000000, .disable = 0x00000007}, | ||
49 | {.addr = 0x0010e07c, .prod = 0x00000000, .disable = 0x00000007}, | ||
50 | {.addr = 0x0010d07c, .prod = 0x00000000, .disable = 0x00000007}, | ||
51 | {.addr = 0x0010e17c, .prod = 0x00000000, .disable = 0x00000007}, | ||
52 | }; | ||
53 | |||
54 | /* slcg fb */ | ||
55 | static const struct gating_desc gm20b_slcg_fb[] = { | ||
56 | {.addr = 0x00100d14, .prod = 0x00000000, .disable = 0xfffffffe}, | ||
57 | {.addr = 0x00100c9c, .prod = 0x00000000, .disable = 0x000001fe}, | ||
58 | }; | ||
59 | |||
60 | /* slcg fifo */ | ||
61 | static const struct gating_desc gm20b_slcg_fifo[] = { | ||
62 | {.addr = 0x000026ac, .prod = 0x00000100, .disable = 0x0001fffe}, | ||
63 | }; | ||
64 | |||
65 | /* slcg gr */ | ||
66 | static const struct gating_desc gm20b_slcg_gr[] = { | ||
67 | {.addr = 0x004041f4, .prod = 0x00000002, .disable = 0x03fffffe}, | ||
68 | {.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe}, | ||
69 | {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe}, | ||
70 | {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe}, | ||
71 | {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
72 | {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe}, | ||
73 | {.addr = 0x00405910, .prod = 0xfffffff0, .disable = 0xfffffffe}, | ||
74 | {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe}, | ||
75 | {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000007e}, | ||
76 | {.addr = 0x0041a17c, .prod = 0x00020008, .disable = 0x0003fffe}, | ||
77 | {.addr = 0x0041a894, .prod = 0x00000040, .disable = 0x0003fffe}, | ||
78 | {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0007fffe}, | ||
79 | {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe}, | ||
80 | {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e}, | ||
81 | {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e}, | ||
82 | {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001}, | ||
83 | {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
84 | {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe}, | ||
85 | {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
86 | {.addr = 0x00418c74, .prod = 0xffffffc0, .disable = 0xfffffffe}, | ||
87 | {.addr = 0x00418cf4, .prod = 0xfffffffc, .disable = 0xfffffffe}, | ||
88 | {.addr = 0x00418d74, .prod = 0xffffffe0, .disable = 0xfffffffe}, | ||
89 | {.addr = 0x00418f10, .prod = 0xffffffe0, .disable = 0xfffffffe}, | ||
90 | {.addr = 0x00418e10, .prod = 0xfffffffe, .disable = 0xfffffffe}, | ||
91 | {.addr = 0x00419024, .prod = 0x000001fe, .disable = 0x000001fe}, | ||
92 | {.addr = 0x0041889c, .prod = 0x00000000, .disable = 0x000001fe}, | ||
93 | {.addr = 0x00419d64, .prod = 0x00000000, .disable = 0x000001ff}, | ||
94 | {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e}, | ||
95 | {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe}, | ||
96 | {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e}, | ||
97 | {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e}, | ||
98 | {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe}, | ||
99 | {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e}, | ||
100 | {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e}, | ||
101 | {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e}, | ||
102 | {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e}, | ||
103 | {.addr = 0x0041986c, .prod = 0x00000104, .disable = 0x00fffffe}, | ||
104 | {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe}, | ||
105 | {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe}, | ||
106 | {.addr = 0x00419c74, .prod = 0x0000001e, .disable = 0x0000001e}, | ||
107 | {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe}, | ||
108 | {.addr = 0x00419fdc, .prod = 0xffedff00, .disable = 0xfffffffe}, | ||
109 | {.addr = 0x00419fe4, .prod = 0x00001b00, .disable = 0x00001ffe}, | ||
110 | {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe}, | ||
111 | {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
112 | {.addr = 0x0041be2c, .prod = 0x04115fc0, .disable = 0xfffffffe}, | ||
113 | {.addr = 0x0041bfec, .prod = 0xfffffff0, .disable = 0xfffffffe}, | ||
114 | {.addr = 0x0041bed4, .prod = 0xfffffff6, .disable = 0xfffffffe}, | ||
115 | {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
116 | {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
117 | {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
118 | {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
119 | {.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
120 | {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
121 | {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
122 | {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
123 | {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe}, | ||
124 | {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff}, | ||
125 | }; | ||
126 | |||
127 | /* slcg ltc */ | ||
128 | static const struct gating_desc gm20b_slcg_ltc[] = { | ||
129 | {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0xfffffffe}, | ||
130 | {.addr = 0x0017e35c, .prod = 0x00000000, .disable = 0xfffffffe}, | ||
131 | }; | ||
132 | |||
133 | /* slcg perf */ | ||
134 | static const struct gating_desc gm20b_slcg_perf[] = { | ||
135 | {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000}, | ||
136 | {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000}, | ||
137 | {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000}, | ||
138 | {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000}, | ||
139 | }; | ||
140 | |||
141 | /* slcg PriRing */ | ||
142 | static const struct gating_desc gm20b_slcg_priring[] = { | ||
143 | {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001}, | ||
144 | }; | ||
145 | |||
146 | /* slcg pwr_csb */ | ||
147 | static const struct gating_desc gm20b_slcg_pwr_csb[] = { | ||
148 | {.addr = 0x0000017c, .prod = 0x00020008, .disable = 0x0003fffe}, | ||
149 | {.addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f}, | ||
150 | {.addr = 0x00000a74, .prod = 0x00000000, .disable = 0x00007ffe}, | ||
151 | {.addr = 0x000016b8, .prod = 0x00000000, .disable = 0x0000000f}, | ||
152 | }; | ||
153 | |||
154 | /* slcg pmu */ | ||
155 | static const struct gating_desc gm20b_slcg_pmu[] = { | ||
156 | {.addr = 0x0010a17c, .prod = 0x00020008, .disable = 0x0003fffe}, | ||
157 | {.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe}, | ||
158 | {.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f}, | ||
159 | }; | ||
160 | |||
161 | /* therm gr */ | ||
162 | static const struct gating_desc gm20b_slcg_therm[] = { | ||
163 | {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f}, | ||
164 | }; | ||
165 | |||
166 | /* slcg Xbar */ | ||
167 | static const struct gating_desc gm20b_slcg_xbar[] = { | ||
168 | {.addr = 0x0013cbe4, .prod = 0x00000000, .disable = 0x1ffffffe}, | ||
169 | {.addr = 0x0013cc04, .prod = 0x00000000, .disable = 0x1ffffffe}, | ||
170 | }; | ||
171 | |||
172 | /* blcg bus */ | ||
173 | static const struct gating_desc gm20b_blcg_bus[] = { | ||
174 | {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000}, | ||
175 | }; | ||
176 | |||
177 | /* blcg ctxsw prog */ | ||
178 | static const struct gating_desc gm20b_blcg_ctxsw_prog[] = { | ||
179 | }; | ||
180 | |||
181 | /* blcg fb */ | ||
182 | static const struct gating_desc gm20b_blcg_fb[] = { | ||
183 | {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000}, | ||
184 | {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000}, | ||
185 | {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000}, | ||
186 | {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000}, | ||
187 | {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000}, | ||
188 | }; | ||
189 | |||
190 | /* blcg fifo */ | ||
191 | static const struct gating_desc gm20b_blcg_fifo[] = { | ||
192 | {.addr = 0x000026a4, .prod = 0x0000c242, .disable = 0x00000000}, | ||
193 | }; | ||
194 | |||
195 | /* blcg gr */ | ||
196 | static const struct gating_desc gm20b_blcg_gr[] = { | ||
197 | {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000}, | ||
198 | {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000}, | ||
199 | {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000}, | ||
200 | {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000}, | ||
201 | {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000}, | ||
202 | {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000}, | ||
203 | {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000}, | ||
204 | {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000}, | ||
205 | {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000}, | ||
206 | {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000}, | ||
207 | {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000}, | ||
208 | {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000}, | ||
209 | {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000}, | ||
210 | {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000}, | ||
211 | {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000}, | ||
212 | {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000}, | ||
213 | {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000}, | ||
214 | {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000}, | ||
215 | {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000}, | ||
216 | {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000}, | ||
217 | {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000}, | ||
218 | {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000}, | ||
219 | {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000}, | ||
220 | {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000}, | ||
221 | {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000}, | ||
222 | {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000}, | ||
223 | {.addr = 0x00418898, .prod = 0x00000042, .disable = 0x00000000}, | ||
224 | {.addr = 0x00419a40, .prod = 0x00000042, .disable = 0x00000000}, | ||
225 | {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000}, | ||
226 | {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000}, | ||
227 | {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000}, | ||
228 | {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000}, | ||
229 | {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000}, | ||
230 | {.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000}, | ||
231 | {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000}, | ||
232 | {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000}, | ||
233 | {.addr = 0x00419868, .prod = 0x00000042, .disable = 0x00000000}, | ||
234 | {.addr = 0x00419cd4, .prod = 0x00000002, .disable = 0x00000000}, | ||
235 | {.addr = 0x00419cdc, .prod = 0x00000002, .disable = 0x00000000}, | ||
236 | {.addr = 0x00419c70, .prod = 0x00004044, .disable = 0x00000000}, | ||
237 | {.addr = 0x00419fd0, .prod = 0x00004044, .disable = 0x00000000}, | ||
238 | {.addr = 0x00419fd8, .prod = 0x00004046, .disable = 0x00000000}, | ||
239 | {.addr = 0x00419fe0, .prod = 0x00004044, .disable = 0x00000000}, | ||
240 | {.addr = 0x00419fe8, .prod = 0x00000042, .disable = 0x00000000}, | ||
241 | {.addr = 0x00419ff0, .prod = 0x00004045, .disable = 0x00000000}, | ||
242 | {.addr = 0x00419ff8, .prod = 0x00000002, .disable = 0x00000000}, | ||
243 | {.addr = 0x00419f90, .prod = 0x00000002, .disable = 0x00000000}, | ||
244 | {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000}, | ||
245 | {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000}, | ||
246 | {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000}, | ||
247 | {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000}, | ||
248 | {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000}, | ||
249 | {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000}, | ||
250 | {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000}, | ||
251 | {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000}, | ||
252 | {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000}, | ||
253 | {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000}, | ||
254 | {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000}, | ||
255 | {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000}, | ||
256 | {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000}, | ||
257 | {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000}, | ||
258 | }; | ||
259 | |||
260 | /* blcg ltc */ | ||
261 | static const struct gating_desc gm20b_blcg_ltc[] = { | ||
262 | {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000}, | ||
263 | {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000}, | ||
264 | {.addr = 0x0017e3e0, .prod = 0x00000044, .disable = 0x00000000}, | ||
265 | {.addr = 0x0017e3c8, .prod = 0x00000044, .disable = 0x00000000}, | ||
266 | }; | ||
267 | |||
268 | /* blcg pwr_csb */ | ||
269 | static const struct gating_desc gm20b_blcg_pwr_csb[] = { | ||
270 | {.addr = 0x00000a70, .prod = 0x00000045, .disable = 0x00000000}, | ||
271 | }; | ||
272 | |||
273 | /* blcg pmu */ | ||
274 | static const struct gating_desc gm20b_blcg_pmu[] = { | ||
275 | {.addr = 0x0010aa70, .prod = 0x00000045, .disable = 0x00000000}, | ||
276 | }; | ||
277 | |||
278 | /* blcg Xbar */ | ||
279 | static const struct gating_desc gm20b_blcg_xbar[] = { | ||
280 | {.addr = 0x0013cbe0, .prod = 0x00000042, .disable = 0x00000000}, | ||
281 | {.addr = 0x0013cc00, .prod = 0x00000042, .disable = 0x00000000}, | ||
282 | }; | ||
283 | |||
284 | /* pg gr */ | ||
285 | static const struct gating_desc gm20b_pg_gr[] = { | ||
286 | }; | ||
287 | |||
288 | /* inline functions */ | ||
289 | void gm20b_slcg_bus_load_gating_prod(struct gk20a *g, | ||
290 | bool prod) | ||
291 | { | ||
292 | u32 i; | ||
293 | u32 size = sizeof(gm20b_slcg_bus) / sizeof(struct gating_desc); | ||
294 | |||
295 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
296 | return; | ||
297 | |||
298 | for (i = 0; i < size; i++) { | ||
299 | if (prod) | ||
300 | gk20a_writel(g, gm20b_slcg_bus[i].addr, | ||
301 | gm20b_slcg_bus[i].prod); | ||
302 | else | ||
303 | gk20a_writel(g, gm20b_slcg_bus[i].addr, | ||
304 | gm20b_slcg_bus[i].disable); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g, | ||
309 | bool prod) | ||
310 | { | ||
311 | u32 i; | ||
312 | u32 size = sizeof(gm20b_slcg_ce2) / sizeof(struct gating_desc); | ||
313 | |||
314 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
315 | return; | ||
316 | |||
317 | for (i = 0; i < size; i++) { | ||
318 | if (prod) | ||
319 | gk20a_writel(g, gm20b_slcg_ce2[i].addr, | ||
320 | gm20b_slcg_ce2[i].prod); | ||
321 | else | ||
322 | gk20a_writel(g, gm20b_slcg_ce2[i].addr, | ||
323 | gm20b_slcg_ce2[i].disable); | ||
324 | } | ||
325 | } | ||
326 | |||
327 | void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g, | ||
328 | bool prod) | ||
329 | { | ||
330 | u32 i; | ||
331 | u32 size = sizeof(gm20b_slcg_chiplet) / sizeof(struct gating_desc); | ||
332 | |||
333 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
334 | return; | ||
335 | |||
336 | for (i = 0; i < size; i++) { | ||
337 | if (prod) | ||
338 | gk20a_writel(g, gm20b_slcg_chiplet[i].addr, | ||
339 | gm20b_slcg_chiplet[i].prod); | ||
340 | else | ||
341 | gk20a_writel(g, gm20b_slcg_chiplet[i].addr, | ||
342 | gm20b_slcg_chiplet[i].disable); | ||
343 | } | ||
344 | } | ||
345 | |||
346 | void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g, | ||
347 | bool prod) | ||
348 | { | ||
349 | } | ||
350 | |||
351 | void gm20b_slcg_fb_load_gating_prod(struct gk20a *g, | ||
352 | bool prod) | ||
353 | { | ||
354 | u32 i; | ||
355 | u32 size = sizeof(gm20b_slcg_fb) / sizeof(struct gating_desc); | ||
356 | |||
357 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
358 | return; | ||
359 | |||
360 | for (i = 0; i < size; i++) { | ||
361 | if (prod) | ||
362 | gk20a_writel(g, gm20b_slcg_fb[i].addr, | ||
363 | gm20b_slcg_fb[i].prod); | ||
364 | else | ||
365 | gk20a_writel(g, gm20b_slcg_fb[i].addr, | ||
366 | gm20b_slcg_fb[i].disable); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g, | ||
371 | bool prod) | ||
372 | { | ||
373 | u32 i; | ||
374 | u32 size = sizeof(gm20b_slcg_fifo) / sizeof(struct gating_desc); | ||
375 | |||
376 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
377 | return; | ||
378 | |||
379 | for (i = 0; i < size; i++) { | ||
380 | if (prod) | ||
381 | gk20a_writel(g, gm20b_slcg_fifo[i].addr, | ||
382 | gm20b_slcg_fifo[i].prod); | ||
383 | else | ||
384 | gk20a_writel(g, gm20b_slcg_fifo[i].addr, | ||
385 | gm20b_slcg_fifo[i].disable); | ||
386 | } | ||
387 | } | ||
388 | |||
389 | void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g, | ||
390 | bool prod) | ||
391 | { | ||
392 | u32 i; | ||
393 | u32 size = sizeof(gm20b_slcg_gr) / sizeof(struct gating_desc); | ||
394 | |||
395 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
396 | return; | ||
397 | |||
398 | for (i = 0; i < size; i++) { | ||
399 | if (prod) | ||
400 | gk20a_writel(g, gm20b_slcg_gr[i].addr, | ||
401 | gm20b_slcg_gr[i].prod); | ||
402 | else | ||
403 | gk20a_writel(g, gm20b_slcg_gr[i].addr, | ||
404 | gm20b_slcg_gr[i].disable); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g, | ||
409 | bool prod) | ||
410 | { | ||
411 | u32 i; | ||
412 | u32 size = sizeof(gm20b_slcg_ltc) / sizeof(struct gating_desc); | ||
413 | |||
414 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
415 | return; | ||
416 | |||
417 | for (i = 0; i < size; i++) { | ||
418 | if (prod) | ||
419 | gk20a_writel(g, gm20b_slcg_ltc[i].addr, | ||
420 | gm20b_slcg_ltc[i].prod); | ||
421 | else | ||
422 | gk20a_writel(g, gm20b_slcg_ltc[i].addr, | ||
423 | gm20b_slcg_ltc[i].disable); | ||
424 | } | ||
425 | } | ||
426 | |||
427 | void gm20b_slcg_perf_load_gating_prod(struct gk20a *g, | ||
428 | bool prod) | ||
429 | { | ||
430 | u32 i; | ||
431 | u32 size = sizeof(gm20b_slcg_perf) / sizeof(struct gating_desc); | ||
432 | |||
433 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
434 | return; | ||
435 | |||
436 | for (i = 0; i < size; i++) { | ||
437 | if (prod) | ||
438 | gk20a_writel(g, gm20b_slcg_perf[i].addr, | ||
439 | gm20b_slcg_perf[i].prod); | ||
440 | else | ||
441 | gk20a_writel(g, gm20b_slcg_perf[i].addr, | ||
442 | gm20b_slcg_perf[i].disable); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | void gm20b_slcg_priring_load_gating_prod(struct gk20a *g, | ||
447 | bool prod) | ||
448 | { | ||
449 | u32 i; | ||
450 | u32 size = sizeof(gm20b_slcg_priring) / sizeof(struct gating_desc); | ||
451 | |||
452 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
453 | return; | ||
454 | |||
455 | for (i = 0; i < size; i++) { | ||
456 | if (prod) | ||
457 | gk20a_writel(g, gm20b_slcg_priring[i].addr, | ||
458 | gm20b_slcg_priring[i].prod); | ||
459 | else | ||
460 | gk20a_writel(g, gm20b_slcg_priring[i].addr, | ||
461 | gm20b_slcg_priring[i].disable); | ||
462 | } | ||
463 | } | ||
464 | |||
465 | void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g, | ||
466 | bool prod) | ||
467 | { | ||
468 | u32 i; | ||
469 | u32 size = sizeof(gm20b_slcg_pwr_csb) / sizeof(struct gating_desc); | ||
470 | |||
471 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
472 | return; | ||
473 | |||
474 | for (i = 0; i < size; i++) { | ||
475 | if (prod) | ||
476 | gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr, | ||
477 | gm20b_slcg_pwr_csb[i].prod); | ||
478 | else | ||
479 | gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr, | ||
480 | gm20b_slcg_pwr_csb[i].disable); | ||
481 | } | ||
482 | } | ||
483 | |||
484 | void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g, | ||
485 | bool prod) | ||
486 | { | ||
487 | u32 i; | ||
488 | u32 size = sizeof(gm20b_slcg_pmu) / sizeof(struct gating_desc); | ||
489 | |||
490 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
491 | return; | ||
492 | |||
493 | for (i = 0; i < size; i++) { | ||
494 | if (prod) | ||
495 | gk20a_writel(g, gm20b_slcg_pmu[i].addr, | ||
496 | gm20b_slcg_pmu[i].prod); | ||
497 | else | ||
498 | gk20a_writel(g, gm20b_slcg_pmu[i].addr, | ||
499 | gm20b_slcg_pmu[i].disable); | ||
500 | } | ||
501 | } | ||
502 | |||
503 | void gm20b_slcg_therm_load_gating_prod(struct gk20a *g, | ||
504 | bool prod) | ||
505 | { | ||
506 | u32 i; | ||
507 | u32 size = sizeof(gm20b_slcg_therm) / sizeof(struct gating_desc); | ||
508 | |||
509 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
510 | return; | ||
511 | |||
512 | for (i = 0; i < size; i++) { | ||
513 | if (prod) | ||
514 | gk20a_writel(g, gm20b_slcg_therm[i].addr, | ||
515 | gm20b_slcg_therm[i].prod); | ||
516 | else | ||
517 | gk20a_writel(g, gm20b_slcg_therm[i].addr, | ||
518 | gm20b_slcg_therm[i].disable); | ||
519 | } | ||
520 | } | ||
521 | |||
522 | void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g, | ||
523 | bool prod) | ||
524 | { | ||
525 | u32 i; | ||
526 | u32 size = sizeof(gm20b_slcg_xbar) / sizeof(struct gating_desc); | ||
527 | |||
528 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG)) | ||
529 | return; | ||
530 | |||
531 | for (i = 0; i < size; i++) { | ||
532 | if (prod) | ||
533 | gk20a_writel(g, gm20b_slcg_xbar[i].addr, | ||
534 | gm20b_slcg_xbar[i].prod); | ||
535 | else | ||
536 | gk20a_writel(g, gm20b_slcg_xbar[i].addr, | ||
537 | gm20b_slcg_xbar[i].disable); | ||
538 | } | ||
539 | } | ||
540 | |||
541 | void gm20b_blcg_bus_load_gating_prod(struct gk20a *g, | ||
542 | bool prod) | ||
543 | { | ||
544 | u32 i; | ||
545 | u32 size = sizeof(gm20b_blcg_bus) / sizeof(struct gating_desc); | ||
546 | |||
547 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
548 | return; | ||
549 | |||
550 | for (i = 0; i < size; i++) { | ||
551 | if (prod) | ||
552 | gk20a_writel(g, gm20b_blcg_bus[i].addr, | ||
553 | gm20b_blcg_bus[i].prod); | ||
554 | else | ||
555 | gk20a_writel(g, gm20b_blcg_bus[i].addr, | ||
556 | gm20b_blcg_bus[i].disable); | ||
557 | } | ||
558 | } | ||
559 | |||
560 | void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g, | ||
561 | bool prod) | ||
562 | { | ||
563 | u32 i; | ||
564 | u32 size = sizeof(gm20b_blcg_ctxsw_prog) / sizeof(struct gating_desc); | ||
565 | |||
566 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
567 | return; | ||
568 | |||
569 | for (i = 0; i < size; i++) { | ||
570 | if (prod) | ||
571 | gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr, | ||
572 | gm20b_blcg_ctxsw_prog[i].prod); | ||
573 | else | ||
574 | gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr, | ||
575 | gm20b_blcg_ctxsw_prog[i].disable); | ||
576 | } | ||
577 | } | ||
578 | |||
579 | void gm20b_blcg_fb_load_gating_prod(struct gk20a *g, | ||
580 | bool prod) | ||
581 | { | ||
582 | u32 i; | ||
583 | u32 size = sizeof(gm20b_blcg_fb) / sizeof(struct gating_desc); | ||
584 | |||
585 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
586 | return; | ||
587 | |||
588 | for (i = 0; i < size; i++) { | ||
589 | if (prod) | ||
590 | gk20a_writel(g, gm20b_blcg_fb[i].addr, | ||
591 | gm20b_blcg_fb[i].prod); | ||
592 | else | ||
593 | gk20a_writel(g, gm20b_blcg_fb[i].addr, | ||
594 | gm20b_blcg_fb[i].disable); | ||
595 | } | ||
596 | } | ||
597 | |||
598 | void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g, | ||
599 | bool prod) | ||
600 | { | ||
601 | u32 i; | ||
602 | u32 size = sizeof(gm20b_blcg_fifo) / sizeof(struct gating_desc); | ||
603 | |||
604 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
605 | return; | ||
606 | |||
607 | for (i = 0; i < size; i++) { | ||
608 | if (prod) | ||
609 | gk20a_writel(g, gm20b_blcg_fifo[i].addr, | ||
610 | gm20b_blcg_fifo[i].prod); | ||
611 | else | ||
612 | gk20a_writel(g, gm20b_blcg_fifo[i].addr, | ||
613 | gm20b_blcg_fifo[i].disable); | ||
614 | } | ||
615 | } | ||
616 | |||
617 | void gm20b_blcg_gr_load_gating_prod(struct gk20a *g, | ||
618 | bool prod) | ||
619 | { | ||
620 | u32 i; | ||
621 | u32 size = sizeof(gm20b_blcg_gr) / sizeof(struct gating_desc); | ||
622 | |||
623 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
624 | return; | ||
625 | |||
626 | for (i = 0; i < size; i++) { | ||
627 | if (prod) | ||
628 | gk20a_writel(g, gm20b_blcg_gr[i].addr, | ||
629 | gm20b_blcg_gr[i].prod); | ||
630 | else | ||
631 | gk20a_writel(g, gm20b_blcg_gr[i].addr, | ||
632 | gm20b_blcg_gr[i].disable); | ||
633 | } | ||
634 | } | ||
635 | |||
636 | void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g, | ||
637 | bool prod) | ||
638 | { | ||
639 | u32 i; | ||
640 | u32 size = sizeof(gm20b_blcg_ltc) / sizeof(struct gating_desc); | ||
641 | |||
642 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
643 | return; | ||
644 | |||
645 | for (i = 0; i < size; i++) { | ||
646 | if (prod) | ||
647 | gk20a_writel(g, gm20b_blcg_ltc[i].addr, | ||
648 | gm20b_blcg_ltc[i].prod); | ||
649 | else | ||
650 | gk20a_writel(g, gm20b_blcg_ltc[i].addr, | ||
651 | gm20b_blcg_ltc[i].disable); | ||
652 | } | ||
653 | } | ||
654 | |||
655 | void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g, | ||
656 | bool prod) | ||
657 | { | ||
658 | u32 i; | ||
659 | u32 size = sizeof(gm20b_blcg_pwr_csb) / sizeof(struct gating_desc); | ||
660 | |||
661 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
662 | return; | ||
663 | |||
664 | for (i = 0; i < size; i++) { | ||
665 | if (prod) | ||
666 | gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr, | ||
667 | gm20b_blcg_pwr_csb[i].prod); | ||
668 | else | ||
669 | gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr, | ||
670 | gm20b_blcg_pwr_csb[i].disable); | ||
671 | } | ||
672 | } | ||
673 | |||
674 | void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g, | ||
675 | bool prod) | ||
676 | { | ||
677 | u32 i; | ||
678 | u32 size = sizeof(gm20b_blcg_pmu) / sizeof(struct gating_desc); | ||
679 | |||
680 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
681 | return; | ||
682 | |||
683 | for (i = 0; i < size; i++) { | ||
684 | if (prod) | ||
685 | gk20a_writel(g, gm20b_blcg_pmu[i].addr, | ||
686 | gm20b_blcg_pmu[i].prod); | ||
687 | else | ||
688 | gk20a_writel(g, gm20b_blcg_pmu[i].addr, | ||
689 | gm20b_blcg_pmu[i].disable); | ||
690 | } | ||
691 | } | ||
692 | |||
693 | void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g, | ||
694 | bool prod) | ||
695 | { | ||
696 | u32 i; | ||
697 | u32 size = sizeof(gm20b_blcg_xbar) / sizeof(struct gating_desc); | ||
698 | |||
699 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
700 | return; | ||
701 | |||
702 | for (i = 0; i < size; i++) { | ||
703 | if (prod) | ||
704 | gk20a_writel(g, gm20b_blcg_xbar[i].addr, | ||
705 | gm20b_blcg_xbar[i].prod); | ||
706 | else | ||
707 | gk20a_writel(g, gm20b_blcg_xbar[i].addr, | ||
708 | gm20b_blcg_xbar[i].disable); | ||
709 | } | ||
710 | } | ||
711 | |||
712 | void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g, | ||
713 | bool prod) | ||
714 | { | ||
715 | u32 i; | ||
716 | u32 size = sizeof(gm20b_pg_gr) / sizeof(struct gating_desc); | ||
717 | |||
718 | if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG)) | ||
719 | return; | ||
720 | |||
721 | for (i = 0; i < size; i++) { | ||
722 | if (prod) | ||
723 | gk20a_writel(g, gm20b_pg_gr[i].addr, | ||
724 | gm20b_pg_gr[i].prod); | ||
725 | else | ||
726 | gk20a_writel(g, gm20b_pg_gr[i].addr, | ||
727 | gm20b_pg_gr[i].disable); | ||
728 | } | ||
729 | } | ||
730 | |||
731 | #endif /* __gm20b_gating_reglist_h__ */ | ||
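Every *_load_gating_prod() helper above follows the same pattern: check the NVGPU_GPU_CAN_SLCG/BLCG capability flag, then walk a table of {addr, prod, disable} descriptors and write either the production value or the disable value to each register. A minimal standalone sketch of that pattern, where reg_write() stands in for gk20a_writel() and the two table entries are copied from the SLCG lists above purely as examples:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <inttypes.h>

struct gating_desc {
	uint32_t addr;
	uint32_t prod;
	uint32_t disable;
};

/* Example entries only; the driver carries one table per unit. */
static const struct gating_desc example_slcg[] = {
	{ .addr = 0x0000017c, .prod = 0x00020008, .disable = 0x0003fffe },
	{ .addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f },
};

/* Stand-in for gk20a_writel(); just logs the register write. */
static void reg_write(uint32_t addr, uint32_t val)
{
	printf("write 0x%08" PRIx32 " -> [0x%08" PRIx32 "]\n", val, addr);
}

static void load_gating_prod(const struct gating_desc *descs,
			     size_t count, int prod)
{
	size_t i;

	for (i = 0; i < count; i++)
		reg_write(descs[i].addr,
			  prod ? descs[i].prod : descs[i].disable);
}

int main(void)
{
	/* prod = 1 applies the production clock-gating settings */
	load_gating_prod(example_slcg,
			 sizeof(example_slcg) / sizeof(example_slcg[0]), 1);
	return 0;
}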
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h new file mode 100644 index 00000000..557f5689 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gm20b/gm20b_gating_reglist.h | ||
3 | * | ||
4 | * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | * | ||
24 | * This file is autogenerated. Do not edit. | ||
25 | */ | ||
26 | |||
27 | #include "gk20a/gk20a.h" | ||
28 | |||
29 | void gm20b_slcg_bus_load_gating_prod(struct gk20a *g, | ||
30 | bool prod); | ||
31 | |||
32 | void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g, | ||
33 | bool prod); | ||
34 | |||
35 | void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g, | ||
36 | bool prod); | ||
37 | |||
38 | void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g, | ||
39 | bool prod); | ||
40 | |||
41 | void gm20b_slcg_fb_load_gating_prod(struct gk20a *g, | ||
42 | bool prod); | ||
43 | |||
44 | void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g, | ||
45 | bool prod); | ||
46 | |||
47 | void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g, | ||
48 | bool prod); | ||
49 | |||
50 | void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g, | ||
51 | bool prod); | ||
52 | |||
53 | void gm20b_slcg_perf_load_gating_prod(struct gk20a *g, | ||
54 | bool prod); | ||
55 | |||
56 | void gm20b_slcg_priring_load_gating_prod(struct gk20a *g, | ||
57 | bool prod); | ||
58 | |||
59 | void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g, | ||
60 | bool prod); | ||
61 | |||
62 | void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g, | ||
63 | bool prod); | ||
64 | |||
65 | void gm20b_slcg_therm_load_gating_prod(struct gk20a *g, | ||
66 | bool prod); | ||
67 | |||
68 | void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g, | ||
69 | bool prod); | ||
70 | |||
71 | void gm20b_blcg_bus_load_gating_prod(struct gk20a *g, | ||
72 | bool prod); | ||
73 | |||
74 | void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g, | ||
75 | bool prod); | ||
76 | |||
77 | void gm20b_blcg_fb_load_gating_prod(struct gk20a *g, | ||
78 | bool prod); | ||
79 | |||
80 | void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g, | ||
81 | bool prod); | ||
82 | |||
83 | void gm20b_blcg_gr_load_gating_prod(struct gk20a *g, | ||
84 | bool prod); | ||
85 | |||
86 | void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g, | ||
87 | bool prod); | ||
88 | |||
89 | void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g, | ||
90 | bool prod); | ||
91 | |||
92 | void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g, | ||
93 | bool prod); | ||
94 | |||
95 | void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g, | ||
96 | bool prod); | ||
97 | |||
98 | void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g, | ||
99 | bool prod); | ||
100 | |||
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c new file mode 100644 index 00000000..cd7433b3 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c | |||
@@ -0,0 +1,72 @@ | |||
1 | /* | ||
2 | * drivers/video/tegra/host/gm20b/gr_ctx_gm20b.c | ||
3 | * | ||
4 | * GM20B Graphics Context | ||
5 | * | ||
6 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
7 | * | ||
8 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
9 | * copy of this software and associated documentation files (the "Software"), | ||
10 | * to deal in the Software without restriction, including without limitation | ||
11 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
12 | * and/or sell copies of the Software, and to permit persons to whom the | ||
13 | * Software is furnished to do so, subject to the following conditions: | ||
14 | * | ||
15 | * The above copyright notice and this permission notice shall be included in | ||
16 | * all copies or substantial portions of the Software. | ||
17 | * | ||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
24 | * DEALINGS IN THE SOFTWARE. | ||
25 | */ | ||
26 | |||
27 | #include "gk20a/gk20a.h" | ||
28 | #include "gr_ctx_gm20b.h" | ||
29 | |||
30 | int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name) | ||
31 | { | ||
32 | switch (index) { | ||
33 | #ifdef GM20B_NETLIST_IMAGE_FW_NAME | ||
34 | case NETLIST_FINAL: | ||
35 | sprintf(name, GM20B_NETLIST_IMAGE_FW_NAME); | ||
36 | return 0; | ||
37 | #endif | ||
38 | #ifdef GK20A_NETLIST_IMAGE_A | ||
39 | case NETLIST_SLOT_A: | ||
40 | sprintf(name, GK20A_NETLIST_IMAGE_A); | ||
41 | return 0; | ||
42 | #endif | ||
43 | #ifdef GK20A_NETLIST_IMAGE_B | ||
44 | case NETLIST_SLOT_B: | ||
45 | sprintf(name, GK20A_NETLIST_IMAGE_B); | ||
46 | return 0; | ||
47 | #endif | ||
48 | #ifdef GK20A_NETLIST_IMAGE_C | ||
49 | case NETLIST_SLOT_C: | ||
50 | sprintf(name, GK20A_NETLIST_IMAGE_C); | ||
51 | return 0; | ||
52 | #endif | ||
53 | #ifdef GK20A_NETLIST_IMAGE_D | ||
54 | case NETLIST_SLOT_D: | ||
55 | sprintf(name, GK20A_NETLIST_IMAGE_D); | ||
56 | return 0; | ||
57 | #endif | ||
58 | default: | ||
59 | return -1; | ||
60 | } | ||
61 | |||
62 | return -1; | ||
63 | } | ||
64 | |||
65 | bool gr_gm20b_is_firmware_defined(void) | ||
66 | { | ||
67 | #ifdef GM20B_NETLIST_IMAGE_FW_NAME | ||
68 | return true; | ||
69 | #else | ||
70 | return false; | ||
71 | #endif | ||
72 | } | ||
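gr_gm20b_get_netlist_name() above only compiles in the case labels for netlist slots whose firmware-name macro is defined; the header below defines GM20B_NETLIST_IMAGE_FW_NAME, so NETLIST_FINAL resolves to a fixed firmware filename. A standalone sketch of that compile-time selection; the index constant and image name here are placeholders, not the real NETLIST_* values or netlist images:

#include <stdio.h>

/* Placeholder index constant and firmware name, for illustration only */
#define NETLIST_FINAL	(-1)
#define EXAMPLE_NETLIST_IMAGE_FW_NAME "net_img_example.bin"

static int get_netlist_name(int index, char *name, size_t len)
{
	switch (index) {
#ifdef EXAMPLE_NETLIST_IMAGE_FW_NAME
	case NETLIST_FINAL:
		snprintf(name, len, "%s", EXAMPLE_NETLIST_IMAGE_FW_NAME);
		return 0;
#endif
	default:
		return -1;
	}
}

int main(void)
{
	char name[64];

	if (get_netlist_name(NETLIST_FINAL, name, sizeof(name)) == 0)
		printf("netlist firmware: %s\n", name);
	return 0;
}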
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h new file mode 100644 index 00000000..8712b353 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h | |||
@@ -0,0 +1,36 @@ | |||
1 | /* | ||
2 | * GM20B Graphics Context | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | #ifndef __GR_CTX_GM20B_H__ | ||
25 | #define __GR_CTX_GM20B_H__ | ||
26 | |||
27 | #include "gk20a/gr_ctx_gk20a.h" | ||
28 | |||
29 | /* production netlist, one and only one from below */ | ||
30 | /*#undef GM20B_NETLIST_IMAGE_FW_NAME*/ | ||
31 | #define GM20B_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B | ||
32 | |||
33 | int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name); | ||
34 | bool gr_gm20b_is_firmware_defined(void); | ||
35 | |||
36 | #endif /*__GR_CTX_GM20B_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c new file mode 100644 index 00000000..ef46c1ee --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -0,0 +1,1527 @@ | |||
1 | /* | ||
2 | * GM20B GPC MMU | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <uapi/linux/nvgpu.h> | ||
26 | |||
27 | #include <nvgpu/kmem.h> | ||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/enabled.h> | ||
30 | #include <nvgpu/debug.h> | ||
31 | #include <nvgpu/fuse.h> | ||
32 | |||
33 | #include "gk20a/gk20a.h" | ||
34 | #include "gk20a/gr_gk20a.h" | ||
35 | #include "common/linux/os_linux.h" | ||
36 | |||
37 | #include "gr_gm20b.h" | ||
38 | #include "pmu_gm20b.h" | ||
39 | |||
40 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | ||
41 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | ||
42 | #include <nvgpu/hw/gm20b/hw_fb_gm20b.h> | ||
43 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
44 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
45 | #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> | ||
46 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
47 | |||
48 | void gr_gm20b_init_gpc_mmu(struct gk20a *g) | ||
49 | { | ||
50 | u32 temp; | ||
51 | |||
52 | gk20a_dbg_info("initialize gpc mmu"); | ||
53 | |||
54 | if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
55 | /* Bypass MMU check for non-secure boot. For | ||
56 | * secure boot, this register write has no effect. */ | ||
57 | gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); | ||
58 | } | ||
59 | temp = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
60 | temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | | ||
61 | gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | | ||
62 | gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() | | ||
63 | gr_gpcs_pri_mmu_ctrl_vol_fault_m() | | ||
64 | gr_gpcs_pri_mmu_ctrl_comp_fault_m() | | ||
65 | gr_gpcs_pri_mmu_ctrl_miss_gran_m() | | ||
66 | gr_gpcs_pri_mmu_ctrl_cache_mode_m() | | ||
67 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | ||
68 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | ||
69 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); | ||
70 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | ||
71 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | ||
72 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | ||
73 | |||
74 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), | ||
75 | gk20a_readl(g, fb_mmu_debug_ctrl_r())); | ||
76 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), | ||
77 | gk20a_readl(g, fb_mmu_debug_wr_r())); | ||
78 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), | ||
79 | gk20a_readl(g, fb_mmu_debug_rd_r())); | ||
80 | |||
81 | gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), | ||
82 | gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); | ||
83 | } | ||
84 | |||
85 | void gr_gm20b_bundle_cb_defaults(struct gk20a *g) | ||
86 | { | ||
87 | struct gr_gk20a *gr = &g->gr; | ||
88 | |||
89 | gr->bundle_cb_default_size = | ||
90 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
91 | gr->min_gpm_fifo_depth = | ||
92 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
93 | gr->bundle_cb_token_limit = | ||
94 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
95 | } | ||
96 | |||
97 | void gr_gm20b_cb_size_default(struct gk20a *g) | ||
98 | { | ||
99 | struct gr_gk20a *gr = &g->gr; | ||
100 | |||
101 | if (!gr->attrib_cb_default_size) | ||
102 | gr->attrib_cb_default_size = | ||
103 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); | ||
104 | gr->alpha_cb_default_size = | ||
105 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); | ||
106 | } | ||
107 | |||
108 | int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) | ||
109 | { | ||
110 | struct gr_gk20a *gr = &g->gr; | ||
111 | int size; | ||
112 | |||
113 | gr->attrib_cb_size = gr->attrib_cb_default_size | ||
114 | + (gr->attrib_cb_default_size >> 1); | ||
115 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
116 | + (gr->alpha_cb_default_size >> 1); | ||
117 | |||
118 | size = gr->attrib_cb_size * | ||
119 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * | ||
120 | gr->max_tpc_count; | ||
121 | |||
122 | size += gr->alpha_cb_size * | ||
123 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * | ||
124 | gr->max_tpc_count; | ||
125 | |||
126 | return size; | ||
127 | } | ||
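gr_gm20b_calc_global_ctx_buffer_size() above gives the attribute (beta) and alpha circular buffers 1.5x headroom over their defaults and then multiplies by the per-chip granularity and the maximum TPC count. A standalone arithmetic sketch with placeholder granularities, defaults, and counts; the real values come from the hw_gr_gm20b.h *_granularity_v() accessors and the detected TPC configuration:

#include <stdio.h>

/* Illustrative constants only */
#define BETA_GRANULARITY	32
#define ALPHA_GRANULARITY	32

int main(void)
{
	unsigned int attrib_default = 0x800;	/* hypothetical defaults */
	unsigned int alpha_default = 0x800;
	unsigned int max_tpc_count = 2;
	unsigned int attrib_cb, alpha_cb, size;

	/* 1.5x headroom over the defaults, as in the driver */
	attrib_cb = attrib_default + (attrib_default >> 1);
	alpha_cb = alpha_default + (alpha_default >> 1);

	size = attrib_cb * BETA_GRANULARITY * max_tpc_count
	     + alpha_cb * ALPHA_GRANULARITY * max_tpc_count;

	printf("global ctx attrib/alpha buffer: %u bytes\n", size);
	return 0;
}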
128 | |||
129 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | ||
130 | struct channel_ctx_gk20a *ch_ctx, | ||
131 | u64 addr, bool patch) | ||
132 | { | ||
133 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | ||
134 | gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | | ||
135 | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); | ||
136 | |||
137 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), | ||
138 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | | ||
139 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); | ||
140 | |||
141 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), | ||
142 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | | ||
143 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); | ||
144 | } | ||
145 | |||
146 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | ||
147 | struct channel_ctx_gk20a *ch_ctx, | ||
148 | u64 addr, u64 size, bool patch) | ||
149 | { | ||
150 | u32 data; | ||
151 | |||
152 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
153 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
154 | |||
155 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
156 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
157 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
158 | |||
159 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), | ||
160 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); | ||
161 | |||
162 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), | ||
163 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | | ||
164 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); | ||
165 | |||
166 | /* data for state_limit */ | ||
167 | data = (g->gr.bundle_cb_default_size * | ||
168 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
169 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
170 | |||
171 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
172 | |||
173 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
174 | g->gr.bundle_cb_token_limit, data); | ||
175 | |||
176 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
177 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
178 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
179 | |||
180 | } | ||
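The state_limit programmed into gr_pd_ab_dist_cfg2 above is derived from the default bundle CB size: its 256-byte units are converted to bytes, divided by the distributor's state-limit granularity, and the result is capped at min_gpm_fifo_depth. A standalone sketch of that arithmetic with placeholder constants; the real values come from the __prod_v() and granularity accessors in hw_gr_gm20b.h:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Placeholder values for illustration only */
	unsigned int bundle_cb_default_size = 0x18;	/* in 256B units */
	unsigned int byte_granularity = 256;
	unsigned int state_limit_granularity = 32;
	unsigned int min_gpm_fifo_depth = 0x62;

	unsigned int state_limit = (bundle_cb_default_size * byte_granularity)
				 / state_limit_granularity;

	/* Cap at the minimum GPM FIFO depth, as the driver does */
	state_limit = min_u32(state_limit, min_gpm_fifo_depth);

	printf("state_limit = %u\n", state_limit);
	return 0;
}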
181 | |||
182 | int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | ||
183 | struct channel_gk20a *c, bool patch) | ||
184 | { | ||
185 | struct gr_gk20a *gr = &g->gr; | ||
186 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
187 | u32 attrib_offset_in_chunk = 0; | ||
188 | u32 alpha_offset_in_chunk = 0; | ||
189 | u32 pd_ab_max_output; | ||
190 | u32 gpc_index, ppc_index; | ||
191 | u32 cbm_cfg_size1, cbm_cfg_size2; | ||
192 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
193 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
194 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, | ||
195 | GPU_LIT_NUM_PES_PER_GPC); | ||
196 | |||
197 | gk20a_dbg_fn(""); | ||
198 | |||
199 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), | ||
200 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | | ||
201 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), | ||
202 | patch); | ||
203 | |||
204 | pd_ab_max_output = (gr->alpha_cb_default_size * | ||
205 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / | ||
206 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
207 | |||
208 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | ||
209 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
210 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | ||
211 | |||
212 | alpha_offset_in_chunk = attrib_offset_in_chunk + | ||
213 | gr->tpc_count * gr->attrib_cb_size; | ||
214 | |||
215 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
216 | u32 temp = gpc_stride * gpc_index; | ||
217 | u32 temp2 = num_pes_per_gpc * gpc_index; | ||
218 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
219 | ppc_index++) { | ||
220 | cbm_cfg_size1 = gr->attrib_cb_default_size * | ||
221 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
222 | cbm_cfg_size2 = gr->alpha_cb_default_size * | ||
223 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
224 | |||
225 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
226 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | ||
227 | ppc_in_gpc_stride * ppc_index, | ||
228 | cbm_cfg_size1, patch); | ||
229 | |||
230 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
231 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | ||
232 | ppc_in_gpc_stride * ppc_index, | ||
233 | attrib_offset_in_chunk, patch); | ||
234 | |||
235 | attrib_offset_in_chunk += gr->attrib_cb_size * | ||
236 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
237 | |||
238 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
239 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | ||
240 | ppc_in_gpc_stride * ppc_index, | ||
241 | cbm_cfg_size2, patch); | ||
242 | |||
243 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
244 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | ||
245 | ppc_in_gpc_stride * ppc_index, | ||
246 | alpha_offset_in_chunk, patch); | ||
247 | |||
248 | alpha_offset_in_chunk += gr->alpha_cb_size * | ||
249 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
250 | |||
251 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
252 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), | ||
253 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | | ||
254 | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3), | ||
255 | patch); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, | ||
263 | struct channel_ctx_gk20a *ch_ctx, | ||
264 | u64 addr, u32 size, bool patch) | ||
265 | { | ||
266 | gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); | ||
267 | |||
268 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), | ||
269 | gr_gpcs_swdx_rm_pagepool_total_pages_f(size) | | ||
270 | gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch); | ||
271 | |||
272 | } | ||
273 | |||
274 | void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data) | ||
275 | { | ||
276 | u32 val; | ||
277 | |||
278 | gk20a_dbg_fn(""); | ||
279 | |||
280 | val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); | ||
281 | val = set_field(val, gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), | ||
282 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data)); | ||
283 | gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val); | ||
284 | |||
285 | gk20a_dbg_fn("done"); | ||
286 | } | ||
287 | |||
288 | int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, | ||
289 | u32 class_num, u32 offset, u32 data) | ||
290 | { | ||
291 | gk20a_dbg_fn(""); | ||
292 | |||
293 | if (class_num == MAXWELL_COMPUTE_B) { | ||
294 | switch (offset << 2) { | ||
295 | case NVB1C0_SET_SHADER_EXCEPTIONS: | ||
296 | gk20a_gr_set_shader_exceptions(g, data); | ||
297 | break; | ||
298 | case NVB1C0_SET_RD_COALESCE: | ||
299 | gr_gm20b_set_rd_coalesce(g, data); | ||
300 | break; | ||
301 | default: | ||
302 | goto fail; | ||
303 | } | ||
304 | } | ||
305 | |||
306 | if (class_num == MAXWELL_B) { | ||
307 | switch (offset << 2) { | ||
308 | case NVB197_SET_SHADER_EXCEPTIONS: | ||
309 | gk20a_gr_set_shader_exceptions(g, data); | ||
310 | break; | ||
311 | case NVB197_SET_CIRCULAR_BUFFER_SIZE: | ||
312 | g->ops.gr.set_circular_buffer_size(g, data); | ||
313 | break; | ||
314 | case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
315 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
316 | break; | ||
317 | case NVB197_SET_RD_COALESCE: | ||
318 | gr_gm20b_set_rd_coalesce(g, data); | ||
319 | break; | ||
320 | default: | ||
321 | goto fail; | ||
322 | } | ||
323 | } | ||
324 | return 0; | ||
325 | |||
326 | fail: | ||
327 | return -EINVAL; | ||
328 | } | ||
329 | |||
330 | void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | ||
331 | { | ||
332 | struct gr_gk20a *gr = &g->gr; | ||
333 | u32 gpc_index, ppc_index, stride, val; | ||
334 | u32 pd_ab_max_output; | ||
335 | u32 alpha_cb_size = data * 4; | ||
336 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
337 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
338 | |||
339 | gk20a_dbg_fn(""); | ||
340 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | ||
341 | return; */ | ||
342 | |||
343 | if (alpha_cb_size > gr->alpha_cb_size) | ||
344 | alpha_cb_size = gr->alpha_cb_size; | ||
345 | |||
346 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
347 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
348 | ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | | ||
349 | gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); | ||
350 | |||
351 | pd_ab_max_output = alpha_cb_size * | ||
352 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() / | ||
353 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
354 | |||
355 | gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), | ||
356 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
357 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | ||
358 | |||
359 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
360 | stride = gpc_stride * gpc_index; | ||
361 | |||
362 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
363 | ppc_index++) { | ||
364 | |||
365 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
366 | stride + | ||
367 | ppc_in_gpc_stride * ppc_index); | ||
368 | |||
369 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), | ||
370 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * | ||
371 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
372 | |||
373 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
374 | stride + | ||
375 | ppc_in_gpc_stride * ppc_index, val); | ||
376 | } | ||
377 | } | ||
378 | } | ||
379 | |||
380 | void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | ||
381 | { | ||
382 | struct gr_gk20a *gr = &g->gr; | ||
383 | u32 gpc_index, ppc_index, stride, val; | ||
384 | u32 cb_size = data * 4; | ||
385 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
386 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
387 | |||
388 | gk20a_dbg_fn(""); | ||
389 | |||
390 | if (cb_size > gr->attrib_cb_size) | ||
391 | cb_size = gr->attrib_cb_size; | ||
392 | |||
393 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
394 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
395 | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | | ||
396 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | ||
397 | |||
398 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
399 | stride = gpc_stride * gpc_index; | ||
400 | |||
401 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
402 | ppc_index++) { | ||
403 | |||
404 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
405 | stride + | ||
406 | ppc_in_gpc_stride * ppc_index); | ||
407 | |||
408 | val = set_field(val, | ||
409 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), | ||
410 | gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * | ||
411 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
412 | |||
413 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
414 | stride + | ||
415 | ppc_in_gpc_stride * ppc_index, val); | ||
416 | |||
417 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
418 | ppc_index + gpc_index)); | ||
419 | |||
420 | val = set_field(val, | ||
421 | gr_gpcs_swdx_tc_beta_cb_size_v_m(), | ||
422 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size * | ||
423 | gr->gpc_ppc_count[gpc_index])); | ||
424 | val = set_field(val, | ||
425 | gr_gpcs_swdx_tc_beta_cb_size_div3_m(), | ||
426 | gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size * | ||
427 | gr->gpc_ppc_count[gpc_index])/3)); | ||
428 | |||
429 | gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
430 | ppc_index + gpc_index), val); | ||
431 | } | ||
432 | } | ||
433 | } | ||
434 | |||
435 | void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) | ||
436 | { | ||
437 | /* setup sm warp esr report masks */ | ||
438 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), | ||
439 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | | ||
440 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | | ||
441 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | | ||
442 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | | ||
443 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | | ||
444 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | | ||
445 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | | ||
446 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | | ||
447 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | | ||
448 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | | ||
449 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | | ||
450 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | | ||
451 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | | ||
452 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | | ||
453 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | | ||
454 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | | ||
455 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | | ||
456 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | | ||
457 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() | | ||
458 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() | | ||
459 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | | ||
460 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); | ||
461 | |||
462 | /* setup sm global esr report mask */ | ||
463 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), | ||
464 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | | ||
465 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); | ||
466 | } | ||
467 | |||
468 | bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | ||
469 | { | ||
470 | bool valid = false; | ||
471 | |||
472 | switch (class_num) { | ||
473 | case MAXWELL_COMPUTE_B: | ||
474 | case MAXWELL_B: | ||
475 | case FERMI_TWOD_A: | ||
476 | case KEPLER_DMA_COPY_A: | ||
477 | case MAXWELL_DMA_COPY_A: | ||
478 | valid = true; | ||
479 | break; | ||
480 | |||
481 | default: | ||
482 | break; | ||
483 | } | ||
484 | |||
485 | return valid; | ||
486 | } | ||
487 | |||
488 | bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
489 | { | ||
490 | if (class_num == MAXWELL_B) | ||
491 | return true; | ||
492 | else | ||
493 | return false; | ||
494 | } | ||
495 | |||
496 | bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
497 | { | ||
498 | if (class_num == MAXWELL_COMPUTE_B) | ||
499 | return true; | ||
500 | else | ||
501 | return false; | ||
502 | } | ||
503 | |||
504 | |||
505 | /* Following are the blocks of registers that the ucode | ||
506 | stores in the extended region.*/ | ||
507 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | ||
508 | static const u32 _num_sm_dsm_perf_regs; | ||
509 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
510 | static const u32 _num_sm_dsm_perf_ctrl_regs = 2; | ||
511 | static u32 *_sm_dsm_perf_regs; | ||
512 | static u32 _sm_dsm_perf_ctrl_regs[2]; | ||
513 | |||
514 | void gr_gm20b_init_sm_dsm_reg_info(void) | ||
515 | { | ||
516 | if (_sm_dsm_perf_ctrl_regs[0] != 0) | ||
517 | return; | ||
518 | |||
519 | _sm_dsm_perf_ctrl_regs[0] = | ||
520 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(); | ||
521 | _sm_dsm_perf_ctrl_regs[1] = | ||
522 | gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); | ||
523 | } | ||
524 | |||
525 | void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, | ||
526 | u32 *num_sm_dsm_perf_regs, | ||
527 | u32 **sm_dsm_perf_regs, | ||
528 | u32 *perf_register_stride) | ||
529 | { | ||
530 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; | ||
531 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; | ||
532 | *perf_register_stride = 0; | ||
533 | } | ||
534 | |||
535 | void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
536 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
537 | u32 **sm_dsm_perf_ctrl_regs, | ||
538 | u32 *ctrl_register_stride) | ||
539 | { | ||
540 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
541 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
542 | |||
543 | *ctrl_register_stride = | ||
544 | ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
545 | } | ||
546 | |||
547 | u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
548 | { | ||
549 | u32 val; | ||
550 | struct gr_gk20a *gr = &g->gr; | ||
551 | |||
552 | /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */ | ||
553 | val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index)); | ||
554 | |||
555 | return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); | ||
556 | } | ||
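gr_gm20b_get_gpc_tpc_mask() above turns the fuse's "TPC disabled" bits into a "TPC enabled" mask by inverting the register value and masking it to the per-GPC TPC count. A tiny standalone sketch of that bit manipulation with a hypothetical fuse value:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	/* Hypothetical fuse value: bit 0 set means TPC0 is floorswept */
	uint32_t fuse_status_opt_tpc = 0x1;
	uint32_t max_tpc_per_gpc = 2;

	uint32_t enabled = (~fuse_status_opt_tpc) &
			   ((UINT32_C(1) << max_tpc_per_gpc) - 1);

	printf("enabled TPC mask = 0x%" PRIx32 "\n", enabled);	/* 0x2 */
	return 0;
}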
557 | |||
558 | void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
559 | { | ||
560 | nvgpu_tegra_fuse_write_bypass(g, 0x1); | ||
561 | nvgpu_tegra_fuse_write_access_sw(g, 0x0); | ||
562 | |||
563 | if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) { | ||
564 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); | ||
565 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1); | ||
566 | } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) { | ||
567 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); | ||
568 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); | ||
569 | } else { | ||
570 | nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); | ||
571 | nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); | ||
572 | } | ||
573 | } | ||
574 | |||
575 | void gr_gm20b_load_tpc_mask(struct gk20a *g) | ||
576 | { | ||
577 | u32 pes_tpc_mask = 0, fuse_tpc_mask; | ||
578 | u32 gpc, pes; | ||
579 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
580 | |||
581 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) | ||
582 | for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) { | ||
583 | pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] << | ||
584 | num_tpc_per_gpc * gpc; | ||
585 | } | ||
586 | |||
587 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); | ||
588 | if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask && | ||
589 | fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) { | ||
590 | u32 val = g->tpc_fs_mask_user; | ||
591 | val &= (0x1U << g->gr.max_tpc_count) - 1U; | ||
592 | /* do not disable an arbitrary tpc (that causes channel timeouts); just limit the enabled tpc count */ | ||
593 | val = (0x1U << hweight32(val)) - 1U; | ||
594 | gk20a_writel(g, gr_fe_tpc_fs_r(), val); | ||
595 | } else { | ||
596 | gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask); | ||
597 | } | ||
598 | } | ||
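When a user-supplied TPC floorsweeping mask differs from the fuse mask, gr_gm20b_load_tpc_mask() above keeps only the number of TPCs the user asked for rather than the exact TPCs, collapsing the mask to its lowest popcount(val) bits before programming gr_fe_tpc_fs. A standalone sketch of that transformation, with popcount32() standing in for the kernel's hweight32() and a hypothetical user mask:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Portable stand-in for the kernel's hweight32() */
static unsigned int popcount32(uint32_t v)
{
	unsigned int n = 0;

	while (v) {
		n += v & 1;
		v >>= 1;
	}
	return n;
}

int main(void)
{
	uint32_t max_tpc_count = 2;
	uint32_t user_mask = 0x2;	/* hypothetical: keep only TPC1 */

	uint32_t val = user_mask & ((UINT32_C(1) << max_tpc_count) - 1);

	/* Keep the requested number of TPCs, but always the lowest ones */
	val = (UINT32_C(1) << popcount32(val)) - 1;

	printf("programmed gr_fe_tpc_fs mask = 0x%" PRIx32 "\n", val);	/* 0x1 */
	return 0;
}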
599 | |||
600 | void gr_gm20b_program_sm_id_numbering(struct gk20a *g, | ||
601 | u32 gpc, u32 tpc, u32 smid) | ||
602 | { | ||
603 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
604 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
605 | u32 gpc_offset = gpc_stride * gpc; | ||
606 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
607 | |||
608 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
609 | gr_gpc0_tpc0_sm_cfg_sm_id_f(smid)); | ||
610 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
611 | gr_gpc0_gpm_pd_sm_id_id_f(smid)); | ||
612 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
613 | gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); | ||
614 | } | ||
615 | |||
616 | int gr_gm20b_load_smid_config(struct gk20a *g) | ||
617 | { | ||
618 | u32 *tpc_sm_id; | ||
619 | u32 i, j; | ||
620 | u32 tpc_index, gpc_index; | ||
621 | |||
622 | tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32)); | ||
623 | if (!tpc_sm_id) | ||
624 | return -ENOMEM; | ||
625 | |||
626 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ | ||
627 | for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { | ||
628 | u32 reg = 0; | ||
629 | u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + | ||
630 | gr_cwd_gpc_tpc_id_tpc0_s(); | ||
631 | |||
632 | for (j = 0; j < 4; j++) { | ||
633 | u32 sm_id = (i * 4) + j; | ||
634 | u32 bits; | ||
635 | |||
636 | if (sm_id >= g->gr.tpc_count) | ||
637 | break; | ||
638 | |||
639 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
640 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
641 | |||
642 | bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | | ||
643 | gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
644 | reg |= bits << (j * bit_stride); | ||
645 | |||
646 | tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride; | ||
647 | } | ||
648 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); | ||
649 | } | ||
650 | |||
651 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | ||
652 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | ||
653 | |||
654 | nvgpu_kfree(g, tpc_sm_id); | ||
655 | |||
656 | return 0; | ||
657 | } | ||
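gr_gm20b_load_smid_config() above packs four (gpc, tpc) pairs into each NV_PGRAPH_PRI_CWD_GPC_TPC_ID register, using the combined widths of the gpc0 and tpc0 fields as the per-entry bit stride. A standalone sketch of that packing; the field widths and bit positions here are assumptions for illustration, not the layout reported by the gr_cwd_gpc_tpc_id_*_s()/_f() accessors:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define GPC_FIELD_BITS	4	/* assumed width of the gpc0 field */
#define TPC_FIELD_BITS	4	/* assumed width of the tpc0 field */

int main(void)
{
	/* Hypothetical SM -> (gpc, tpc) mapping for a 2-TPC chip */
	const struct { uint32_t gpc, tpc; } sm_to_cluster[] = {
		{ 0, 0 }, { 0, 1 },
	};
	const uint32_t tpc_count = 2;
	uint32_t bit_stride = GPC_FIELD_BITS + TPC_FIELD_BITS;
	uint32_t reg = 0;
	uint32_t j;

	for (j = 0; j < 4 && j < tpc_count; j++) {
		/* Illustrative field placement: gpc in the low bits,
		 * tpc immediately above it. */
		uint32_t bits = sm_to_cluster[j].gpc |
				(sm_to_cluster[j].tpc << GPC_FIELD_BITS);

		reg |= bits << (j * bit_stride);
	}

	printf("CWD_GPC_TPC_ID(0) = 0x%08" PRIx32 "\n", reg);
	return 0;
}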
658 | |||
659 | int gr_gm20b_init_fs_state(struct gk20a *g) | ||
660 | { | ||
661 | int err = 0; | ||
662 | |||
663 | gk20a_dbg_fn(""); | ||
664 | |||
665 | err = gr_gk20a_init_fs_state(g); | ||
666 | if (err) | ||
667 | return err; | ||
668 | |||
669 | g->ops.gr.load_tpc_mask(g); | ||
670 | |||
671 | gk20a_writel(g, gr_bes_zrop_settings_r(), | ||
672 | gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count)); | ||
673 | gk20a_writel(g, gr_bes_crop_settings_r(), | ||
674 | gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count)); | ||
675 | |||
676 | gk20a_writel(g, gr_bes_crop_debug3_r(), | ||
677 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | ||
678 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | ||
679 | |||
680 | g->ops.gr.load_smid_config(g); | ||
681 | |||
682 | return err; | ||
683 | } | ||
684 | |||
685 | int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
686 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
687 | { | ||
688 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
689 | gr_fecs_dmactl_require_ctx_f(0)); | ||
690 | |||
691 | /* Copy falcon bootloader into dmem */ | ||
692 | gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); | ||
693 | gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); | ||
694 | |||
695 | /* start the falcon immediately if PRIV security is disabled*/ | ||
696 | if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
697 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), | ||
698 | gr_fecs_cpuctl_startcpu_f(0x01)); | ||
699 | } | ||
700 | |||
701 | return 0; | ||
702 | } | ||
703 | |||
704 | static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) | ||
705 | { | ||
706 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
707 | u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); | ||
708 | return (addr >= tpc_in_gpc_shared_base) && | ||
709 | (addr < (tpc_in_gpc_shared_base + | ||
710 | tpc_in_gpc_stride)); | ||
711 | } | ||
712 | |||
713 | bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) | ||
714 | { | ||
715 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
716 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
717 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
718 | return ((addr >= tpc_in_gpc_base) && | ||
719 | (addr < tpc_in_gpc_base + | ||
720 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
721 | || gr_gm20b_is_tpc_addr_shared(g, addr); | ||
722 | } | ||
723 | |||
724 | u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) | ||
725 | { | ||
726 | u32 i, start; | ||
727 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
728 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
729 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
730 | |||
731 | for (i = 0; i < num_tpcs; i++) { | ||
732 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); | ||
733 | if ((addr >= start) && | ||
734 | (addr < (start + tpc_in_gpc_stride))) | ||
735 | return i; | ||
736 | } | ||
737 | return 0; | ||
738 | } | ||
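The three helpers above classify a priv address purely by interval tests: each TPC owns a window of TPC_IN_GPC_STRIDE bytes starting at TPC_IN_GPC_BASE + tpc * stride, and a separate shared window covers the broadcast range. A minimal sketch of that lookup follows; the base, stride and TPC count are made-up values, whereas the driver obtains the real ones through nvgpu_get_litter_value().

/* Illustrative TPC-window lookup; every constant below is an assumption. */
#include <stdbool.h>
#include <stdint.h>

#define TPC_IN_GPC_BASE		0x4000u	/* assumed */
#define TPC_IN_GPC_STRIDE	0x800u	/* assumed */
#define NUM_TPC_PER_GPC		2u	/* assumed */

bool addr_to_tpc_num(uint32_t addr, uint32_t *tpc)
{
	uint32_t i;

	for (i = 0; i < NUM_TPC_PER_GPC; i++) {
		uint32_t start = TPC_IN_GPC_BASE + i * TPC_IN_GPC_STRIDE;

		if (addr >= start && addr < start + TPC_IN_GPC_STRIDE) {
			*tpc = i;
			return true;
		}
	}
	return false;	/* not a per-TPC unicast address */
}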
739 | |||
740 | #ifdef CONFIG_TEGRA_ACR | ||
741 | static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) | ||
742 | { | ||
743 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | ||
744 | u64 addr_base = ucode_info->surface_desc.gpu_va; | ||
745 | |||
746 | gr_gk20a_load_falcon_bind_instblk(g); | ||
747 | |||
748 | g->ops.gr.falcon_load_ucode(g, addr_base, | ||
749 | &g->ctxsw_ucode_info.gpccs, | ||
750 | gr_gpcs_gpccs_falcon_hwcfg_r() - | ||
751 | gr_fecs_falcon_hwcfg_r()); | ||
752 | } | ||
753 | |||
754 | int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) | ||
755 | { | ||
756 | u32 err, flags; | ||
757 | u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - | ||
758 | gr_fecs_falcon_hwcfg_r(); | ||
759 | u8 falcon_id_mask = 0; | ||
760 | |||
761 | gk20a_dbg_fn(""); | ||
762 | |||
763 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
764 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), | ||
765 | gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
766 | gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), | ||
767 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); | ||
768 | } | ||
769 | |||
770 | flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; | ||
771 | g->pmu_lsf_loaded_falcon_id = 0; | ||
772 | if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) { | ||
773 | /* this must be recovery, so bootstrap both fecs and gpccs */ | ||
774 | if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
775 | gr_gm20b_load_gpccs_with_bootloader(g); | ||
776 | err = g->ops.pmu.load_lsfalcon_ucode(g, | ||
777 | (1 << LSF_FALCON_ID_FECS)); | ||
778 | } else { | ||
779 | /* bind WPR VA inst block */ | ||
780 | gr_gk20a_load_falcon_bind_instblk(g); | ||
781 | err = g->ops.pmu.load_lsfalcon_ucode(g, | ||
782 | (1 << LSF_FALCON_ID_FECS) | | ||
783 | (1 << LSF_FALCON_ID_GPCCS)); | ||
784 | } | ||
785 | if (err) { | ||
786 | nvgpu_err(g, "Unable to recover GR falcon"); | ||
787 | return err; | ||
788 | } | ||
789 | |||
790 | } else { | ||
791 | /* cold boot or railgate exit */ | ||
792 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true); | ||
793 | if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
794 | gr_gm20b_load_gpccs_with_bootloader(g); | ||
795 | } else { | ||
796 | /* bind WPR VA inst block */ | ||
797 | gr_gk20a_load_falcon_bind_instblk(g); | ||
798 | if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_FECS)) | ||
799 | falcon_id_mask |= (1 << LSF_FALCON_ID_FECS); | ||
800 | if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_GPCCS)) | ||
801 | falcon_id_mask |= (1 << LSF_FALCON_ID_GPCCS); | ||
802 | |||
803 | err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask); | ||
804 | |||
805 | if (err) { | ||
806 | nvgpu_err(g, "Unable to boot GPCCS"); | ||
807 | return err; | ||
808 | } | ||
809 | } | ||
810 | } | ||
811 | |||
812 | /* start gpccs */ | ||
813 | if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { | ||
814 | gk20a_writel(g, reg_offset + | ||
815 | gr_fecs_cpuctl_alias_r(), | ||
816 | gr_gpccs_cpuctl_startcpu_f(1)); | ||
817 | } else { | ||
818 | gk20a_writel(g, gr_gpccs_dmactl_r(), | ||
819 | gr_gpccs_dmactl_require_ctx_f(0)); | ||
820 | gk20a_writel(g, gr_gpccs_cpuctl_r(), | ||
821 | gr_gpccs_cpuctl_startcpu_f(1)); | ||
822 | } | ||
823 | /* start fecs */ | ||
824 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0); | ||
825 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1); | ||
826 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff); | ||
827 | gk20a_writel(g, gr_fecs_cpuctl_alias_r(), | ||
828 | gr_fecs_cpuctl_startcpu_f(1)); | ||
829 | gk20a_dbg_fn("done"); | ||
830 | |||
831 | return 0; | ||
832 | } | ||
833 | #else | ||
834 | |||
835 | int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) | ||
836 | { | ||
837 | return -EPERM; | ||
838 | } | ||
839 | |||
840 | #endif | ||
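In the secure path of gr_gm20b_load_ctxsw_ucode() above, the bitmask handed to the PMU only contains falcons marked for lazy bootstrap; the remaining LS falcons were already bootstrapped by the ACR. A small sketch of that mask construction follows; the falcon id values and the is_lazy() stub are assumptions standing in for LSF_FALCON_ID_* and g->ops.pmu.is_lazy_bootstrap().

/* Sketch of the lazy-bootstrap falcon mask built in the secure path above. */
#include <stdbool.h>
#include <stdint.h>

enum { FALCON_ID_FECS = 2, FALCON_ID_GPCCS = 3 };	/* values assumed */

static bool is_lazy(int falcon_id)
{
	(void)falcon_id;
	return true;	/* stub for g->ops.pmu.is_lazy_bootstrap() */
}

uint32_t build_bootstrap_mask(void)
{
	uint32_t mask = 0;

	if (is_lazy(FALCON_ID_FECS))
		mask |= 1u << FALCON_ID_FECS;
	if (is_lazy(FALCON_ID_GPCCS))
		mask |= 1u << FALCON_ID_GPCCS;

	return mask;
}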
841 | |||
842 | void gr_gm20b_detect_sm_arch(struct gk20a *g) | ||
843 | { | ||
844 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
845 | |||
846 | g->params.sm_arch_spa_version = | ||
847 | gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
848 | g->params.sm_arch_sm_version = | ||
849 | gr_gpc0_tpc0_sm_arch_sm_version_v(v); | ||
850 | g->params.sm_arch_warp_count = | ||
851 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
852 | } | ||
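gr_gm20b_detect_sm_arch() simply unpacks three fields of the SM_ARCH register through the generated _v() accessors. For reference, the equivalent shift-and-mask extraction is sketched below; the field positions and widths are invented, the real ones are defined in hw_gr_gm20b.h.

/* Field extraction sketch; positions and widths below are assumptions only. */
#include <stdint.h>

#define SPA_VERSION_SHIFT	0u	/* assumed */
#define SPA_VERSION_MASK	0xfffu	/* assumed */
#define SM_VERSION_SHIFT	12u	/* assumed */
#define SM_VERSION_MASK		0xfffu	/* assumed */
#define WARP_COUNT_SHIFT	24u	/* assumed */
#define WARP_COUNT_MASK		0xffu	/* assumed */

void decode_sm_arch(uint32_t v, uint32_t *spa, uint32_t *sm, uint32_t *warps)
{
	*spa   = (v >> SPA_VERSION_SHIFT) & SPA_VERSION_MASK;
	*sm    = (v >> SM_VERSION_SHIFT) & SM_VERSION_MASK;
	*warps = (v >> WARP_COUNT_SHIFT) & WARP_COUNT_MASK;
}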
853 | |||
854 | u32 gr_gm20b_pagepool_default_size(struct gk20a *g) | ||
855 | { | ||
856 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | ||
857 | } | ||
858 | |||
859 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | ||
860 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | ||
861 | u32 class, | ||
862 | u32 flags) | ||
863 | { | ||
864 | int err; | ||
865 | |||
866 | gk20a_dbg_fn(""); | ||
867 | |||
868 | err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); | ||
869 | if (err) | ||
870 | return err; | ||
871 | |||
872 | if (class == MAXWELL_COMPUTE_B) | ||
873 | (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
874 | |||
875 | gk20a_dbg_fn("done"); | ||
876 | |||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | ||
881 | struct channel_ctx_gk20a *ch_ctx, | ||
882 | struct nvgpu_mem *mem) | ||
883 | { | ||
884 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
885 | u32 cta_preempt_option = | ||
886 | ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); | ||
887 | |||
888 | gk20a_dbg_fn(""); | ||
889 | |||
890 | if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { | ||
891 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | ||
892 | nvgpu_mem_wr(g, mem, | ||
893 | ctxsw_prog_main_image_preemption_options_o(), | ||
894 | cta_preempt_option); | ||
895 | } | ||
896 | |||
897 | gk20a_dbg_fn("done"); | ||
898 | } | ||
899 | |||
900 | int gr_gm20b_dump_gr_status_regs(struct gk20a *g, | ||
901 | struct gk20a_debug_output *o) | ||
902 | { | ||
903 | struct gr_gk20a *gr = &g->gr; | ||
904 | u32 gr_engine_id; | ||
905 | |||
906 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
907 | |||
908 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", | ||
909 | gk20a_readl(g, gr_status_r())); | ||
910 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", | ||
911 | gk20a_readl(g, gr_status_1_r())); | ||
912 | gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n", | ||
913 | gk20a_readl(g, gr_status_2_r())); | ||
914 | gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n", | ||
915 | gk20a_readl(g, gr_engine_status_r())); | ||
916 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n", | ||
917 | gk20a_readl(g, gr_gpfifo_status_r())); | ||
918 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n", | ||
919 | gk20a_readl(g, gr_gpfifo_ctl_r())); | ||
920 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", | ||
921 | gk20a_readl(g, gr_fecs_host_int_status_r())); | ||
922 | gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n", | ||
923 | gk20a_readl(g, gr_exception_r())); | ||
924 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", | ||
925 | gk20a_readl(g, gr_fecs_intr_r())); | ||
926 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", | ||
927 | gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); | ||
928 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", | ||
929 | gk20a_readl(g, gr_activity_0_r())); | ||
930 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", | ||
931 | gk20a_readl(g, gr_activity_1_r())); | ||
932 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n", | ||
933 | gk20a_readl(g, gr_activity_2_r())); | ||
934 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n", | ||
935 | gk20a_readl(g, gr_activity_4_r())); | ||
936 | gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n", | ||
937 | gk20a_readl(g, gr_pri_sked_activity_r())); | ||
938 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
939 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r())); | ||
940 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
941 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r())); | ||
942 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
943 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r())); | ||
944 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
945 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); | ||
946 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
947 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); | ||
948 | if (gr->gpc_tpc_count[0] == 2) | ||
949 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
950 | gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r())); | ||
951 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
952 | gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r())); | ||
953 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
954 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r())); | ||
955 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
956 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r())); | ||
957 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
958 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r())); | ||
959 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
960 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); | ||
961 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
962 | gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); | ||
963 | if (gr->gpc_tpc_count[0] == 2) | ||
964 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
965 | gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r())); | ||
966 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
967 | gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r())); | ||
968 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n", | ||
969 | gk20a_readl(g, gr_pri_be0_becs_be_activity0_r())); | ||
970 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n", | ||
971 | gk20a_readl(g, gr_pri_be1_becs_be_activity0_r())); | ||
972 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n", | ||
973 | gk20a_readl(g, gr_pri_bes_becs_be_activity0_r())); | ||
974 | gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n", | ||
975 | gk20a_readl(g, gr_pri_ds_mpipe_status_r())); | ||
976 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n", | ||
977 | gk20a_readl(g, gr_pri_fe_go_idle_on_status_r())); | ||
978 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n", | ||
979 | gk20a_readl(g, gr_fe_go_idle_timeout_r())); | ||
980 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n", | ||
981 | gk20a_readl(g, gr_pri_fe_go_idle_check_r())); | ||
982 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n", | ||
983 | gk20a_readl(g, gr_pri_fe_go_idle_info_r())); | ||
984 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n", | ||
985 | gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); | ||
986 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n", | ||
987 | gk20a_readl(g, gr_cwd_fs_r())); | ||
988 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n", | ||
989 | gk20a_readl(g, gr_fe_tpc_fs_r())); | ||
990 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n", | ||
991 | gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0))); | ||
992 | gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n", | ||
993 | gk20a_readl(g, gr_cwd_sm_id_r(0))); | ||
994 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n", | ||
995 | gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r())); | ||
996 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n", | ||
997 | gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); | ||
998 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n", | ||
999 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r())); | ||
1000 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n", | ||
1001 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r())); | ||
1002 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n", | ||
1003 | gk20a_readl(g, gr_fecs_ctxsw_idlestate_r())); | ||
1004 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n", | ||
1005 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r())); | ||
1006 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n", | ||
1007 | gk20a_readl(g, gr_fecs_current_ctx_r())); | ||
1008 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", | ||
1009 | gk20a_readl(g, gr_fecs_new_ctx_r())); | ||
1010 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", | ||
1011 | gk20a_readl(g, gr_pri_be0_crop_status1_r())); | ||
1012 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", | ||
1013 | gk20a_readl(g, gr_pri_bes_crop_status1_r())); | ||
1014 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n", | ||
1015 | gk20a_readl(g, gr_pri_be0_zrop_status_r())); | ||
1016 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n", | ||
1017 | gk20a_readl(g, gr_pri_be0_zrop_status2_r())); | ||
1018 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n", | ||
1019 | gk20a_readl(g, gr_pri_bes_zrop_status_r())); | ||
1020 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n", | ||
1021 | gk20a_readl(g, gr_pri_bes_zrop_status2_r())); | ||
1022 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n", | ||
1023 | gk20a_readl(g, gr_pri_be0_becs_be_exception_r())); | ||
1024 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n", | ||
1025 | gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r())); | ||
1026 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n", | ||
1027 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r())); | ||
1028 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n", | ||
1029 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r())); | ||
1030 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n", | ||
1031 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); | ||
1032 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", | ||
1033 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); | ||
1034 | |||
1035 | return 0; | ||
1036 | } | ||
1037 | |||
1038 | int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | ||
1039 | bool enable) | ||
1040 | { | ||
1041 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
1042 | struct nvgpu_mem *mem; | ||
1043 | u32 v; | ||
1044 | |||
1045 | gk20a_dbg_fn(""); | ||
1046 | |||
1047 | if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) | ||
1048 | return -EINVAL; | ||
1049 | |||
1050 | mem = &ch_ctx->gr_ctx->mem; | ||
1051 | |||
1052 | if (nvgpu_mem_begin(c->g, mem)) | ||
1053 | return -ENOMEM; | ||
1054 | |||
1055 | v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o()); | ||
1056 | v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); | ||
1057 | v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); | ||
1058 | nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v); | ||
1059 | |||
1060 | nvgpu_mem_end(c->g, mem); | ||
1061 | |||
1062 | gk20a_dbg_fn("done"); | ||
1063 | |||
1064 | return 0; | ||
1065 | } | ||
1066 | |||
1067 | u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) | ||
1068 | { | ||
1069 | u32 fbp_en_mask, opt_fbio; | ||
1070 | u32 tmp, max_fbps_count; | ||
1071 | |||
1072 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1073 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1074 | |||
1075 | opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r()); | ||
1076 | fbp_en_mask = | ||
1077 | ((1 << max_fbps_count) - 1) ^ | ||
1078 | fuse_status_opt_fbio_data_v(opt_fbio); | ||
1079 | return fbp_en_mask; | ||
1080 | } | ||
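gr_gm20b_get_fbp_en_mask() converts the fuse's floorsweeping bits (1 = FBP disabled) into an enable mask by XORing them against an all-FBPs mask. A worked example with invented numbers, assuming a hypothetical two-FBP part with FBP1 floorswept:

/* Worked example of the enable-mask derivation above; all values invented. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t max_fbps_count = 2;			/* assumed top_num_fbps */
	uint32_t fuse_disable = 0x2;			/* assumed: FBP1 off */
	uint32_t all_fbps = (1u << max_fbps_count) - 1;	/* 0x3 */
	uint32_t fbp_en_mask = all_fbps ^ fuse_disable;	/* 0x1: only FBP0 on */

	printf("fbp_en_mask = 0x%x\n", fbp_en_mask);
	return 0;
}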
1081 | |||
1082 | u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) | ||
1083 | { | ||
1084 | u32 ltc_per_fbp, reg; | ||
1085 | reg = gk20a_readl(g, top_ltc_per_fbp_r()); | ||
1086 | ltc_per_fbp = top_ltc_per_fbp_value_v(reg); | ||
1087 | return ltc_per_fbp; | ||
1088 | } | ||
1089 | |||
1090 | u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) | ||
1091 | { | ||
1092 | u32 lts_per_ltc, reg; | ||
1093 | reg = gk20a_readl(g, top_slices_per_ltc_r()); | ||
1094 | lts_per_ltc = top_slices_per_ltc_value_v(reg); | ||
1095 | return lts_per_ltc; | ||
1096 | } | ||
1097 | |||
1098 | u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) | ||
1099 | { | ||
1100 | struct gr_gk20a *gr = &g->gr; | ||
1101 | u32 i, tmp, max_fbps_count, max_ltc_per_fbp; | ||
1102 | u32 rop_l2_all_en; | ||
1103 | |||
1104 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1105 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1106 | max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g); | ||
1107 | rop_l2_all_en = (1 << max_ltc_per_fbp) - 1; | ||
1108 | |||
1109 | /* mask of Rop_L2 for each FBP */ | ||
1110 | for (i = 0; i < max_fbps_count; i++) { | ||
1111 | tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i)); | ||
1112 | gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp; | ||
1113 | } | ||
1114 | |||
1115 | return gr->fbp_rop_l2_en_mask; | ||
1116 | } | ||
1117 | |||
1118 | u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) | ||
1119 | { | ||
1120 | u32 tmp, max_fbps_count; | ||
1121 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
1122 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
1123 | return max_fbps_count; | ||
1124 | } | ||
1125 | |||
1126 | void gr_gm20b_init_cyclestats(struct gk20a *g) | ||
1127 | { | ||
1128 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
1129 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, true); | ||
1130 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, true); | ||
1131 | g->gr.max_css_buffer_size = 0xffffffff; | ||
1132 | #else | ||
1133 | (void)g; | ||
1134 | #endif | ||
1135 | } | ||
1136 | |||
1137 | void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) | ||
1138 | { | ||
1139 | u32 cde_v; | ||
1140 | |||
1141 | cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o()); | ||
1142 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); | ||
1143 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); | ||
1144 | } | ||
1145 | |||
1146 | void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) | ||
1147 | { | ||
1148 | /* Read the valid/paused/trapped warp masks for each SM */ | ||
1149 | /* (this is how the paused state is obtained on Maxwell) */ | ||
1150 | struct gr_gk20a *gr = &g->gr; | ||
1151 | u32 gpc, tpc, sm_id; | ||
1152 | u32 tpc_offset, gpc_offset, reg_offset; | ||
1153 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
1154 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1155 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1156 | |||
1157 | /* for maxwell & kepler */ | ||
1158 | u32 numSmPerTpc = 1; | ||
1159 | u32 numWarpPerTpc = g->params.sm_arch_warp_count * numSmPerTpc; | ||
1160 | |||
1161 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
1162 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1163 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
1164 | |||
1165 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
1166 | gpc_offset = gpc_stride * gpc; | ||
1167 | reg_offset = tpc_offset + gpc_offset; | ||
1168 | |||
1169 | /* 64 bit read */ | ||
1170 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32; | ||
1171 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset); | ||
1172 | |||
1173 | /* 64 bit read */ | ||
1174 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32; | ||
1175 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset); | ||
1176 | |||
1177 | /* 64 bit read */ | ||
1178 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32; | ||
1179 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset); | ||
1180 | |||
1181 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
1182 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
1183 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
1184 | |||
1185 | |||
1186 | if (numWarpPerTpc > 64) { | ||
1187 | /* 64 bit read */ | ||
1188 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32; | ||
1189 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset); | ||
1190 | |||
1191 | /* 64 bit read */ | ||
1192 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32; | ||
1193 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset); | ||
1194 | |||
1195 | /* 64 bit read */ | ||
1196 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32; | ||
1197 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset); | ||
1198 | |||
1199 | w_state[sm_id].valid_warps[1] = warps_valid; | ||
1200 | w_state[sm_id].trapped_warps[1] = warps_trapped; | ||
1201 | w_state[sm_id].paused_warps[1] = warps_paused; | ||
1202 | } | ||
1203 | } | ||
1204 | |||
1205 | |||
1206 | /* Only for debug purposes */ | ||
1207 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
1208 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
1209 | sm_id, w_state[sm_id].valid_warps[0]); | ||
1210 | gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n", | ||
1211 | sm_id, w_state[sm_id].valid_warps[1]); | ||
1212 | |||
1213 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
1214 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
1215 | gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n", | ||
1216 | sm_id, w_state[sm_id].trapped_warps[1]); | ||
1217 | |||
1218 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
1219 | sm_id, w_state[sm_id].paused_warps[0]); | ||
1220 | gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n", | ||
1221 | sm_id, w_state[sm_id].paused_warps[1]); | ||
1222 | } | ||
1223 | } | ||
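Every warp mask read in gr_gm20b_bpt_reg_info() is assembled from two 32-bit priv reads: the low word at the register offset and the high word 4 bytes above it. The helper below sketches that composition; read32() is a stub standing in for gk20a_readl() so the fragment stays self-contained.

/* Compose a 64-bit warp mask from two 32-bit reads, mirroring the code above. */
#include <stdint.h>

uint32_t read32(uint32_t addr)
{
	return addr;	/* stub; the driver calls gk20a_readl(g, addr) here */
}

uint64_t read_warp_mask64(uint32_t reg, uint32_t reg_offset)
{
	uint64_t lo = read32(reg + reg_offset);
	uint64_t hi = (uint64_t)read32(reg + reg_offset + 4) << 32;

	return hi | lo;
}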
1224 | |||
1225 | void gr_gm20b_get_access_map(struct gk20a *g, | ||
1226 | u32 **whitelist, int *num_entries) | ||
1227 | { | ||
1228 | static u32 wl_addr_gm20b[] = { | ||
1229 | /* this list must be sorted (low to high) */ | ||
1230 | 0x404468, /* gr_pri_mme_max_instructions */ | ||
1231 | 0x418300, /* gr_pri_gpcs_rasterarb_line_class */ | ||
1232 | 0x418800, /* gr_pri_gpcs_setup_debug */ | ||
1233 | 0x418e00, /* gr_pri_gpcs_swdx_config */ | ||
1234 | 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1235 | 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1236 | 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1237 | 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1238 | 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */ | ||
1239 | 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1240 | 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1241 | 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1242 | 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1243 | 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1244 | 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1245 | 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1246 | 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1247 | 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1248 | 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1249 | 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1250 | 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1251 | 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1252 | 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1253 | 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1254 | 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */ | ||
1255 | 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */ | ||
1256 | 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ | ||
1257 | 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ | ||
1258 | 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ | ||
1259 | 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ | ||
1260 | }; | ||
1261 | |||
1262 | *whitelist = wl_addr_gm20b; | ||
1263 | *num_entries = ARRAY_SIZE(wl_addr_gm20b); | ||
1264 | } | ||
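The whitelist above is kept sorted so its consumer can look up addresses efficiently; that consumer lives outside this file, so the binary search below is only a hedged sketch of how such a sorted table might be probed.

/* Hypothetical lookup over the sorted access map; not part of the driver. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

bool addr_in_access_map(const uint32_t *wl, size_t n, uint32_t addr)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (wl[mid] == addr)
			return true;
		if (wl[mid] < addr)
			lo = mid + 1;
		else
			hi = mid;
	}
	return false;
}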
1265 | |||
1266 | int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
1267 | { | ||
1268 | int sm_id; | ||
1269 | struct gr_gk20a *gr = &g->gr; | ||
1270 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1271 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
1272 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1273 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
1274 | |||
1275 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1276 | |||
1277 | sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, | ||
1278 | gr_gpc0_tpc0_sm_cfg_r() + offset)); | ||
1279 | |||
1280 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
1281 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
1282 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
1283 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | ||
1284 | gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g, | ||
1285 | gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset); | ||
1286 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
1287 | gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset); | ||
1288 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
1289 | gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset); | ||
1290 | |||
1291 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1292 | |||
1293 | return 0; | ||
1294 | } | ||
1295 | |||
1296 | int gm20b_gr_update_sm_error_state(struct gk20a *g, | ||
1297 | struct channel_gk20a *ch, u32 sm_id, | ||
1298 | struct nvgpu_gr_sm_error_state *sm_error_state) | ||
1299 | { | ||
1300 | u32 gpc, tpc, offset; | ||
1301 | struct gr_gk20a *gr = &g->gr; | ||
1302 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
1303 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1304 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
1305 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1306 | int err = 0; | ||
1307 | |||
1308 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1309 | |||
1310 | gr->sm_error_states[sm_id].hww_global_esr = | ||
1311 | sm_error_state->hww_global_esr; | ||
1312 | gr->sm_error_states[sm_id].hww_warp_esr = | ||
1313 | sm_error_state->hww_warp_esr; | ||
1314 | gr->sm_error_states[sm_id].hww_warp_esr_pc = | ||
1315 | sm_error_state->hww_warp_esr_pc; | ||
1316 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = | ||
1317 | sm_error_state->hww_global_esr_report_mask; | ||
1318 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = | ||
1319 | sm_error_state->hww_warp_esr_report_mask; | ||
1320 | |||
1321 | err = gr_gk20a_disable_ctxsw(g); | ||
1322 | if (err) { | ||
1323 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
1324 | goto fail; | ||
1325 | } | ||
1326 | |||
1327 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1328 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
1329 | |||
1330 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
1331 | |||
1332 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
1333 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
1334 | gr->sm_error_states[sm_id].hww_global_esr); | ||
1335 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
1336 | gr->sm_error_states[sm_id].hww_warp_esr); | ||
1337 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset, | ||
1338 | gr->sm_error_states[sm_id].hww_warp_esr_pc); | ||
1339 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
1340 | gr->sm_error_states[sm_id].hww_global_esr_report_mask); | ||
1341 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
1342 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask); | ||
1343 | } else { | ||
1344 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); | ||
1345 | if (err) | ||
1346 | goto enable_ctxsw; | ||
1347 | |||
1348 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
1349 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
1350 | gr->sm_error_states[sm_id].hww_global_esr_report_mask, | ||
1351 | true); | ||
1352 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
1353 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
1354 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask, | ||
1355 | true); | ||
1356 | |||
1357 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); | ||
1358 | } | ||
1359 | |||
1360 | enable_ctxsw: | ||
1361 | err = gr_gk20a_enable_ctxsw(g); | ||
1362 | |||
1363 | fail: | ||
1364 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1365 | return err; | ||
1366 | } | ||
1367 | |||
1368 | int gm20b_gr_clear_sm_error_state(struct gk20a *g, | ||
1369 | struct channel_gk20a *ch, u32 sm_id) | ||
1370 | { | ||
1371 | u32 gpc, tpc, offset; | ||
1372 | u32 val; | ||
1373 | struct gr_gk20a *gr = &g->gr; | ||
1374 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1375 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
1376 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1377 | int err = 0; | ||
1378 | |||
1379 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1380 | |||
1381 | memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); | ||
1382 | |||
1383 | err = gr_gk20a_disable_ctxsw(g); | ||
1384 | if (err) { | ||
1385 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
1386 | goto fail; | ||
1387 | } | ||
1388 | |||
1389 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
1390 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1391 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
1392 | |||
1393 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
1394 | |||
1395 | val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
1396 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
1397 | val); | ||
1398 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
1399 | 0); | ||
1400 | } | ||
1401 | |||
1402 | err = gr_gk20a_enable_ctxsw(g); | ||
1403 | |||
1404 | fail: | ||
1405 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1406 | return err; | ||
1407 | } | ||
1408 | |||
1409 | int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | ||
1410 | struct nvgpu_preemption_modes_rec *preemption_modes_rec) | ||
1411 | { | ||
1412 | preemption_modes_rec->graphics_preemption_mode_flags = | ||
1413 | NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
1414 | preemption_modes_rec->compute_preemption_mode_flags = ( | ||
1415 | NVGPU_PREEMPTION_MODE_COMPUTE_WFI | | ||
1416 | NVGPU_PREEMPTION_MODE_COMPUTE_CTA); | ||
1417 | |||
1418 | preemption_modes_rec->default_graphics_preempt_mode = | ||
1419 | NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
1420 | preemption_modes_rec->default_compute_preempt_mode = | ||
1421 | NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
1422 | |||
1423 | return 0; | ||
1424 | } | ||
1425 | |||
1426 | bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) | ||
1427 | { | ||
1428 | u32 ltc_shared_base = ltc_ltcs_ltss_v(); | ||
1429 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1430 | |||
1431 | return (addr >= ltc_shared_base) && | ||
1432 | (addr < (ltc_shared_base + lts_stride)); | ||
1433 | } | ||
1434 | |||
1435 | bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) | ||
1436 | { | ||
1437 | u32 lts_shared_base = ltc_ltc0_ltss_v(); | ||
1438 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1439 | u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1; | ||
1440 | u32 base_offset = lts_shared_base & addr_mask; | ||
1441 | u32 end_offset = base_offset + lts_stride; | ||
1442 | |||
1443 | return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) && | ||
1444 | ((addr & addr_mask) >= base_offset) && | ||
1445 | ((addr & addr_mask) < end_offset); | ||
1446 | } | ||
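gr_gm20b_is_ltcn_ltss_addr() reduces the address to its offset within one LTC stride and checks whether that offset lands in the per-LTC slice-broadcast (LTSS) window, after excluding the all-LTC broadcast range handled by the previous helper. A sketch of the same masking test with invented stride and base values:

/* Illustrative per-LTC LTSS window test; every constant here is assumed. */
#include <stdbool.h>
#include <stdint.h>

#define LTC_STRIDE	0x2000u		/* assumed */
#define LTS_STRIDE	0x400u		/* assumed */
#define LTC0_LTSS_BASE	0x141400u	/* assumed priv address of ltc0_ltss */

bool addr_is_ltcn_ltss(uint32_t addr)
{
	uint32_t addr_mask = LTC_STRIDE - 1;
	uint32_t base_off = LTC0_LTSS_BASE & addr_mask;

	/* the real helper first rejects the ltcs_ltss broadcast range */
	return (addr & addr_mask) >= base_off &&
	       (addr & addr_mask) < base_off + LTS_STRIDE;
}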
1447 | |||
1448 | static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, | ||
1449 | u32 *priv_addr_table, | ||
1450 | u32 *priv_addr_table_index) | ||
1451 | { | ||
1452 | u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g); | ||
1453 | u32 index = *priv_addr_table_index; | ||
1454 | u32 lts_num; | ||
1455 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1456 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1457 | |||
1458 | for (lts_num = 0; lts_num < num_ltc_slices; lts_num++) | ||
1459 | priv_addr_table[index++] = ltc_ltc0_lts0_v() + | ||
1460 | ltc_num * ltc_stride + | ||
1461 | lts_num * lts_stride + | ||
1462 | (addr & (lts_stride - 1)); | ||
1463 | |||
1464 | *priv_addr_table_index = index; | ||
1465 | } | ||
1466 | |||
1467 | void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
1468 | u32 *priv_addr_table, | ||
1469 | u32 *priv_addr_table_index) | ||
1470 | { | ||
1471 | u32 num_ltc = g->ltc_count; | ||
1472 | u32 i, start, ltc_num = 0; | ||
1473 | u32 pltcg_base = ltc_pltcg_base_v(); | ||
1474 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1475 | |||
1476 | for (i = 0; i < num_ltc; i++) { | ||
1477 | start = pltcg_base + i * ltc_stride; | ||
1478 | if ((addr >= start) && (addr < (start + ltc_stride))) { | ||
1479 | ltc_num = i; | ||
1480 | break; | ||
1481 | } | ||
1482 | } | ||
1483 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, | ||
1484 | priv_addr_table_index); | ||
1485 | } | ||
1486 | |||
1487 | void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
1488 | u32 *priv_addr_table, | ||
1489 | u32 *priv_addr_table_index) | ||
1490 | { | ||
1491 | u32 num_ltc = g->ltc_count; | ||
1492 | u32 ltc_num; | ||
1493 | |||
1494 | for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) | ||
1495 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, | ||
1496 | priv_addr_table, priv_addr_table_index); | ||
1497 | } | ||
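The two split helpers above expand one broadcast priv address into a unicast address per LTC and per LTS slice, preserving the offset within the slice. A worked sketch of that expansion; the counts, strides and the LTC0/LTS0 base used here are illustrative assumptions.

/* Broadcast-to-unicast expansion, modelled on gr_gm20b_update_ltc_lts_addr(). */
#include <stdint.h>
#include <stdio.h>

#define NUM_LTC		2u		/* assumed g->ltc_count */
#define NUM_SLICES	2u		/* assumed slices per LTC */
#define LTC_STRIDE	0x2000u		/* assumed */
#define LTS_STRIDE	0x400u		/* assumed */
#define LTC0_LTS0_BASE	0x140400u	/* assumed unicast base of ltc0_lts0 */

int main(void)
{
	uint32_t bcast_addr = 0x17e264;			/* invented example */
	uint32_t off = bcast_addr & (LTS_STRIDE - 1);	/* offset in slice */
	uint32_t ltc, lts;

	for (ltc = 0; ltc < NUM_LTC; ltc++)
		for (lts = 0; lts < NUM_SLICES; lts++)
			printf("unicast: 0x%x\n", LTC0_LTS0_BASE +
			       ltc * LTC_STRIDE + lts * LTS_STRIDE + off);
	return 0;
}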
1498 | |||
1499 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
1500 | u32 global_esr) | ||
1501 | { | ||
1502 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); | ||
1503 | |||
1504 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
1505 | global_esr); | ||
1506 | |||
1507 | /* clear the warp hww */ | ||
1508 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0); | ||
1509 | } | ||
1510 | |||
1511 | /* | ||
1512 | * Disable both surface and LG coalesce. | ||
1513 | */ | ||
1514 | void gm20a_gr_disable_rd_coalesce(struct gk20a *g) | ||
1515 | { | ||
1516 | u32 dbg2_reg; | ||
1517 | |||
1518 | dbg2_reg = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r()); | ||
1519 | dbg2_reg = set_field(dbg2_reg, | ||
1520 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(), | ||
1521 | gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0)); | ||
1522 | dbg2_reg = set_field(dbg2_reg, | ||
1523 | gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(), | ||
1524 | gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0)); | ||
1525 | |||
1526 | gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); | ||
1527 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h new file mode 100644 index 00000000..18e6b032 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * GM20B Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_GR_MMU_H | ||
26 | #define _NVHOST_GM20B_GR_MMU_H | ||
27 | |||
28 | struct gk20a; | ||
29 | struct nvgpu_warpstate; | ||
30 | |||
31 | enum { | ||
32 | MAXWELL_B = 0xB197, | ||
33 | MAXWELL_COMPUTE_B = 0xB1C0, | ||
34 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, | ||
35 | MAXWELL_DMA_COPY_A = 0xB0B5, | ||
36 | MAXWELL_CHANNEL_GPFIFO_A = 0xB06F, | ||
37 | }; | ||
38 | |||
39 | #define NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc | ||
40 | #define NVB197_SET_CIRCULAR_BUFFER_SIZE 0x1280 | ||
41 | #define NVB197_SET_SHADER_EXCEPTIONS 0x1528 | ||
42 | #define NVB197_SET_RD_COALESCE 0x102c | ||
43 | #define NVB1C0_SET_SHADER_EXCEPTIONS 0x1528 | ||
44 | #define NVB1C0_SET_RD_COALESCE 0x0228 | ||
45 | |||
46 | #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 | ||
47 | |||
48 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | ||
49 | struct channel_ctx_gk20a *ch_ctx, | ||
50 | u64 addr, bool patch); | ||
51 | int gr_gm20b_init_fs_state(struct gk20a *g); | ||
52 | int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); | ||
53 | void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data); | ||
54 | void gm20a_gr_disable_rd_coalesce(struct gk20a *g); | ||
55 | void gr_gm20b_init_gpc_mmu(struct gk20a *g); | ||
56 | void gr_gm20b_bundle_cb_defaults(struct gk20a *g); | ||
57 | void gr_gm20b_cb_size_default(struct gk20a *g); | ||
58 | int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); | ||
59 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | ||
60 | struct channel_ctx_gk20a *ch_ctx, | ||
61 | u64 addr, u64 size, bool patch); | ||
62 | int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | ||
63 | struct channel_gk20a *c, bool patch); | ||
64 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, | ||
65 | struct channel_ctx_gk20a *ch_ctx, | ||
66 | u64 addr, u32 size, bool patch); | ||
67 | int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, | ||
68 | u32 class_num, u32 offset, u32 data); | ||
69 | void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); | ||
70 | void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data); | ||
71 | void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g); | ||
72 | bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num); | ||
73 | bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num); | ||
74 | bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num); | ||
75 | void gr_gm20b_init_sm_dsm_reg_info(void); | ||
76 | void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, | ||
77 | u32 *num_sm_dsm_perf_regs, | ||
78 | u32 **sm_dsm_perf_regs, | ||
79 | u32 *perf_register_stride); | ||
80 | void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
81 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
82 | u32 **sm_dsm_perf_ctrl_regs, | ||
83 | u32 *ctrl_register_stride); | ||
84 | u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); | ||
85 | void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); | ||
86 | void gr_gm20b_load_tpc_mask(struct gk20a *g); | ||
87 | void gr_gm20b_program_sm_id_numbering(struct gk20a *g, | ||
88 | u32 gpc, u32 tpc, u32 smid); | ||
89 | int gr_gm20b_load_smid_config(struct gk20a *g); | ||
90 | int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
91 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); | ||
92 | bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr); | ||
93 | u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr); | ||
94 | int gr_gm20b_load_ctxsw_ucode(struct gk20a *g); | ||
96 | void gr_gm20b_detect_sm_arch(struct gk20a *g); | ||
97 | u32 gr_gm20b_pagepool_default_size(struct gk20a *g); | ||
98 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | ||
99 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | ||
100 | u32 class, | ||
101 | u32 flags); | ||
102 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | ||
103 | struct channel_ctx_gk20a *ch_ctx, | ||
104 | struct nvgpu_mem *mem); | ||
105 | int gr_gm20b_dump_gr_status_regs(struct gk20a *g, | ||
106 | struct gk20a_debug_output *o); | ||
107 | int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | ||
108 | bool enable); | ||
109 | u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g); | ||
110 | u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g); | ||
111 | u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g); | ||
112 | u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g); | ||
113 | u32 gr_gm20b_get_max_fbps_count(struct gk20a *g); | ||
114 | void gr_gm20b_init_cyclestats(struct gk20a *g); | ||
115 | void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem); | ||
116 | void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state); | ||
117 | void gr_gm20b_get_access_map(struct gk20a *g, | ||
118 | u32 **whitelist, int *num_entries); | ||
119 | int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc); | ||
120 | int gm20b_gr_update_sm_error_state(struct gk20a *g, | ||
121 | struct channel_gk20a *ch, u32 sm_id, | ||
122 | struct nvgpu_gr_sm_error_state *sm_error_state); | ||
123 | int gm20b_gr_clear_sm_error_state(struct gk20a *g, | ||
124 | struct channel_gk20a *ch, u32 sm_id); | ||
125 | int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | ||
126 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | ||
127 | bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr); | ||
128 | bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr); | ||
129 | void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
130 | u32 *priv_addr_table, | ||
131 | u32 *priv_addr_table_index); | ||
132 | void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
133 | u32 *priv_addr_table, | ||
134 | u32 *priv_addr_table_index); | ||
135 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
136 | u32 global_esr); | ||
137 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c new file mode 100644 index 00000000..227b6b6c --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -0,0 +1,708 @@ | |||
1 | /* | ||
2 | * GM20B Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gk20a/ce2_gk20a.h" | ||
27 | #include "gk20a/dbg_gpu_gk20a.h" | ||
28 | #include "gk20a/fb_gk20a.h" | ||
29 | #include "gk20a/fifo_gk20a.h" | ||
30 | #include "gk20a/therm_gk20a.h" | ||
31 | #include "gk20a/mm_gk20a.h" | ||
32 | #include "gk20a/css_gr_gk20a.h" | ||
33 | #include "gk20a/mc_gk20a.h" | ||
34 | #include "gk20a/bus_gk20a.h" | ||
35 | #include "gk20a/flcn_gk20a.h" | ||
36 | #include "gk20a/priv_ring_gk20a.h" | ||
37 | #include "gk20a/regops_gk20a.h" | ||
38 | #include "gk20a/pmu_gk20a.h" | ||
39 | #include "gk20a/gr_gk20a.h" | ||
40 | #include "gk20a/tsg_gk20a.h" | ||
41 | |||
42 | #include "ltc_gm20b.h" | ||
43 | #include "gr_gm20b.h" | ||
44 | #include "ltc_gm20b.h" | ||
45 | #include "fb_gm20b.h" | ||
46 | #include "gm20b_gating_reglist.h" | ||
47 | #include "fifo_gm20b.h" | ||
48 | #include "gr_ctx_gm20b.h" | ||
49 | #include "mm_gm20b.h" | ||
50 | #include "pmu_gm20b.h" | ||
51 | #include "clk_gm20b.h" | ||
52 | #include "regops_gm20b.h" | ||
53 | #include "therm_gm20b.h" | ||
54 | #include "bus_gm20b.h" | ||
55 | #include "hal_gm20b.h" | ||
56 | #include "acr_gm20b.h" | ||
57 | |||
58 | #include <nvgpu/debug.h> | ||
59 | #include <nvgpu/bug.h> | ||
60 | #include <nvgpu/enabled.h> | ||
61 | #include <nvgpu/bus.h> | ||
62 | |||
63 | #include <nvgpu/hw/gm20b/hw_proj_gm20b.h> | ||
64 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
65 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | ||
66 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | ||
67 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
68 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | ||
69 | #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h> | ||
70 | |||
71 | #define PRIV_SECURITY_DISABLE 0x01 | ||
72 | |||
73 | int gm20b_get_litter_value(struct gk20a *g, int value) | ||
74 | { | ||
75 | int ret = -EINVAL; | ||
76 | switch (value) { | ||
77 | case GPU_LIT_NUM_GPCS: | ||
78 | ret = proj_scal_litter_num_gpcs_v(); | ||
79 | break; | ||
80 | case GPU_LIT_NUM_PES_PER_GPC: | ||
81 | ret = proj_scal_litter_num_pes_per_gpc_v(); | ||
82 | break; | ||
83 | case GPU_LIT_NUM_ZCULL_BANKS: | ||
84 | ret = proj_scal_litter_num_zcull_banks_v(); | ||
85 | break; | ||
86 | case GPU_LIT_NUM_TPC_PER_GPC: | ||
87 | ret = proj_scal_litter_num_tpc_per_gpc_v(); | ||
88 | break; | ||
89 | case GPU_LIT_NUM_SM_PER_TPC: | ||
90 | ret = proj_scal_litter_num_sm_per_tpc_v(); | ||
91 | break; | ||
92 | case GPU_LIT_NUM_FBPS: | ||
93 | ret = proj_scal_litter_num_fbps_v(); | ||
94 | break; | ||
95 | case GPU_LIT_GPC_BASE: | ||
96 | ret = proj_gpc_base_v(); | ||
97 | break; | ||
98 | case GPU_LIT_GPC_STRIDE: | ||
99 | ret = proj_gpc_stride_v(); | ||
100 | break; | ||
101 | case GPU_LIT_GPC_SHARED_BASE: | ||
102 | ret = proj_gpc_shared_base_v(); | ||
103 | break; | ||
104 | case GPU_LIT_TPC_IN_GPC_BASE: | ||
105 | ret = proj_tpc_in_gpc_base_v(); | ||
106 | break; | ||
107 | case GPU_LIT_TPC_IN_GPC_STRIDE: | ||
108 | ret = proj_tpc_in_gpc_stride_v(); | ||
109 | break; | ||
110 | case GPU_LIT_TPC_IN_GPC_SHARED_BASE: | ||
111 | ret = proj_tpc_in_gpc_shared_base_v(); | ||
112 | break; | ||
113 | case GPU_LIT_PPC_IN_GPC_BASE: | ||
114 | ret = proj_ppc_in_gpc_base_v(); | ||
115 | break; | ||
116 | case GPU_LIT_PPC_IN_GPC_STRIDE: | ||
117 | ret = proj_ppc_in_gpc_stride_v(); | ||
118 | break; | ||
119 | case GPU_LIT_PPC_IN_GPC_SHARED_BASE: | ||
120 | ret = proj_ppc_in_gpc_shared_base_v(); | ||
121 | break; | ||
122 | case GPU_LIT_ROP_BASE: | ||
123 | ret = proj_rop_base_v(); | ||
124 | break; | ||
125 | case GPU_LIT_ROP_STRIDE: | ||
126 | ret = proj_rop_stride_v(); | ||
127 | break; | ||
128 | case GPU_LIT_ROP_SHARED_BASE: | ||
129 | ret = proj_rop_shared_base_v(); | ||
130 | break; | ||
131 | case GPU_LIT_HOST_NUM_ENGINES: | ||
132 | ret = proj_host_num_engines_v(); | ||
133 | break; | ||
134 | case GPU_LIT_HOST_NUM_PBDMA: | ||
135 | ret = proj_host_num_pbdma_v(); | ||
136 | break; | ||
137 | case GPU_LIT_LTC_STRIDE: | ||
138 | ret = proj_ltc_stride_v(); | ||
139 | break; | ||
140 | case GPU_LIT_LTS_STRIDE: | ||
141 | ret = proj_lts_stride_v(); | ||
142 | break; | ||
143 | /* Even though GM20B doesn't have an FBPA unit, the HW reports one, | ||
144 | * and the microcode as a result leaves space in the context buffer | ||
145 | * for one, so make sure SW accounts for this also. | ||
146 | */ | ||
147 | case GPU_LIT_NUM_FBPAS: | ||
148 | ret = proj_scal_litter_num_fbpas_v(); | ||
149 | break; | ||
150 | /* Hardcode FBPA values other than NUM_FBPAS to 0. */ | ||
151 | case GPU_LIT_FBPA_STRIDE: | ||
152 | case GPU_LIT_FBPA_BASE: | ||
153 | case GPU_LIT_FBPA_SHARED_BASE: | ||
154 | ret = 0; | ||
155 | break; | ||
156 | case GPU_LIT_TWOD_CLASS: | ||
157 | ret = FERMI_TWOD_A; | ||
158 | break; | ||
159 | case GPU_LIT_THREED_CLASS: | ||
160 | ret = MAXWELL_B; | ||
161 | break; | ||
162 | case GPU_LIT_COMPUTE_CLASS: | ||
163 | ret = MAXWELL_COMPUTE_B; | ||
164 | break; | ||
165 | case GPU_LIT_GPFIFO_CLASS: | ||
166 | ret = MAXWELL_CHANNEL_GPFIFO_A; | ||
167 | break; | ||
168 | case GPU_LIT_I2M_CLASS: | ||
169 | ret = KEPLER_INLINE_TO_MEMORY_B; | ||
170 | break; | ||
171 | case GPU_LIT_DMA_COPY_CLASS: | ||
172 | ret = MAXWELL_DMA_COPY_A; | ||
173 | break; | ||
174 | default: | ||
175 | nvgpu_err(g, "Missing definition %d", value); | ||
176 | BUG(); | ||
177 | break; | ||
178 | } | ||
179 | |||
180 | return ret; | ||
181 | } | ||
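Callers combine these per-chip litter values into unicast register offsets, as the GR code earlier in this patch does with gpc_stride * gpc + tpc_in_gpc_stride * tpc. A minimal sketch of that pattern; the helper name is invented.

/* Hypothetical helper showing how the litter values above are typically used. */
#include <stdint.h>

uint32_t tpc_unicast_offset(uint32_t gpc_stride, uint32_t tpc_in_gpc_stride,
			    uint32_t gpc, uint32_t tpc)
{
	/* in the driver both strides come from nvgpu_get_litter_value() */
	return gpc_stride * gpc + tpc_in_gpc_stride * tpc;
}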
182 | |||
183 | static const struct gpu_ops gm20b_ops = { | ||
184 | .ltc = { | ||
185 | .determine_L2_size_bytes = gm20b_determine_L2_size_bytes, | ||
186 | .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, | ||
187 | .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, | ||
188 | .init_cbc = gm20b_ltc_init_cbc, | ||
189 | .init_fs_state = gm20b_ltc_init_fs_state, | ||
190 | .init_comptags = gm20b_ltc_init_comptags, | ||
191 | .cbc_ctrl = gm20b_ltc_cbc_ctrl, | ||
192 | .isr = gm20b_ltc_isr, | ||
193 | .cbc_fix_config = gm20b_ltc_cbc_fix_config, | ||
194 | .flush = gm20b_flush_ltc, | ||
195 | .set_enabled = gm20b_ltc_set_enabled, | ||
196 | }, | ||
197 | .ce2 = { | ||
198 | .isr_stall = gk20a_ce2_isr, | ||
199 | .isr_nonstall = gk20a_ce2_nonstall_isr, | ||
200 | }, | ||
201 | .gr = { | ||
202 | .get_patch_slots = gr_gk20a_get_patch_slots, | ||
203 | .init_gpc_mmu = gr_gm20b_init_gpc_mmu, | ||
204 | .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults, | ||
205 | .cb_size_default = gr_gm20b_cb_size_default, | ||
206 | .calc_global_ctx_buffer_size = | ||
207 | gr_gm20b_calc_global_ctx_buffer_size, | ||
208 | .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb, | ||
209 | .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb, | ||
210 | .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager, | ||
211 | .commit_global_pagepool = gr_gm20b_commit_global_pagepool, | ||
212 | .handle_sw_method = gr_gm20b_handle_sw_method, | ||
213 | .set_alpha_circular_buffer_size = | ||
214 | gr_gm20b_set_alpha_circular_buffer_size, | ||
215 | .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size, | ||
216 | .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions, | ||
217 | .is_valid_class = gr_gm20b_is_valid_class, | ||
218 | .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class, | ||
219 | .is_valid_compute_class = gr_gm20b_is_valid_compute_class, | ||
220 | .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, | ||
221 | .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, | ||
222 | .init_fs_state = gr_gm20b_init_fs_state, | ||
223 | .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, | ||
224 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, | ||
225 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | ||
226 | .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, | ||
227 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | ||
228 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
229 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | ||
230 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | ||
231 | .get_zcull_info = gr_gk20a_get_zcull_info, | ||
232 | .is_tpc_addr = gr_gm20b_is_tpc_addr, | ||
233 | .get_tpc_num = gr_gm20b_get_tpc_num, | ||
234 | .detect_sm_arch = gr_gm20b_detect_sm_arch, | ||
235 | .add_zbc_color = gr_gk20a_add_zbc_color, | ||
236 | .add_zbc_depth = gr_gk20a_add_zbc_depth, | ||
237 | .zbc_set_table = gk20a_gr_zbc_set_table, | ||
238 | .zbc_query_table = gr_gk20a_query_zbc, | ||
239 | .pmu_save_zbc = gk20a_pmu_save_zbc, | ||
240 | .add_zbc = gr_gk20a_add_zbc, | ||
241 | .pagepool_default_size = gr_gm20b_pagepool_default_size, | ||
242 | .init_ctx_state = gr_gk20a_init_ctx_state, | ||
243 | .alloc_gr_ctx = gr_gm20b_alloc_gr_ctx, | ||
244 | .free_gr_ctx = gr_gk20a_free_gr_ctx, | ||
245 | .update_ctxsw_preemption_mode = | ||
246 | gr_gm20b_update_ctxsw_preemption_mode, | ||
247 | .dump_gr_regs = gr_gm20b_dump_gr_status_regs, | ||
248 | .update_pc_sampling = gr_gm20b_update_pc_sampling, | ||
249 | .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, | ||
250 | .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp, | ||
251 | .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc, | ||
252 | .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, | ||
253 | .get_max_fbps_count = gr_gm20b_get_max_fbps_count, | ||
254 | .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, | ||
255 | .wait_empty = gr_gk20a_wait_idle, | ||
256 | .init_cyclestats = gr_gm20b_init_cyclestats, | ||
257 | .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, | ||
258 | .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs, | ||
259 | .bpt_reg_info = gr_gm20b_bpt_reg_info, | ||
260 | .get_access_map = gr_gm20b_get_access_map, | ||
261 | .handle_fecs_error = gk20a_gr_handle_fecs_error, | ||
262 | .handle_sm_exception = gr_gk20a_handle_sm_exception, | ||
263 | .handle_tex_exception = gr_gk20a_handle_tex_exception, | ||
264 | .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions, | ||
265 | .enable_exceptions = gk20a_gr_enable_exceptions, | ||
266 | .get_lrf_tex_ltc_dram_override = NULL, | ||
267 | .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, | ||
268 | .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, | ||
269 | .record_sm_error_state = gm20b_gr_record_sm_error_state, | ||
270 | .update_sm_error_state = gm20b_gr_update_sm_error_state, | ||
271 | .clear_sm_error_state = gm20b_gr_clear_sm_error_state, | ||
272 | .suspend_contexts = gr_gk20a_suspend_contexts, | ||
273 | .resume_contexts = gr_gk20a_resume_contexts, | ||
274 | .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags, | ||
275 | .init_sm_id_table = gr_gk20a_init_sm_id_table, | ||
276 | .load_smid_config = gr_gm20b_load_smid_config, | ||
277 | .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, | ||
278 | .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, | ||
279 | .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, | ||
280 | .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, | ||
281 | .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, | ||
282 | .setup_rop_mapping = gr_gk20a_setup_rop_mapping, | ||
283 | .program_zcull_mapping = gr_gk20a_program_zcull_mapping, | ||
284 | .commit_global_timeslice = gr_gk20a_commit_global_timeslice, | ||
285 | .commit_inst = gr_gk20a_commit_inst, | ||
286 | .write_zcull_ptr = gr_gk20a_write_zcull_ptr, | ||
287 | .write_pm_ptr = gr_gk20a_write_pm_ptr, | ||
288 | .init_elcg_mode = gr_gk20a_init_elcg_mode, | ||
289 | .load_tpc_mask = gr_gm20b_load_tpc_mask, | ||
290 | .inval_icache = gr_gk20a_inval_icache, | ||
291 | .trigger_suspend = gr_gk20a_trigger_suspend, | ||
292 | .wait_for_pause = gr_gk20a_wait_for_pause, | ||
293 | .resume_from_pause = gr_gk20a_resume_from_pause, | ||
294 | .clear_sm_errors = gr_gk20a_clear_sm_errors, | ||
295 | .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions, | ||
296 | .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel, | ||
297 | .sm_debugger_attached = gk20a_gr_sm_debugger_attached, | ||
298 | .suspend_single_sm = gk20a_gr_suspend_single_sm, | ||
299 | .suspend_all_sms = gk20a_gr_suspend_all_sms, | ||
300 | .resume_single_sm = gk20a_gr_resume_single_sm, | ||
301 | .resume_all_sms = gk20a_gr_resume_all_sms, | ||
302 | .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr, | ||
303 | .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr, | ||
304 | .get_sm_no_lock_down_hww_global_esr_mask = | ||
305 | gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask, | ||
306 | .lock_down_sm = gk20a_gr_lock_down_sm, | ||
307 | .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down, | ||
308 | .clear_sm_hww = gm20b_gr_clear_sm_hww, | ||
309 | .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, | ||
310 | .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, | ||
311 | .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, | ||
312 | .init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data, | ||
313 | }, | ||
314 | .fb = { | ||
315 | .reset = fb_gk20a_reset, | ||
316 | .init_hw = gk20a_fb_init_hw, | ||
317 | .init_fs_state = fb_gm20b_init_fs_state, | ||
318 | .set_mmu_page_size = gm20b_fb_set_mmu_page_size, | ||
319 | .set_use_full_comp_tag_line = | ||
320 | gm20b_fb_set_use_full_comp_tag_line, | ||
321 | .compression_page_size = gm20b_fb_compression_page_size, | ||
322 | .compressible_page_size = gm20b_fb_compressible_page_size, | ||
323 | .vpr_info_fetch = gm20b_fb_vpr_info_fetch, | ||
324 | .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info, | ||
325 | .read_wpr_info = gm20b_fb_read_wpr_info, | ||
326 | .is_debug_mode_enabled = gm20b_fb_debug_mode_enabled, | ||
327 | .set_debug_mode = gm20b_fb_set_debug_mode, | ||
328 | .tlb_invalidate = gk20a_fb_tlb_invalidate, | ||
329 | .mem_unlock = NULL, | ||
330 | }, | ||
331 | .clock_gating = { | ||
332 | .slcg_bus_load_gating_prod = | ||
333 | gm20b_slcg_bus_load_gating_prod, | ||
334 | .slcg_ce2_load_gating_prod = | ||
335 | gm20b_slcg_ce2_load_gating_prod, | ||
336 | .slcg_chiplet_load_gating_prod = | ||
337 | gm20b_slcg_chiplet_load_gating_prod, | ||
338 | .slcg_ctxsw_firmware_load_gating_prod = | ||
339 | gm20b_slcg_ctxsw_firmware_load_gating_prod, | ||
340 | .slcg_fb_load_gating_prod = | ||
341 | gm20b_slcg_fb_load_gating_prod, | ||
342 | .slcg_fifo_load_gating_prod = | ||
343 | gm20b_slcg_fifo_load_gating_prod, | ||
344 | .slcg_gr_load_gating_prod = | ||
345 | gr_gm20b_slcg_gr_load_gating_prod, | ||
346 | .slcg_ltc_load_gating_prod = | ||
347 | ltc_gm20b_slcg_ltc_load_gating_prod, | ||
348 | .slcg_perf_load_gating_prod = | ||
349 | gm20b_slcg_perf_load_gating_prod, | ||
350 | .slcg_priring_load_gating_prod = | ||
351 | gm20b_slcg_priring_load_gating_prod, | ||
352 | .slcg_pmu_load_gating_prod = | ||
353 | gm20b_slcg_pmu_load_gating_prod, | ||
354 | .slcg_therm_load_gating_prod = | ||
355 | gm20b_slcg_therm_load_gating_prod, | ||
356 | .slcg_xbar_load_gating_prod = | ||
357 | gm20b_slcg_xbar_load_gating_prod, | ||
358 | .blcg_bus_load_gating_prod = | ||
359 | gm20b_blcg_bus_load_gating_prod, | ||
360 | .blcg_ctxsw_firmware_load_gating_prod = | ||
361 | gm20b_blcg_ctxsw_firmware_load_gating_prod, | ||
362 | .blcg_fb_load_gating_prod = | ||
363 | gm20b_blcg_fb_load_gating_prod, | ||
364 | .blcg_fifo_load_gating_prod = | ||
365 | gm20b_blcg_fifo_load_gating_prod, | ||
366 | .blcg_gr_load_gating_prod = | ||
367 | gm20b_blcg_gr_load_gating_prod, | ||
368 | .blcg_ltc_load_gating_prod = | ||
369 | gm20b_blcg_ltc_load_gating_prod, | ||
370 | .blcg_pwr_csb_load_gating_prod = | ||
371 | gm20b_blcg_pwr_csb_load_gating_prod, | ||
372 | .blcg_xbar_load_gating_prod = | ||
373 | gm20b_blcg_xbar_load_gating_prod, | ||
374 | .blcg_pmu_load_gating_prod = | ||
375 | gm20b_blcg_pmu_load_gating_prod, | ||
376 | .pg_gr_load_gating_prod = | ||
377 | gr_gm20b_pg_gr_load_gating_prod, | ||
378 | }, | ||
379 | .fifo = { | ||
380 | .init_fifo_setup_hw = gk20a_init_fifo_setup_hw, | ||
381 | .bind_channel = channel_gm20b_bind, | ||
382 | .unbind_channel = gk20a_fifo_channel_unbind, | ||
383 | .disable_channel = gk20a_fifo_disable_channel, | ||
384 | .enable_channel = gk20a_fifo_enable_channel, | ||
385 | .alloc_inst = gk20a_fifo_alloc_inst, | ||
386 | .free_inst = gk20a_fifo_free_inst, | ||
387 | .setup_ramfc = gk20a_fifo_setup_ramfc, | ||
388 | .channel_set_timeslice = gk20a_fifo_set_timeslice, | ||
389 | .default_timeslice_us = gk20a_fifo_default_timeslice_us, | ||
390 | .setup_userd = gk20a_fifo_setup_userd, | ||
391 | .userd_gp_get = gk20a_fifo_userd_gp_get, | ||
392 | .userd_gp_put = gk20a_fifo_userd_gp_put, | ||
393 | .userd_pb_get = gk20a_fifo_userd_pb_get, | ||
394 | .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, | ||
395 | .preempt_channel = gk20a_fifo_preempt_channel, | ||
396 | .preempt_tsg = gk20a_fifo_preempt_tsg, | ||
397 | .enable_tsg = gk20a_enable_tsg, | ||
398 | .disable_tsg = gk20a_disable_tsg, | ||
399 | .tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status, | ||
400 | .tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload, | ||
401 | .update_runlist = gk20a_fifo_update_runlist, | ||
402 | .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault, | ||
403 | .get_mmu_fault_info = gk20a_fifo_get_mmu_fault_info, | ||
404 | .wait_engine_idle = gk20a_fifo_wait_engine_idle, | ||
405 | .get_num_fifos = gm20b_fifo_get_num_fifos, | ||
406 | .get_pbdma_signature = gk20a_fifo_get_pbdma_signature, | ||
407 | .set_runlist_interleave = gk20a_fifo_set_runlist_interleave, | ||
408 | .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, | ||
409 | .force_reset_ch = gk20a_fifo_force_reset_ch, | ||
410 | .engine_enum_from_type = gk20a_fifo_engine_enum_from_type, | ||
411 | .device_info_data_parse = gm20b_device_info_data_parse, | ||
412 | .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v, | ||
413 | .init_engine_info = gk20a_fifo_init_engine_info, | ||
414 | .runlist_entry_size = ram_rl_entry_size_v, | ||
415 | .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry, | ||
416 | .get_ch_runlist_entry = gk20a_get_ch_runlist_entry, | ||
417 | .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc, | ||
418 | .dump_pbdma_status = gk20a_dump_pbdma_status, | ||
419 | .dump_eng_status = gk20a_dump_eng_status, | ||
420 | .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc, | ||
421 | .intr_0_error_mask = gk20a_fifo_intr_0_error_mask, | ||
422 | .is_preempt_pending = gk20a_fifo_is_preempt_pending, | ||
423 | .init_pbdma_intr_descs = gm20b_fifo_init_pbdma_intr_descs, | ||
424 | .reset_enable_hw = gk20a_init_fifo_reset_enable_hw, | ||
425 | .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg, | ||
426 | .handle_sched_error = gk20a_fifo_handle_sched_error, | ||
427 | .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, | ||
428 | .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, | ||
429 | .tsg_bind_channel = gk20a_tsg_bind_channel, | ||
430 | .tsg_unbind_channel = gk20a_tsg_unbind_channel, | ||
431 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
432 | .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf, | ||
433 | .free_syncpt_buf = gk20a_fifo_free_syncpt_buf, | ||
434 | .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd, | ||
435 | .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size, | ||
436 | .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd, | ||
437 | .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size, | ||
438 | #endif | ||
439 | }, | ||
440 | .gr_ctx = { | ||
441 | .get_netlist_name = gr_gm20b_get_netlist_name, | ||
442 | .is_fw_defined = gr_gm20b_is_firmware_defined, | ||
443 | }, | ||
444 | .mm = { | ||
445 | .support_sparse = gm20b_mm_support_sparse, | ||
446 | .gmmu_map = gk20a_locked_gmmu_map, | ||
447 | .gmmu_unmap = gk20a_locked_gmmu_unmap, | ||
448 | .vm_bind_channel = gk20a_vm_bind_channel, | ||
449 | .fb_flush = gk20a_mm_fb_flush, | ||
450 | .l2_invalidate = gk20a_mm_l2_invalidate, | ||
451 | .l2_flush = gk20a_mm_l2_flush, | ||
452 | .cbc_clean = gk20a_mm_cbc_clean, | ||
453 | .set_big_page_size = gm20b_mm_set_big_page_size, | ||
454 | .get_big_page_sizes = gm20b_mm_get_big_page_sizes, | ||
455 | .get_default_big_page_size = gm20b_mm_get_default_big_page_size, | ||
456 | .gpu_phys_addr = gm20b_gpu_phys_addr, | ||
457 | .get_iommu_bit = gk20a_mm_get_iommu_bit, | ||
458 | .get_mmu_levels = gk20a_mm_get_mmu_levels, | ||
459 | .init_pdb = gk20a_mm_init_pdb, | ||
460 | .init_mm_setup_hw = gk20a_init_mm_setup_hw, | ||
461 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | ||
462 | .alloc_inst_block = gk20a_alloc_inst_block, | ||
463 | .init_inst_block = gk20a_init_inst_block, | ||
464 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | ||
465 | .get_kind_invalid = gm20b_get_kind_invalid, | ||
466 | .get_kind_pitch = gm20b_get_kind_pitch, | ||
467 | }, | ||
468 | .therm = { | ||
469 | .init_therm_setup_hw = gm20b_init_therm_setup_hw, | ||
470 | .elcg_init_idle_filters = gk20a_elcg_init_idle_filters, | ||
471 | }, | ||
472 | .pmu = { | ||
473 | .pmu_setup_elpg = gm20b_pmu_setup_elpg, | ||
474 | .pmu_get_queue_head = pwr_pmu_queue_head_r, | ||
475 | .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v, | ||
476 | .pmu_get_queue_tail = pwr_pmu_queue_tail_r, | ||
477 | .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v, | ||
478 | .pmu_queue_head = gk20a_pmu_queue_head, | ||
479 | .pmu_queue_tail = gk20a_pmu_queue_tail, | ||
480 | .pmu_msgq_tail = gk20a_pmu_msgq_tail, | ||
481 | .pmu_mutex_size = pwr_pmu_mutex__size_1_v, | ||
482 | .pmu_mutex_acquire = gk20a_pmu_mutex_acquire, | ||
483 | .pmu_mutex_release = gk20a_pmu_mutex_release, | ||
484 | .write_dmatrfbase = gm20b_write_dmatrfbase, | ||
485 | .pmu_elpg_statistics = gk20a_pmu_elpg_statistics, | ||
486 | .pmu_pg_init_param = NULL, | ||
487 | .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list, | ||
488 | .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list, | ||
489 | .pmu_is_lpwr_feature_supported = NULL, | ||
490 | .pmu_lpwr_enable_pg = NULL, | ||
491 | .pmu_lpwr_disable_pg = NULL, | ||
492 | .pmu_pg_param_post_init = NULL, | ||
493 | .dump_secure_fuses = pmu_dump_security_fuses_gm20b, | ||
494 | .reset_engine = gk20a_pmu_engine_reset, | ||
495 | .is_engine_in_reset = gk20a_pmu_is_engine_in_reset, | ||
496 | }, | ||
497 | .clk = { | ||
498 | .init_clk_support = gm20b_init_clk_support, | ||
499 | .suspend_clk_support = gm20b_suspend_clk_support, | ||
500 | #ifdef CONFIG_DEBUG_FS | ||
501 | .init_debugfs = gm20b_clk_init_debugfs, | ||
502 | #endif | ||
503 | .get_voltage = gm20b_clk_get_voltage, | ||
504 | .get_gpcclk_clock_counter = gm20b_clk_get_gpcclk_clock_counter, | ||
505 | .pll_reg_write = gm20b_clk_pll_reg_write, | ||
506 | .get_pll_debug_data = gm20b_clk_get_pll_debug_data, | ||
507 | }, | ||
508 | .regops = { | ||
509 | .get_global_whitelist_ranges = | ||
510 | gm20b_get_global_whitelist_ranges, | ||
511 | .get_global_whitelist_ranges_count = | ||
512 | gm20b_get_global_whitelist_ranges_count, | ||
513 | .get_context_whitelist_ranges = | ||
514 | gm20b_get_context_whitelist_ranges, | ||
515 | .get_context_whitelist_ranges_count = | ||
516 | gm20b_get_context_whitelist_ranges_count, | ||
517 | .get_runcontrol_whitelist = gm20b_get_runcontrol_whitelist, | ||
518 | .get_runcontrol_whitelist_count = | ||
519 | gm20b_get_runcontrol_whitelist_count, | ||
520 | .get_runcontrol_whitelist_ranges = | ||
521 | gm20b_get_runcontrol_whitelist_ranges, | ||
522 | .get_runcontrol_whitelist_ranges_count = | ||
523 | gm20b_get_runcontrol_whitelist_ranges_count, | ||
524 | .get_qctl_whitelist = gm20b_get_qctl_whitelist, | ||
525 | .get_qctl_whitelist_count = gm20b_get_qctl_whitelist_count, | ||
526 | .get_qctl_whitelist_ranges = gm20b_get_qctl_whitelist_ranges, | ||
527 | .get_qctl_whitelist_ranges_count = | ||
528 | gm20b_get_qctl_whitelist_ranges_count, | ||
529 | .apply_smpc_war = gm20b_apply_smpc_war, | ||
530 | }, | ||
531 | .mc = { | ||
532 | .intr_enable = mc_gk20a_intr_enable, | ||
533 | .intr_unit_config = mc_gk20a_intr_unit_config, | ||
534 | .isr_stall = mc_gk20a_isr_stall, | ||
535 | .intr_stall = mc_gk20a_intr_stall, | ||
536 | .intr_stall_pause = mc_gk20a_intr_stall_pause, | ||
537 | .intr_stall_resume = mc_gk20a_intr_stall_resume, | ||
538 | .intr_nonstall = mc_gk20a_intr_nonstall, | ||
539 | .intr_nonstall_pause = mc_gk20a_intr_nonstall_pause, | ||
540 | .intr_nonstall_resume = mc_gk20a_intr_nonstall_resume, | ||
541 | .enable = gk20a_mc_enable, | ||
542 | .disable = gk20a_mc_disable, | ||
543 | .reset = gk20a_mc_reset, | ||
544 | .boot_0 = gk20a_mc_boot_0, | ||
545 | .is_intr1_pending = mc_gk20a_is_intr1_pending, | ||
546 | }, | ||
547 | .debug = { | ||
548 | .show_dump = gk20a_debug_show_dump, | ||
549 | }, | ||
550 | .dbg_session_ops = { | ||
551 | .exec_reg_ops = exec_regops_gk20a, | ||
552 | .dbg_set_powergate = dbg_set_powergate, | ||
553 | .check_and_set_global_reservation = | ||
554 | nvgpu_check_and_set_global_reservation, | ||
555 | .check_and_set_context_reservation = | ||
556 | nvgpu_check_and_set_context_reservation, | ||
557 | .release_profiler_reservation = | ||
558 | nvgpu_release_profiler_reservation, | ||
559 | .perfbuffer_enable = gk20a_perfbuf_enable_locked, | ||
560 | .perfbuffer_disable = gk20a_perfbuf_disable_locked, | ||
561 | }, | ||
562 | .bus = { | ||
563 | .init_hw = gk20a_bus_init_hw, | ||
564 | .isr = gk20a_bus_isr, | ||
565 | .read_ptimer = gk20a_read_ptimer, | ||
566 | .get_timestamps_zipper = nvgpu_get_timestamps_zipper, | ||
567 | .bar1_bind = gm20b_bus_bar1_bind, | ||
568 | }, | ||
569 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
570 | .css = { | ||
571 | .enable_snapshot = css_hw_enable_snapshot, | ||
572 | .disable_snapshot = css_hw_disable_snapshot, | ||
573 | .check_data_available = css_hw_check_data_available, | ||
574 | .set_handled_snapshots = css_hw_set_handled_snapshots, | ||
575 | .allocate_perfmon_ids = css_gr_allocate_perfmon_ids, | ||
576 | .release_perfmon_ids = css_gr_release_perfmon_ids, | ||
577 | }, | ||
578 | #endif | ||
579 | .falcon = { | ||
580 | .falcon_hal_sw_init = gk20a_falcon_hal_sw_init, | ||
581 | }, | ||
582 | .priv_ring = { | ||
583 | .isr = gk20a_priv_ring_isr, | ||
584 | }, | ||
585 | .chip_init_gpu_characteristics = gk20a_init_gpu_characteristics, | ||
586 | .get_litter_value = gm20b_get_litter_value, | ||
587 | }; | ||
588 | |||
589 | int gm20b_init_hal(struct gk20a *g) | ||
590 | { | ||
591 | struct gpu_ops *gops = &g->ops; | ||
592 | u32 val; | ||
593 | |||
594 | gops->ltc = gm20b_ops.ltc; | ||
595 | gops->ce2 = gm20b_ops.ce2; | ||
596 | gops->gr = gm20b_ops.gr; | ||
597 | gops->fb = gm20b_ops.fb; | ||
598 | gops->clock_gating = gm20b_ops.clock_gating; | ||
599 | gops->fifo = gm20b_ops.fifo; | ||
600 | gops->gr_ctx = gm20b_ops.gr_ctx; | ||
601 | gops->mm = gm20b_ops.mm; | ||
602 | gops->therm = gm20b_ops.therm; | ||
603 | gops->pmu = gm20b_ops.pmu; | ||
604 | /* | ||
605 | * clk must be assigned member by member | ||
606 | * since some clk ops are assigned during probe prior to HAL init | ||
607 | */ | ||
608 | gops->clk.init_clk_support = gm20b_ops.clk.init_clk_support; | ||
609 | gops->clk.suspend_clk_support = gm20b_ops.clk.suspend_clk_support; | ||
610 | gops->clk.get_voltage = gm20b_ops.clk.get_voltage; | ||
611 | gops->clk.get_gpcclk_clock_counter = | ||
612 | gm20b_ops.clk.get_gpcclk_clock_counter; | ||
613 | gops->clk.pll_reg_write = gm20b_ops.clk.pll_reg_write; | ||
614 | gops->clk.get_pll_debug_data = gm20b_ops.clk.get_pll_debug_data; | ||
615 | |||
616 | gops->regops = gm20b_ops.regops; | ||
617 | gops->mc = gm20b_ops.mc; | ||
618 | gops->dbg_session_ops = gm20b_ops.dbg_session_ops; | ||
619 | gops->debug = gm20b_ops.debug; | ||
620 | gops->bus = gm20b_ops.bus; | ||
621 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
622 | gops->css = gm20b_ops.css; | ||
623 | #endif | ||
624 | gops->falcon = gm20b_ops.falcon; | ||
625 | |||
626 | gops->priv_ring = gm20b_ops.priv_ring; | ||
627 | |||
628 | /* Lone functions */ | ||
629 | gops->chip_init_gpu_characteristics = | ||
630 | gm20b_ops.chip_init_gpu_characteristics; | ||
631 | gops->get_litter_value = gm20b_ops.get_litter_value; | ||
632 | |||
633 | __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true); | ||
634 | __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false); | ||
635 | __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false); | ||
636 | |||
637 | #ifdef CONFIG_TEGRA_ACR | ||
638 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
639 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
640 | } else { | ||
641 | val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); | ||
642 | if (!val) { | ||
643 | gk20a_dbg_info("priv security is disabled in HW"); | ||
644 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
645 | } else { | ||
646 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
647 | } | ||
648 | } | ||
649 | #else | ||
650 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
651 | gk20a_dbg_info("running ASIM with PRIV security disabled"); | ||
652 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
653 | } else { | ||
654 | val = gk20a_readl(g, fuse_opt_priv_sec_en_r()); | ||
655 | if (!val) { | ||
656 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false); | ||
657 | } else { | ||
658 | gk20a_dbg_info("priv security is not supported but enabled"); | ||
659 | __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true); | ||
660 | return -EPERM; | ||
661 | } | ||
662 | } | ||
663 | #endif | ||
664 | |||
665 | /* priv security dependent ops */ | ||
666 | if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
667 | /* Add in ops from gm20b acr */ | ||
668 | gops->pmu.is_pmu_supported = gm20b_is_pmu_supported; | ||
669 | gops->pmu.prepare_ucode = prepare_ucode_blob; | ||
670 | gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn; | ||
671 | gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap; | ||
672 | gops->pmu.is_priv_load = gm20b_is_priv_load; | ||
673 | gops->pmu.get_wpr = gm20b_wpr_info; | ||
674 | gops->pmu.alloc_blob_space = gm20b_alloc_blob_space; | ||
675 | gops->pmu.pmu_populate_loader_cfg = | ||
676 | gm20b_pmu_populate_loader_cfg; | ||
677 | gops->pmu.flcn_populate_bl_dmem_desc = | ||
678 | gm20b_flcn_populate_bl_dmem_desc; | ||
679 | gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt; | ||
680 | gops->pmu.falcon_clear_halt_interrupt_status = | ||
681 | clear_halt_interrupt_status; | ||
682 | gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1; | ||
683 | |||
684 | gops->pmu.init_wpr_region = gm20b_pmu_init_acr; | ||
685 | gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode; | ||
686 | |||
687 | gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; | ||
688 | } else { | ||
689 | /* Inherit from gk20a */ | ||
690 | gops->pmu.is_pmu_supported = gk20a_is_pmu_supported; | ||
691 | gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob; | ||
692 | gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1; | ||
693 | gops->pmu.pmu_nsbootstrap = pmu_bootstrap; | ||
694 | |||
695 | gops->pmu.load_lsfalcon_ucode = NULL; | ||
696 | gops->pmu.init_wpr_region = NULL; | ||
697 | |||
698 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | ||
699 | } | ||
700 | |||
701 | __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); | ||
702 | g->pmu_lsf_pmu_wpr_init_done = 0; | ||
703 | g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; | ||
704 | |||
705 | g->name = "gm20b"; | ||
706 | |||
707 | return 0; | ||
708 | } | ||
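For context, the gpu_ops table assembled above only installs GM20B-specific callbacks; common nvgpu code then dispatches through g->ops without knowing which chip it is running on. A minimal sketch of that dispatch pattern, assuming the usual gk20a/nvgpu headers are already included (the helper name invalidate_all_comptags is hypothetical and not part of this change):

/* Illustrative only: resolves to gm20b_ltc_cbc_ctrl() once gm20b_init_hal() has run. */
static int invalidate_all_comptags(struct gk20a *g, u32 nlines)
{
	return g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, 0, nlines - 1);
}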
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.h b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h new file mode 100644 index 00000000..22eae182 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * GM20B Graphics | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_HAL_GM20B_H | ||
26 | #define _NVHOST_HAL_GM20B_H | ||
27 | struct gk20a; | ||
28 | |||
29 | int gm20b_init_hal(struct gk20a *g); | ||
30 | int gm20b_get_litter_value(struct gk20a *g, int value); | ||
31 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c new file mode 100644 index 00000000..6ec9aec5 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -0,0 +1,487 @@ | |||
1 | /* | ||
2 | * GM20B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | |||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/bug.h> | ||
30 | #include <nvgpu/ltc.h> | ||
31 | |||
32 | #include <nvgpu/hw/gm20b/hw_mc_gm20b.h> | ||
33 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
34 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
35 | #include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h> | ||
36 | |||
37 | #include "gk20a/gk20a.h" | ||
38 | |||
39 | #include "ltc_gm20b.h" | ||
40 | |||
41 | int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
42 | { | ||
43 | /* max memory size (MB) to cover */ | ||
44 | u32 max_size = gr->max_comptag_mem; | ||
45 | /* one tag line covers 128KB */ | ||
46 | u32 max_comptag_lines = max_size << 3; | ||
47 | |||
48 | u32 hw_max_comptag_lines = | ||
49 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); | ||
50 | |||
51 | u32 cbc_param = | ||
52 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
53 | u32 comptags_per_cacheline = | ||
54 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | ||
55 | u32 cacheline_size = | ||
56 | 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); | ||
57 | u32 slices_per_ltc = | ||
58 | ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param); | ||
59 | |||
60 | u32 compbit_backing_size; | ||
61 | |||
62 | int err; | ||
63 | |||
64 | gk20a_dbg_fn(""); | ||
65 | |||
66 | if (max_comptag_lines == 0) | ||
67 | return 0; | ||
68 | |||
69 | if (max_comptag_lines > hw_max_comptag_lines) | ||
70 | max_comptag_lines = hw_max_comptag_lines; | ||
71 | |||
72 | compbit_backing_size = | ||
73 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | ||
74 | cacheline_size * slices_per_ltc * g->ltc_count; | ||
75 | |||
76 | /* aligned to 2KB * ltc_count */ | ||
77 | compbit_backing_size += | ||
78 | g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
79 | |||
80 | /* must be a multiple of 64KB */ | ||
81 | compbit_backing_size = roundup(compbit_backing_size, 64*1024); | ||
82 | |||
83 | max_comptag_lines = | ||
84 | (compbit_backing_size * comptags_per_cacheline) / | ||
85 | (cacheline_size * slices_per_ltc * g->ltc_count); | ||
86 | |||
87 | if (max_comptag_lines > hw_max_comptag_lines) | ||
88 | max_comptag_lines = hw_max_comptag_lines; | ||
89 | |||
90 | gk20a_dbg_info("compbit backing store size : %d", | ||
91 | compbit_backing_size); | ||
92 | gk20a_dbg_info("max comptag lines : %d", | ||
93 | max_comptag_lines); | ||
94 | |||
95 | err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size); | ||
96 | if (err) | ||
97 | return err; | ||
98 | |||
99 | err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); | ||
100 | if (err) | ||
101 | return err; | ||
102 | |||
103 | gr->comptags_per_cacheline = comptags_per_cacheline; | ||
104 | gr->slices_per_ltc = slices_per_ltc; | ||
105 | gr->cacheline_size = cacheline_size; | ||
106 | |||
107 | return 0; | ||
108 | } | ||
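To make the sizing above concrete, here is a worked example with assumed CBC parameters; the real values are read from ltc_ltcs_ltss_cbc_param_r() at runtime, so these numbers are illustrative only:

/*
 * Assumed: max_comptag_mem = 4096 MB  ->  max_comptag_lines = 4096 << 3 = 32768
 *          comptags_per_cacheline = 16, cacheline_size = 2048 B
 *          slices_per_ltc = 2, ltc_count = 2
 *
 * compbit_backing_size = DIV_ROUND_UP(32768, 16) * 2048 * 2 * 2
 *                      = 2048 * 2048 * 4 = 16 MB
 * plus the 2KB-per-LTC alignment pad, rounded up to a 64KB multiple.
 */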
109 | |||
110 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
111 | u32 min, u32 max) | ||
112 | { | ||
113 | struct gr_gk20a *gr = &g->gr; | ||
114 | struct nvgpu_timeout timeout; | ||
115 | int err = 0; | ||
116 | u32 ltc, slice, ctrl1, val, hw_op = 0; | ||
117 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | ||
118 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | ||
119 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
120 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
121 | const u32 max_lines = 16384; | ||
122 | |||
123 | gk20a_dbg_fn(""); | ||
124 | |||
125 | trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); | ||
126 | |||
127 | if (gr->compbit_store.mem.size == 0) | ||
128 | return 0; | ||
129 | |||
130 | while (1) { | ||
131 | const u32 iter_max = min(min + max_lines - 1, max); | ||
132 | bool full_cache_op = true; | ||
133 | |||
134 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | ||
135 | |||
136 | gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max); | ||
137 | |||
138 | if (op == gk20a_cbc_op_clear) { | ||
139 | gk20a_writel( | ||
140 | g, ltc_ltcs_ltss_cbc_ctrl2_r(), | ||
141 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( | ||
142 | min)); | ||
143 | gk20a_writel( | ||
144 | g, ltc_ltcs_ltss_cbc_ctrl3_r(), | ||
145 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( | ||
146 | iter_max)); | ||
147 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | ||
148 | full_cache_op = false; | ||
149 | } else if (op == gk20a_cbc_op_clean) { | ||
150 | /* this is a full-cache op */ | ||
151 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | ||
152 | } else if (op == gk20a_cbc_op_invalidate) { | ||
153 | /* this is a full-cache op */ | ||
154 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | ||
155 | } else { | ||
156 | nvgpu_err(g, "Unknown op: %u", (unsigned)op); | ||
157 | err = -EINVAL; | ||
158 | goto out; | ||
159 | } | ||
160 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | ||
161 | gk20a_readl(g, | ||
162 | ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | ||
163 | |||
164 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
165 | for (slice = 0; slice < slices_per_ltc; slice++) { | ||
166 | |||
167 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
168 | ltc * ltc_stride + slice * lts_stride; | ||
169 | |||
170 | nvgpu_timeout_init(g, &timeout, 2000, | ||
171 | NVGPU_TIMER_RETRY_TIMER); | ||
172 | do { | ||
173 | val = gk20a_readl(g, ctrl1); | ||
174 | if (!(val & hw_op)) | ||
175 | break; | ||
176 | nvgpu_udelay(5); | ||
177 | } while (!nvgpu_timeout_expired(&timeout)); | ||
178 | |||
179 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
180 | nvgpu_err(g, "comp tag clear timeout"); | ||
181 | err = -EBUSY; | ||
182 | goto out; | ||
183 | } | ||
184 | } | ||
185 | } | ||
186 | |||
187 | /* are we done? */ | ||
188 | if (full_cache_op || iter_max == max) | ||
189 | break; | ||
190 | |||
191 | /* note: iter_max is inclusive upper bound */ | ||
192 | min = iter_max + 1; | ||
193 | |||
194 | /* give a chance for higher-priority threads to progress */ | ||
195 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
196 | } | ||
197 | out: | ||
198 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | ||
199 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
200 | return err; | ||
201 | } | ||
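The loop above bounds each clear to max_lines (16384) comptag lines and drops l2_op_lock between chunks so other L2 operations can interleave. An illustrative trace, assuming a clear of lines 0..40000:

/*
 * iteration 1: clear lines 0     .. 16383
 * iteration 2: clear lines 16384 .. 32767
 * iteration 3: clear lines 32768 .. 40000   (iter_max == max, loop exits)
 * clean/invalidate are full-cache operations and complete in one pass.
 */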
202 | |||
203 | void gm20b_ltc_init_fs_state(struct gk20a *g) | ||
204 | { | ||
205 | u32 reg; | ||
206 | |||
207 | gk20a_dbg_info("initialize gm20b l2"); | ||
208 | |||
209 | g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); | ||
210 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); | ||
211 | gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count); | ||
212 | |||
213 | gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), | ||
214 | g->ltc_count); | ||
215 | gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), | ||
216 | g->ltc_count); | ||
217 | |||
218 | gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(), | ||
219 | gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) | | ||
220 | ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m()); | ||
221 | |||
222 | /* Disable LTC interrupts */ | ||
223 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | ||
224 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | ||
225 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(); | ||
226 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m(); | ||
227 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); | ||
228 | } | ||
229 | |||
230 | void gm20b_ltc_isr(struct gk20a *g) | ||
231 | { | ||
232 | u32 mc_intr, ltc_intr; | ||
233 | unsigned int ltc, slice; | ||
234 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
235 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
236 | |||
237 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | ||
238 | nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); | ||
239 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
240 | if ((mc_intr & 1 << ltc) == 0) | ||
241 | continue; | ||
242 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | ||
243 | ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + | ||
244 | ltc_stride * ltc + | ||
245 | lts_stride * slice); | ||
246 | nvgpu_err(g, "ltc%d, slice %d: %08x", | ||
247 | ltc, slice, ltc_intr); | ||
248 | gk20a_writel(g, ltc_ltc0_lts0_intr_r() + | ||
249 | ltc_stride * ltc + | ||
250 | lts_stride * slice, | ||
251 | ltc_intr); | ||
252 | } | ||
253 | } | ||
254 | } | ||
255 | |||
256 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) | ||
257 | { | ||
258 | u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); | ||
259 | if (val == 2) { | ||
260 | return base * 2; | ||
261 | } else if (val != 1) { | ||
262 | nvgpu_err(g, "Invalid number of active ltcs: %08x", val); | ||
263 | } | ||
264 | |||
265 | return base; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Performs a full flush of the L2 cache. | ||
270 | */ | ||
271 | void gm20b_flush_ltc(struct gk20a *g) | ||
272 | { | ||
273 | struct nvgpu_timeout timeout; | ||
274 | unsigned int ltc; | ||
275 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
276 | |||
277 | /* Clean... */ | ||
278 | gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), | ||
279 | ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | | ||
280 | ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() | | ||
281 | ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() | | ||
282 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() | | ||
283 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() | | ||
284 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f()); | ||
285 | |||
286 | /* Wait on each LTC individually. */ | ||
287 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
288 | u32 op_pending; | ||
289 | |||
290 | /* | ||
291 | * Use 5ms - this should be sufficient time to flush the cache. | ||
292 | * On tegra, rough EMC BW available for old tegra chips (newer | ||
293 | * chips are strictly faster) can be estimated as follows: | ||
294 | * | ||
295 | * Lowest reasonable EMC clock speed will be around 102MHz on | ||
296 | * t124 for display enabled boards and generally fixed to max | ||
297 | * for non-display boards (since they are generally plugged in). | ||
298 | * | ||
299 | * Thus, the available BW is 64b * 2 * 102MHz = ~1.6GB/s. Of that | ||
300 | * BW the GPU will likely get about half (display and overhead/ | ||
301 | * utilization inefficiency eating the rest), so ~800MB/s at | ||
302 | * worst. Assuming at most 1MB of GPU L2 cache (less for most | ||
303 | * chips), the worst case is we take 1MB/800MB/s = ~1.25ms. | ||
304 | * | ||
305 | * So 5ms timeout here should be more than sufficient. | ||
306 | */ | ||
307 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
308 | |||
309 | do { | ||
310 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + | ||
311 | ltc * ltc_stride; | ||
312 | op_pending = gk20a_readl(g, cmgmt1); | ||
313 | } while ((op_pending & | ||
314 | ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) && | ||
315 | !nvgpu_timeout_expired_msg(&timeout, | ||
316 | "L2 flush timeout!")); | ||
317 | } | ||
318 | |||
319 | /* And invalidate. */ | ||
320 | gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(), | ||
321 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() | | ||
322 | ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() | | ||
323 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() | | ||
324 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() | | ||
325 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f()); | ||
326 | |||
327 | /* Wait on each LTC individually. */ | ||
328 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
329 | u32 op_pending; | ||
330 | |||
331 | /* Again, 5ms. */ | ||
332 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
333 | |||
334 | do { | ||
335 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + | ||
336 | ltc * ltc_stride; | ||
337 | op_pending = gk20a_readl(g, cmgmt0); | ||
338 | } while ((op_pending & | ||
339 | ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) && | ||
340 | !nvgpu_timeout_expired_msg(&timeout, | ||
341 | "L2 flush timeout!")); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | int gm20b_determine_L2_size_bytes(struct gk20a *g) | ||
346 | { | ||
347 | u32 lts_per_ltc; | ||
348 | u32 ways; | ||
349 | u32 sets; | ||
350 | u32 bytes_per_line; | ||
351 | u32 active_ltcs; | ||
352 | u32 cache_size; | ||
353 | |||
354 | u32 tmp; | ||
355 | u32 active_sets_value; | ||
356 | |||
357 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
358 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
359 | |||
360 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
361 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
362 | sets = 64; | ||
363 | } else if (active_sets_value == | ||
364 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
365 | sets = 32; | ||
366 | } else if (active_sets_value == | ||
367 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
368 | sets = 16; | ||
369 | } else { | ||
370 | nvgpu_err(g, "Unknown constant %u for active sets", | ||
371 | (unsigned)active_sets_value); | ||
372 | sets = 0; | ||
373 | } | ||
374 | |||
375 | active_ltcs = g->gr.num_fbps; | ||
376 | |||
377 | /* chip-specific values */ | ||
378 | lts_per_ltc = 2; | ||
379 | bytes_per_line = 128; | ||
380 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
381 | |||
382 | return cache_size; | ||
383 | } | ||
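A worked example of the size computation, with assumed readings of ltc_ltc0_lts0_tstg_cfg1_r() (illustrative values, not verified against GM20B hardware):

/*
 * active_ways bitmask -> hweight32() = 16 ways, active_sets == "all" -> 64 sets
 * active_ltcs (num_fbps) = 1, lts_per_ltc = 2, bytes_per_line = 128
 *
 * cache_size = 1 * 2 * 16 * 64 * 128 = 262144 bytes = 256 KB
 */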
384 | |||
385 | /* | ||
386 | * Sets the ZBC color for the passed index. | ||
387 | */ | ||
388 | void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, | ||
389 | struct zbc_entry *color_val, | ||
390 | u32 index) | ||
391 | { | ||
392 | u32 i; | ||
393 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
394 | |||
395 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
396 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
397 | |||
398 | for (i = 0; | ||
399 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { | ||
400 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
401 | color_val->color_l2[i]); | ||
402 | } | ||
403 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Sets the ZBC depth for the passed index. | ||
408 | */ | ||
409 | void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
410 | struct zbc_entry *depth_val, | ||
411 | u32 index) | ||
412 | { | ||
413 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
414 | |||
415 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
416 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
417 | |||
418 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
419 | depth_val->depth); | ||
420 | |||
421 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
422 | } | ||
423 | |||
424 | void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | ||
425 | { | ||
426 | u32 max_size = gr->max_comptag_mem; | ||
427 | u32 max_comptag_lines = max_size << 3; | ||
428 | |||
429 | u32 compbit_base_post_divide; | ||
430 | u64 compbit_base_post_multiply64; | ||
431 | u64 compbit_store_iova; | ||
432 | u64 compbit_base_post_divide64; | ||
433 | |||
434 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
435 | compbit_store_iova = nvgpu_mem_get_phys_addr(g, | ||
436 | &gr->compbit_store.mem); | ||
437 | else | ||
438 | compbit_store_iova = nvgpu_mem_get_addr(g, | ||
439 | &gr->compbit_store.mem); | ||
440 | |||
441 | compbit_base_post_divide64 = compbit_store_iova >> | ||
442 | ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
443 | |||
444 | do_div(compbit_base_post_divide64, g->ltc_count); | ||
445 | compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); | ||
446 | |||
447 | compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * | ||
448 | g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
449 | |||
450 | if (compbit_base_post_multiply64 < compbit_store_iova) | ||
451 | compbit_base_post_divide++; | ||
452 | |||
453 | /* Bug 1477079 calls for a SW adjustment of the post-divided base. */ | ||
454 | if (g->ops.ltc.cbc_fix_config) | ||
455 | compbit_base_post_divide = | ||
456 | g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); | ||
457 | |||
458 | gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), | ||
459 | compbit_base_post_divide); | ||
460 | |||
461 | gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, | ||
462 | "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", | ||
463 | (u32)(compbit_store_iova >> 32), | ||
464 | (u32)(compbit_store_iova & 0xffffffff), | ||
465 | compbit_base_post_divide); | ||
466 | |||
467 | gr->compbit_store.base_hw = compbit_base_post_divide; | ||
468 | |||
469 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, | ||
470 | 0, max_comptag_lines - 1); | ||
471 | |||
472 | } | ||
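The base programming above divides the backing-store IOVA by both the 2KB alignment unit and the LTC count, then rounds up if the re-multiplied value falls short of the original address. A worked example with an assumed (hypothetical) IOVA:

/*
 * compbit_store_iova                 = 0x1_2345_0800
 * >> cbc_base_alignment_shift (11)   = 0x2468A1
 * / ltc_count (2)                    = 0x123450   (truncating division)
 * re-check: (0x123450 * 2) << 11     = 0x1_2345_0000 < iova,
 * so compbit_base_post_divide is bumped to 0x123451 before being
 * written to ltc_ltcs_ltss_cbc_base_r().
 */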
473 | |||
474 | void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled) | ||
475 | { | ||
476 | u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); | ||
477 | u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); | ||
478 | |||
479 | if (enabled) | ||
480 | /* bypass disabled (normal caching ops) */ | ||
481 | reg &= ~reg_f; | ||
482 | else | ||
483 | /* bypass enabled (no caching) */ | ||
484 | reg |= reg_f; | ||
485 | |||
486 | gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); | ||
487 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h new file mode 100644 index 00000000..0f9145be --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * GM20B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_LTC | ||
26 | #define _NVHOST_GM20B_LTC | ||
27 | struct gpu_ops; | ||
28 | |||
29 | int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); | ||
30 | int gm20b_determine_L2_size_bytes(struct gk20a *g); | ||
31 | void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, | ||
32 | struct zbc_entry *color_val, | ||
33 | u32 index); | ||
34 | void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
35 | struct zbc_entry *depth_val, | ||
36 | u32 index); | ||
37 | void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); | ||
38 | void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled); | ||
39 | void gm20b_ltc_init_fs_state(struct gk20a *g); | ||
40 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
41 | u32 min, u32 max); | ||
42 | void gm20b_ltc_isr(struct gk20a *g); | ||
43 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); | ||
44 | void gm20b_flush_ltc(struct gk20a *g); | ||
45 | int gm20b_ltc_alloc_phys_cbc(struct gk20a *g, | ||
46 | size_t compbit_backing_size); | ||
47 | int gm20b_ltc_alloc_virt_cbc(struct gk20a *g, | ||
48 | size_t compbit_backing_size); | ||
49 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c new file mode 100644 index 00000000..5cd7706d --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * GM20B MMU | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | |||
27 | #include "mm_gm20b.h" | ||
28 | |||
29 | #include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h> | ||
30 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | ||
31 | |||
32 | void gm20b_mm_set_big_page_size(struct gk20a *g, | ||
33 | struct nvgpu_mem *mem, int size) | ||
34 | { | ||
35 | u32 val; | ||
36 | |||
37 | gk20a_dbg_fn(""); | ||
38 | |||
39 | gk20a_dbg_info("big page size %d\n", size); | ||
40 | val = nvgpu_mem_rd32(g, mem, ram_in_big_page_size_w()); | ||
41 | val &= ~ram_in_big_page_size_m(); | ||
42 | |||
43 | if (size == SZ_64K) | ||
44 | val |= ram_in_big_page_size_64kb_f(); | ||
45 | else | ||
46 | val |= ram_in_big_page_size_128kb_f(); | ||
47 | |||
48 | nvgpu_mem_wr32(g, mem, ram_in_big_page_size_w(), val); | ||
49 | gk20a_dbg_fn("done"); | ||
50 | } | ||
51 | |||
52 | u32 gm20b_mm_get_big_page_sizes(void) | ||
53 | { | ||
54 | return SZ_64K | SZ_128K; | ||
55 | } | ||
56 | |||
57 | u32 gm20b_mm_get_default_big_page_size(void) | ||
58 | { | ||
59 | return SZ_128K; | ||
60 | } | ||
61 | |||
62 | bool gm20b_mm_support_sparse(struct gk20a *g) | ||
63 | { | ||
64 | return true; | ||
65 | } | ||
66 | |||
67 | bool gm20b_mm_is_bar1_supported(struct gk20a *g) | ||
68 | { | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | u64 gm20b_gpu_phys_addr(struct gk20a *g, | ||
73 | struct nvgpu_gmmu_attrs *attrs, u64 phys) | ||
74 | { | ||
75 | return phys; | ||
76 | } | ||
77 | |||
78 | u32 gm20b_get_kind_invalid(void) | ||
79 | { | ||
80 | return gmmu_pte_kind_invalid_v(); | ||
81 | } | ||
82 | |||
83 | u32 gm20b_get_kind_pitch(void) | ||
84 | { | ||
85 | return gmmu_pte_kind_pitch_v(); | ||
86 | } | ||
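Since gm20b_mm_get_big_page_sizes() returns the supported sizes as an OR of power-of-two values, a caller can validate a requested big page size with a simple mask test. A minimal sketch, assuming g->ops.mm has been populated by gm20b_init_hal() (the helper name is hypothetical):

/* Illustrative only: accept 64KB or 128KB on GM20B, reject anything else. */
static bool big_page_size_valid(struct gk20a *g, u32 size)
{
	return (g->ops.mm.get_big_page_sizes() & size) != 0;
}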
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h new file mode 100644 index 00000000..af67845a --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * GM20B GMMU | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_MM | ||
26 | #define _NVHOST_GM20B_MM | ||
27 | struct gk20a; | ||
28 | |||
29 | #define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1)) | ||
30 | #define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1)) | ||
31 | |||
32 | void gm20b_mm_set_big_page_size(struct gk20a *g, | ||
33 | struct nvgpu_mem *mem, int size); | ||
34 | u32 gm20b_mm_get_big_page_sizes(void); | ||
35 | u32 gm20b_mm_get_default_big_page_size(void); | ||
36 | bool gm20b_mm_support_sparse(struct gk20a *g); | ||
37 | bool gm20b_mm_is_bar1_supported(struct gk20a *g); | ||
38 | int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g); | ||
39 | u64 gm20b_gpu_phys_addr(struct gk20a *g, | ||
40 | struct nvgpu_gmmu_attrs *attrs, u64 phys); | ||
41 | u32 gm20b_get_kind_invalid(void); | ||
42 | u32 gm20b_get_kind_pitch(void); | ||
43 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c new file mode 100644 index 00000000..664134f9 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c | |||
@@ -0,0 +1,283 @@ | |||
1 | /* | ||
2 | * GM20B PMU | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <nvgpu/timers.h> | ||
26 | #include <nvgpu/pmu.h> | ||
27 | #include <nvgpu/fuse.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | |||
30 | #include "gk20a/gk20a.h" | ||
31 | #include "gk20a/pmu_gk20a.h" | ||
32 | |||
33 | #include "acr_gm20b.h" | ||
34 | #include "pmu_gm20b.h" | ||
35 | |||
36 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | ||
37 | #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h> | ||
38 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | ||
39 | |||
40 | /*! | ||
41 | * Describes a single register write to be done during the PG init | ||
42 | * sequence to set PROD values. | ||
43 | */ | ||
44 | struct pg_init_sequence_list { | ||
45 | u32 regaddr; | ||
46 | u32 writeval; | ||
47 | }; | ||
48 | |||
49 | #define gm20b_dbg_pmu(fmt, arg...) \ | ||
50 | gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) | ||
51 | |||
52 | |||
53 | /* PROD settings for ELPG sequencing registers */ | ||
54 | static struct pg_init_sequence_list _pginitseq_gm20b[] = { | ||
55 | { 0x0010ab10, 0x8180}, | ||
56 | { 0x0010e118, 0x83828180}, | ||
57 | { 0x0010e068, 0}, | ||
58 | { 0x0010e06c, 0x00000080}, | ||
59 | { 0x0010e06c, 0x00000081}, | ||
60 | { 0x0010e06c, 0x00000082}, | ||
61 | { 0x0010e06c, 0x00000083}, | ||
62 | { 0x0010e06c, 0x00000084}, | ||
63 | { 0x0010e06c, 0x00000085}, | ||
64 | { 0x0010e06c, 0x00000086}, | ||
65 | { 0x0010e06c, 0x00000087}, | ||
66 | { 0x0010e06c, 0x00000088}, | ||
67 | { 0x0010e06c, 0x00000089}, | ||
68 | { 0x0010e06c, 0x0000008a}, | ||
69 | { 0x0010e06c, 0x0000008b}, | ||
70 | { 0x0010e06c, 0x0000008c}, | ||
71 | { 0x0010e06c, 0x0000008d}, | ||
72 | { 0x0010e06c, 0x0000008e}, | ||
73 | { 0x0010e06c, 0x0000008f}, | ||
74 | { 0x0010e06c, 0x00000090}, | ||
75 | { 0x0010e06c, 0x00000091}, | ||
76 | { 0x0010e06c, 0x00000092}, | ||
77 | { 0x0010e06c, 0x00000093}, | ||
78 | { 0x0010e06c, 0x00000094}, | ||
79 | { 0x0010e06c, 0x00000095}, | ||
80 | { 0x0010e06c, 0x00000096}, | ||
81 | { 0x0010e06c, 0x00000097}, | ||
82 | { 0x0010e06c, 0x00000098}, | ||
83 | { 0x0010e06c, 0x00000099}, | ||
84 | { 0x0010e06c, 0x0000009a}, | ||
85 | { 0x0010e06c, 0x0000009b}, | ||
86 | { 0x0010ab14, 0x00000000}, | ||
87 | { 0x0010ab18, 0x00000000}, | ||
88 | { 0x0010e024, 0x00000000}, | ||
89 | { 0x0010e028, 0x00000000}, | ||
90 | { 0x0010e11c, 0x00000000}, | ||
91 | { 0x0010e120, 0x00000000}, | ||
92 | { 0x0010ab1c, 0x02010155}, | ||
93 | { 0x0010e020, 0x001b1b55}, | ||
94 | { 0x0010e124, 0x01030355}, | ||
95 | { 0x0010ab20, 0x89abcdef}, | ||
96 | { 0x0010ab24, 0x00000000}, | ||
97 | { 0x0010e02c, 0x89abcdef}, | ||
98 | { 0x0010e030, 0x00000000}, | ||
99 | { 0x0010e128, 0x89abcdef}, | ||
100 | { 0x0010e12c, 0x00000000}, | ||
101 | { 0x0010ab28, 0x74444444}, | ||
102 | { 0x0010ab2c, 0x70000000}, | ||
103 | { 0x0010e034, 0x74444444}, | ||
104 | { 0x0010e038, 0x70000000}, | ||
105 | { 0x0010e130, 0x74444444}, | ||
106 | { 0x0010e134, 0x70000000}, | ||
107 | { 0x0010ab30, 0x00000000}, | ||
108 | { 0x0010ab34, 0x00000001}, | ||
109 | { 0x00020004, 0x00000000}, | ||
110 | { 0x0010e138, 0x00000000}, | ||
111 | { 0x0010e040, 0x00000000}, | ||
112 | }; | ||
113 | |||
114 | int gm20b_pmu_setup_elpg(struct gk20a *g) | ||
115 | { | ||
116 | int ret = 0; | ||
117 | u32 reg_writes; | ||
118 | u32 index; | ||
119 | |||
120 | gk20a_dbg_fn(""); | ||
121 | |||
122 | if (g->elpg_enabled) { | ||
123 | reg_writes = ((sizeof(_pginitseq_gm20b) / | ||
124 | sizeof((_pginitseq_gm20b)[0]))); | ||
125 | /* Initialize registers with production values */ | ||
126 | for (index = 0; index < reg_writes; index++) { | ||
127 | gk20a_writel(g, _pginitseq_gm20b[index].regaddr, | ||
128 | _pginitseq_gm20b[index].writeval); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | gk20a_dbg_fn("done"); | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | static void pmu_handle_acr_init_wpr_msg(struct gk20a *g, struct pmu_msg *msg, | ||
137 | void *param, u32 handle, u32 status) | ||
138 | { | ||
139 | gk20a_dbg_fn(""); | ||
140 | |||
141 | gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_INIT_WPR_REGION"); | ||
142 | |||
143 | if (msg->msg.acr.acrmsg.errorcode == PMU_ACR_SUCCESS) | ||
144 | g->pmu_lsf_pmu_wpr_init_done = 1; | ||
145 | gk20a_dbg_fn("done"); | ||
146 | } | ||
147 | |||
148 | |||
149 | int gm20b_pmu_init_acr(struct gk20a *g) | ||
150 | { | ||
151 | struct nvgpu_pmu *pmu = &g->pmu; | ||
152 | struct pmu_cmd cmd; | ||
153 | u32 seq; | ||
154 | |||
155 | gk20a_dbg_fn(""); | ||
156 | |||
157 | /* init ACR */ | ||
158 | memset(&cmd, 0, sizeof(struct pmu_cmd)); | ||
159 | cmd.hdr.unit_id = PMU_UNIT_ACR; | ||
160 | cmd.hdr.size = PMU_CMD_HDR_SIZE + | ||
161 | sizeof(struct pmu_acr_cmd_init_wpr_details); | ||
162 | cmd.cmd.acr.init_wpr.cmd_type = PMU_ACR_CMD_ID_INIT_WPR_REGION; | ||
163 | cmd.cmd.acr.init_wpr.regionid = 0x01; | ||
164 | cmd.cmd.acr.init_wpr.wproffset = 0x00; | ||
165 | gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_INIT_WPR_REGION"); | ||
166 | nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, | ||
167 | pmu_handle_acr_init_wpr_msg, pmu, &seq, ~0); | ||
168 | |||
169 | gk20a_dbg_fn("done"); | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg, | ||
174 | void *param, u32 handle, u32 status) | ||
175 | { | ||
176 | |||
177 | gk20a_dbg_fn(""); | ||
178 | |||
179 | |||
180 | gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON"); | ||
181 | |||
182 | gm20b_dbg_pmu("response code = %x\n", msg->msg.acr.acrmsg.falconid); | ||
183 | g->pmu_lsf_loaded_falcon_id = msg->msg.acr.acrmsg.falconid; | ||
184 | gk20a_dbg_fn("done"); | ||
185 | } | ||
186 | |||
187 | static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout_ms, | ||
188 | u32 val) | ||
189 | { | ||
190 | unsigned long delay = GR_FECS_POLL_INTERVAL; | ||
191 | u32 reg; | ||
192 | struct nvgpu_timeout timeout; | ||
193 | |||
194 | gk20a_dbg_fn(""); | ||
195 | reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)); | ||
196 | |||
197 | nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER); | ||
198 | |||
199 | do { | ||
200 | reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)); | ||
201 | if (reg == val) | ||
202 | return 0; | ||
203 | nvgpu_udelay(delay); | ||
204 | } while (!nvgpu_timeout_expired(&timeout)); | ||
205 | |||
206 | return -ETIMEDOUT; | ||
207 | } | ||
208 | |||
209 | void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags) | ||
210 | { | ||
211 | struct nvgpu_pmu *pmu = &g->pmu; | ||
212 | struct pmu_cmd cmd; | ||
213 | u32 seq; | ||
214 | |||
215 | gk20a_dbg_fn(""); | ||
216 | |||
217 | gm20b_dbg_pmu("wprinit status = %x\n", g->pmu_lsf_pmu_wpr_init_done); | ||
218 | if (g->pmu_lsf_pmu_wpr_init_done) { | ||
219 | /* send message to load FECS falcon */ | ||
220 | memset(&cmd, 0, sizeof(struct pmu_cmd)); | ||
221 | cmd.hdr.unit_id = PMU_UNIT_ACR; | ||
222 | cmd.hdr.size = PMU_CMD_HDR_SIZE + | ||
223 | sizeof(struct pmu_acr_cmd_bootstrap_falcon); | ||
224 | cmd.cmd.acr.bootstrap_falcon.cmd_type = | ||
225 | PMU_ACR_CMD_ID_BOOTSTRAP_FALCON; | ||
226 | cmd.cmd.acr.bootstrap_falcon.flags = flags; | ||
227 | cmd.cmd.acr.bootstrap_falcon.falconid = falcon_id; | ||
228 | gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_BOOTSTRAP_FALCON: %x\n", | ||
229 | falcon_id); | ||
230 | nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ, | ||
231 | pmu_handle_fecs_boot_acr_msg, pmu, &seq, ~0); | ||
232 | } | ||
233 | |||
234 | gk20a_dbg_fn("done"); | ||
235 | return; | ||
236 | } | ||
237 | |||
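/*
 * Bootstrap LS falcon ucode through the PMU. GM20B supports FECS only:
 * wait for WPR init, clear FECS ctxsw mailbox 0, post the bootstrap command
 * and poll the mailbox for the 0x55AA55AA handshake.
 */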
238 | int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask) | ||
239 | { | ||
240 | int err = 0; | ||
241 | u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; | ||
242 | unsigned long timeout = gk20a_get_gr_idle_timeout(g); | ||
243 | |||
244 | /* GM20B PMU supports loading FECS only */ | ||
245 | if (!(falconidmask == (1 << LSF_FALCON_ID_FECS))) | ||
246 | return -EINVAL; | ||
247 | /* check whether the PMU is ready to bootstrap LSF; if not, wait for it */ | ||
248 | if (!g->pmu_lsf_pmu_wpr_init_done) { | ||
249 | pmu_wait_message_cond(&g->pmu, | ||
250 | gk20a_get_gr_idle_timeout(g), | ||
251 | &g->pmu_lsf_pmu_wpr_init_done, 1); | ||
252 | /* check again; if it is still not ready, report an error */ | ||
253 | if (!g->pmu_lsf_pmu_wpr_init_done) { | ||
254 | nvgpu_err(g, "PMU not ready to load LSF"); | ||
255 | return -ETIMEDOUT; | ||
256 | } | ||
257 | } | ||
258 | /* load FECS */ | ||
259 | gk20a_writel(g, | ||
260 | gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0); | ||
261 | gm20b_pmu_load_lsf(g, LSF_FALCON_ID_FECS, flags); | ||
262 | err = pmu_gm20b_ctx_wait_lsf_ready(g, timeout, | ||
263 | 0x55AA55AA); | ||
264 | return err; | ||
265 | } | ||
266 | |||
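/* Program the PMU falcon DMA transfer base register. */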
267 | void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr) | ||
268 | { | ||
269 | gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr); | ||
270 | } | ||
271 | |||
272 | /* Dump security-related fuses */ | ||
273 | void pmu_dump_security_fuses_gm20b(struct gk20a *g) | ||
274 | { | ||
275 | u32 val; | ||
276 | |||
277 | nvgpu_err(g, "FUSE_OPT_SEC_DEBUG_EN_0: 0x%x", | ||
278 | gk20a_readl(g, fuse_opt_sec_debug_en_r())); | ||
279 | nvgpu_err(g, "FUSE_OPT_PRIV_SEC_EN_0: 0x%x", | ||
280 | gk20a_readl(g, fuse_opt_priv_sec_en_r())); | ||
281 | nvgpu_tegra_fuse_read_gcplex_config_fuse(g, &val); | ||
282 | nvgpu_err(g, "FUSE_GCPLEX_CONFIG_FUSE_0: 0x%x", val); | ||
283 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h new file mode 100644 index 00000000..ec50fb06 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * GM20B PMU | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef __PMU_GM20B_H_ | ||
26 | #define __PMU_GM20B_H_ | ||
27 | |||
28 | struct gk20a; | ||
29 | |||
30 | int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask); | ||
31 | int gm20b_pmu_setup_elpg(struct gk20a *g); | ||
32 | void pmu_dump_security_fuses_gm20b(struct gk20a *g); | ||
33 | void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags); | ||
34 | int gm20b_pmu_init_acr(struct gk20a *g); | ||
35 | void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr); | ||
36 | |||
37 | #endif /*__PMU_GM20B_H_*/ | ||
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.c b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c new file mode 100644 index 00000000..79c980f4 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c | |||
@@ -0,0 +1,450 @@ | |||
1 | /* | ||
2 | * Tegra GM20B GPU Debugger Driver Register Ops | ||
3 | * | ||
4 | * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gk20a/dbg_gpu_gk20a.h" | ||
27 | #include "gk20a/regops_gk20a.h" | ||
28 | #include "regops_gm20b.h" | ||
29 | |||
30 | #include <nvgpu/bsearch.h> | ||
31 | |||
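/*
 * Whitelists are { base offset, count } pairs describing 'count' consecutive
 * 32-bit registers, starting at 'base', that the regops interface may access.
 */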
32 | static const struct regop_offset_range gm20b_global_whitelist_ranges[] = { | ||
33 | { 0x00001a00, 3 }, | ||
34 | { 0x0000259c, 1 }, | ||
35 | { 0x0000280c, 1 }, | ||
36 | { 0x00009400, 1 }, | ||
37 | { 0x00009410, 1 }, | ||
38 | { 0x00021970, 1 }, | ||
39 | { 0x00021c00, 4 }, | ||
40 | { 0x00021c14, 3 }, | ||
41 | { 0x00021c24, 1 }, | ||
42 | { 0x00021c2c, 5 }, | ||
43 | { 0x00021cb8, 2 }, | ||
44 | { 0x00021d38, 2 }, | ||
45 | { 0x00021d44, 1 }, | ||
46 | { 0x00021d4c, 1 }, | ||
47 | { 0x00021d54, 1 }, | ||
48 | { 0x00021d5c, 1 }, | ||
49 | { 0x00021d64, 2 }, | ||
50 | { 0x00021d70, 1 }, | ||
51 | { 0x00022430, 7 }, | ||
52 | { 0x00100c18, 3 }, | ||
53 | { 0x00100c84, 1 }, | ||
54 | { 0x00100cc4, 1 }, | ||
55 | { 0x00106640, 1 }, | ||
56 | { 0x0010a0a8, 1 }, | ||
57 | { 0x0010a4f0, 1 }, | ||
58 | { 0x0010e064, 1 }, | ||
59 | { 0x0010e164, 1 }, | ||
60 | { 0x0010e490, 1 }, | ||
61 | { 0x00140028, 1 }, | ||
62 | { 0x00140350, 1 }, | ||
63 | { 0x00140550, 1 }, | ||
64 | { 0x00142028, 1 }, | ||
65 | { 0x00142350, 1 }, | ||
66 | { 0x00142550, 1 }, | ||
67 | { 0x0017e028, 1 }, | ||
68 | { 0x0017e350, 1 }, | ||
69 | { 0x0017e550, 1 }, | ||
70 | { 0x00180040, 52 }, | ||
71 | { 0x00180240, 52 }, | ||
72 | { 0x00180440, 52 }, | ||
73 | { 0x001a0040, 52 }, | ||
74 | { 0x001b0040, 52 }, | ||
75 | { 0x001b0240, 52 }, | ||
76 | { 0x001b0440, 52 }, | ||
77 | { 0x001b0640, 52 }, | ||
78 | { 0x001b4000, 3 }, | ||
79 | { 0x001b4010, 3 }, | ||
80 | { 0x001b4020, 3 }, | ||
81 | { 0x001b4030, 3 }, | ||
82 | { 0x001b4040, 3 }, | ||
83 | { 0x001b4050, 3 }, | ||
84 | { 0x001b4060, 4 }, | ||
85 | { 0x001b4074, 11 }, | ||
86 | { 0x001b40a4, 1 }, | ||
87 | { 0x001b4100, 6 }, | ||
88 | { 0x001b4124, 2 }, | ||
89 | { 0x001b8000, 7 }, | ||
90 | { 0x001bc000, 7 }, | ||
91 | { 0x001be000, 7 }, | ||
92 | { 0x00400500, 1 }, | ||
93 | { 0x00400700, 1 }, | ||
94 | { 0x0040415c, 1 }, | ||
95 | { 0x00405850, 1 }, | ||
96 | { 0x00405908, 1 }, | ||
97 | { 0x00405b40, 1 }, | ||
98 | { 0x00405b50, 1 }, | ||
99 | { 0x00406024, 1 }, | ||
100 | { 0x00407010, 1 }, | ||
101 | { 0x00407808, 1 }, | ||
102 | { 0x0040803c, 1 }, | ||
103 | { 0x0040880c, 1 }, | ||
104 | { 0x00408910, 1 }, | ||
105 | { 0x00408984, 1 }, | ||
106 | { 0x004090a8, 1 }, | ||
107 | { 0x004098a0, 1 }, | ||
108 | { 0x00409b00, 1 }, | ||
109 | { 0x0041000c, 1 }, | ||
110 | { 0x00410110, 1 }, | ||
111 | { 0x00410184, 1 }, | ||
112 | { 0x0041040c, 1 }, | ||
113 | { 0x00410510, 1 }, | ||
114 | { 0x00410584, 1 }, | ||
115 | { 0x00418384, 1 }, | ||
116 | { 0x004184a0, 1 }, | ||
117 | { 0x00418604, 1 }, | ||
118 | { 0x00418680, 1 }, | ||
119 | { 0x00418714, 1 }, | ||
120 | { 0x0041881c, 1 }, | ||
121 | { 0x00418884, 1 }, | ||
122 | { 0x004188b0, 1 }, | ||
123 | { 0x004188c8, 2 }, | ||
124 | { 0x00418b04, 1 }, | ||
125 | { 0x00418c04, 1 }, | ||
126 | { 0x00418c1c, 1 }, | ||
127 | { 0x00418c88, 1 }, | ||
128 | { 0x00418d00, 1 }, | ||
129 | { 0x00418e08, 1 }, | ||
130 | { 0x00418f08, 1 }, | ||
131 | { 0x00419000, 1 }, | ||
132 | { 0x0041900c, 1 }, | ||
133 | { 0x00419018, 1 }, | ||
134 | { 0x00419854, 1 }, | ||
135 | { 0x00419ab0, 1 }, | ||
136 | { 0x00419ab8, 3 }, | ||
137 | { 0x00419c0c, 1 }, | ||
138 | { 0x00419c90, 1 }, | ||
139 | { 0x00419d08, 2 }, | ||
140 | { 0x00419e00, 4 }, | ||
141 | { 0x00419e24, 2 }, | ||
142 | { 0x00419e44, 11 }, | ||
143 | { 0x00419e74, 9 }, | ||
144 | { 0x00419ea4, 1 }, | ||
145 | { 0x00419eb0, 1 }, | ||
146 | { 0x00419ef0, 26 }, | ||
147 | { 0x0041a0a0, 1 }, | ||
148 | { 0x0041a0a8, 1 }, | ||
149 | { 0x0041a17c, 1 }, | ||
150 | { 0x0041a890, 2 }, | ||
151 | { 0x0041a8a0, 3 }, | ||
152 | { 0x0041a8b0, 2 }, | ||
153 | { 0x0041b014, 1 }, | ||
154 | { 0x0041b0a0, 1 }, | ||
155 | { 0x0041b0cc, 1 }, | ||
156 | { 0x0041b0e8, 2 }, | ||
157 | { 0x0041b1dc, 1 }, | ||
158 | { 0x0041be14, 1 }, | ||
159 | { 0x0041bea0, 1 }, | ||
160 | { 0x0041becc, 1 }, | ||
161 | { 0x0041bee8, 2 }, | ||
162 | { 0x0041bfdc, 1 }, | ||
163 | { 0x0041c054, 1 }, | ||
164 | { 0x0041c2b0, 1 }, | ||
165 | { 0x0041c2b8, 3 }, | ||
166 | { 0x0041c40c, 1 }, | ||
167 | { 0x0041c490, 1 }, | ||
168 | { 0x0041c508, 2 }, | ||
169 | { 0x0041c600, 4 }, | ||
170 | { 0x0041c624, 2 }, | ||
171 | { 0x0041c644, 11 }, | ||
172 | { 0x0041c674, 9 }, | ||
173 | { 0x0041c6a4, 1 }, | ||
174 | { 0x0041c6b0, 1 }, | ||
175 | { 0x0041c6f0, 26 }, | ||
176 | { 0x0041c854, 1 }, | ||
177 | { 0x0041cab0, 1 }, | ||
178 | { 0x0041cab8, 3 }, | ||
179 | { 0x0041cc0c, 1 }, | ||
180 | { 0x0041cc90, 1 }, | ||
181 | { 0x0041cd08, 2 }, | ||
182 | { 0x0041ce00, 4 }, | ||
183 | { 0x0041ce24, 2 }, | ||
184 | { 0x0041ce44, 11 }, | ||
185 | { 0x0041ce74, 9 }, | ||
186 | { 0x0041cea4, 1 }, | ||
187 | { 0x0041ceb0, 1 }, | ||
188 | { 0x0041cef0, 26 }, | ||
189 | { 0x00500384, 1 }, | ||
190 | { 0x005004a0, 1 }, | ||
191 | { 0x00500604, 1 }, | ||
192 | { 0x00500680, 1 }, | ||
193 | { 0x00500714, 1 }, | ||
194 | { 0x0050081c, 1 }, | ||
195 | { 0x00500884, 1 }, | ||
196 | { 0x005008c8, 2 }, | ||
197 | { 0x00500b04, 1 }, | ||
198 | { 0x00500c04, 1 }, | ||
199 | { 0x00500c88, 1 }, | ||
200 | { 0x00500d00, 1 }, | ||
201 | { 0x00500e08, 1 }, | ||
202 | { 0x00500f08, 1 }, | ||
203 | { 0x00501000, 1 }, | ||
204 | { 0x0050100c, 1 }, | ||
205 | { 0x00501018, 1 }, | ||
206 | { 0x00501854, 1 }, | ||
207 | { 0x00501ab0, 1 }, | ||
208 | { 0x00501ab8, 3 }, | ||
209 | { 0x00501c0c, 1 }, | ||
210 | { 0x00501c90, 1 }, | ||
211 | { 0x00501d08, 2 }, | ||
212 | { 0x00501e00, 4 }, | ||
213 | { 0x00501e24, 2 }, | ||
214 | { 0x00501e44, 11 }, | ||
215 | { 0x00501e74, 9 }, | ||
216 | { 0x00501ea4, 1 }, | ||
217 | { 0x00501eb0, 1 }, | ||
218 | { 0x00501ef0, 26 }, | ||
219 | { 0x005020a0, 1 }, | ||
220 | { 0x005020a8, 1 }, | ||
221 | { 0x0050217c, 1 }, | ||
222 | { 0x00502890, 2 }, | ||
223 | { 0x005028a0, 3 }, | ||
224 | { 0x005028b0, 2 }, | ||
225 | { 0x00503014, 1 }, | ||
226 | { 0x005030a0, 1 }, | ||
227 | { 0x005030cc, 1 }, | ||
228 | { 0x005030e8, 2 }, | ||
229 | { 0x005031dc, 1 }, | ||
230 | { 0x00503e14, 1 }, | ||
231 | { 0x00503ea0, 1 }, | ||
232 | { 0x00503ecc, 1 }, | ||
233 | { 0x00503ee8, 2 }, | ||
234 | { 0x00503fdc, 1 }, | ||
235 | { 0x00504054, 1 }, | ||
236 | { 0x005042b0, 1 }, | ||
237 | { 0x005042b8, 3 }, | ||
238 | { 0x0050440c, 1 }, | ||
239 | { 0x00504490, 1 }, | ||
240 | { 0x00504508, 2 }, | ||
241 | { 0x00504600, 4 }, | ||
242 | { 0x00504614, 6 }, | ||
243 | { 0x00504634, 2 }, | ||
244 | { 0x00504644, 11 }, | ||
245 | { 0x00504674, 9 }, | ||
246 | { 0x005046a4, 1 }, | ||
247 | { 0x005046b0, 1 }, | ||
248 | { 0x005046f0, 28 }, | ||
249 | { 0x00504854, 1 }, | ||
250 | { 0x00504ab0, 1 }, | ||
251 | { 0x00504ab8, 3 }, | ||
252 | { 0x00504c0c, 1 }, | ||
253 | { 0x00504c90, 1 }, | ||
254 | { 0x00504d08, 2 }, | ||
255 | { 0x00504e00, 4 }, | ||
256 | { 0x00504e14, 6 }, | ||
257 | { 0x00504e34, 2 }, | ||
258 | { 0x00504e44, 11 }, | ||
259 | { 0x00504e74, 9 }, | ||
260 | { 0x00504ea4, 1 }, | ||
261 | { 0x00504eb0, 1 }, | ||
262 | { 0x00504ef0, 28 }, | ||
263 | }; | ||
264 | static const u32 gm20b_global_whitelist_ranges_count = | ||
265 | ARRAY_SIZE(gm20b_global_whitelist_ranges); | ||
266 | |||
267 | /* context */ | ||
268 | |||
269 | static const struct regop_offset_range gm20b_context_whitelist_ranges[] = { | ||
270 | { 0x0000259c, 1 }, | ||
271 | { 0x0000280c, 1 }, | ||
272 | { 0x00400500, 1 }, | ||
273 | { 0x00405b40, 1 }, | ||
274 | { 0x00418e00, 1 }, | ||
275 | { 0x00418e34, 1 }, | ||
276 | { 0x00418e40, 2 }, | ||
277 | { 0x00418e58, 2 }, | ||
278 | { 0x00419000, 1 }, | ||
279 | { 0x00419864, 1 }, | ||
280 | { 0x00419c90, 1 }, | ||
281 | { 0x00419d08, 2 }, | ||
282 | { 0x00419e04, 3 }, | ||
283 | { 0x00419e24, 2 }, | ||
284 | { 0x00419e44, 11 }, | ||
285 | { 0x00419e74, 10 }, | ||
286 | { 0x00419ea4, 1 }, | ||
287 | { 0x00419eac, 2 }, | ||
288 | { 0x00419ee8, 1 }, | ||
289 | { 0x00419ef0, 26 }, | ||
290 | { 0x0041b0e8, 2 }, | ||
291 | { 0x0041bee8, 2 }, | ||
292 | { 0x0041c490, 1 }, | ||
293 | { 0x0041c508, 2 }, | ||
294 | { 0x0041c604, 3 }, | ||
295 | { 0x0041c624, 2 }, | ||
296 | { 0x0041c644, 11 }, | ||
297 | { 0x0041c674, 10 }, | ||
298 | { 0x0041c6a4, 1 }, | ||
299 | { 0x0041c6ac, 2 }, | ||
300 | { 0x0041c6e8, 1 }, | ||
301 | { 0x0041c6f0, 26 }, | ||
302 | { 0x0041cc90, 1 }, | ||
303 | { 0x0041cd08, 2 }, | ||
304 | { 0x0041ce04, 3 }, | ||
305 | { 0x0041ce24, 2 }, | ||
306 | { 0x0041ce44, 11 }, | ||
307 | { 0x0041ce74, 10 }, | ||
308 | { 0x0041cea4, 1 }, | ||
309 | { 0x0041ceac, 2 }, | ||
310 | { 0x0041cee8, 1 }, | ||
311 | { 0x0041cef0, 26 }, | ||
312 | { 0x00501000, 1 }, | ||
313 | { 0x00501c90, 1 }, | ||
314 | { 0x00501d08, 2 }, | ||
315 | { 0x00501e04, 3 }, | ||
316 | { 0x00501e24, 2 }, | ||
317 | { 0x00501e44, 11 }, | ||
318 | { 0x00501e74, 10 }, | ||
319 | { 0x00501ea4, 1 }, | ||
320 | { 0x00501eac, 2 }, | ||
321 | { 0x00501ee8, 1 }, | ||
322 | { 0x00501ef0, 26 }, | ||
323 | { 0x005030e8, 2 }, | ||
324 | { 0x00503ee8, 2 }, | ||
325 | { 0x00504490, 1 }, | ||
326 | { 0x00504508, 2 }, | ||
327 | { 0x00504604, 3 }, | ||
328 | { 0x00504614, 6 }, | ||
329 | { 0x00504634, 2 }, | ||
330 | { 0x00504644, 11 }, | ||
331 | { 0x00504674, 10 }, | ||
332 | { 0x005046a4, 1 }, | ||
333 | { 0x005046ac, 2 }, | ||
334 | { 0x005046e8, 1 }, | ||
335 | { 0x005046f0, 28 }, | ||
336 | { 0x00504c90, 1 }, | ||
337 | { 0x00504d08, 2 }, | ||
338 | { 0x00504e04, 3 }, | ||
339 | { 0x00504e14, 6 }, | ||
340 | { 0x00504e34, 2 }, | ||
341 | { 0x00504e44, 11 }, | ||
342 | { 0x00504e74, 10 }, | ||
343 | { 0x00504ea4, 1 }, | ||
344 | { 0x00504eac, 2 }, | ||
345 | { 0x00504ee8, 1 }, | ||
346 | { 0x00504ef0, 28 }, | ||
347 | }; | ||
348 | static const u32 gm20b_context_whitelist_ranges_count = | ||
349 | ARRAY_SIZE(gm20b_context_whitelist_ranges); | ||
350 | |||
351 | /* runcontrol */ | ||
352 | static const u32 gm20b_runcontrol_whitelist[] = { | ||
353 | 0x00419e10, | ||
354 | 0x0041c610, | ||
355 | 0x0041ce10, | ||
356 | 0x00501e10, | ||
357 | 0x00504610, | ||
358 | 0x00504e10, | ||
359 | }; | ||
360 | static const u32 gm20b_runcontrol_whitelist_count = | ||
361 | ARRAY_SIZE(gm20b_runcontrol_whitelist); | ||
362 | |||
363 | static const struct regop_offset_range gm20b_runcontrol_whitelist_ranges[] = { | ||
364 | { 0x00419e10, 1 }, | ||
365 | { 0x0041c610, 1 }, | ||
366 | { 0x0041ce10, 1 }, | ||
367 | { 0x00501e10, 1 }, | ||
368 | { 0x00504610, 1 }, | ||
369 | { 0x00504e10, 1 }, | ||
370 | }; | ||
371 | static const u32 gm20b_runcontrol_whitelist_ranges_count = | ||
372 | ARRAY_SIZE(gm20b_runcontrol_whitelist_ranges); | ||
373 | |||
374 | |||
375 | /* quad ctl */ | ||
376 | static const u32 gm20b_qctl_whitelist[] = { | ||
377 | }; | ||
378 | static const u32 gm20b_qctl_whitelist_count = | ||
379 | ARRAY_SIZE(gm20b_qctl_whitelist); | ||
380 | |||
381 | static const struct regop_offset_range gm20b_qctl_whitelist_ranges[] = { | ||
382 | }; | ||
383 | static const u32 gm20b_qctl_whitelist_ranges_count = | ||
384 | ARRAY_SIZE(gm20b_qctl_whitelist_ranges); | ||
385 | |||
386 | const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void) | ||
387 | { | ||
388 | return gm20b_global_whitelist_ranges; | ||
389 | } | ||
390 | |||
391 | int gm20b_get_global_whitelist_ranges_count(void) | ||
392 | { | ||
393 | return gm20b_global_whitelist_ranges_count; | ||
394 | } | ||
395 | |||
396 | const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void) | ||
397 | { | ||
398 | return gm20b_context_whitelist_ranges; | ||
399 | } | ||
400 | |||
401 | int gm20b_get_context_whitelist_ranges_count(void) | ||
402 | { | ||
403 | return gm20b_context_whitelist_ranges_count; | ||
404 | } | ||
405 | |||
406 | const u32 *gm20b_get_runcontrol_whitelist(void) | ||
407 | { | ||
408 | return gm20b_runcontrol_whitelist; | ||
409 | } | ||
410 | |||
411 | int gm20b_get_runcontrol_whitelist_count(void) | ||
412 | { | ||
413 | return gm20b_runcontrol_whitelist_count; | ||
414 | } | ||
415 | |||
416 | const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void) | ||
417 | { | ||
418 | return gm20b_runcontrol_whitelist_ranges; | ||
419 | } | ||
420 | |||
421 | int gm20b_get_runcontrol_whitelist_ranges_count(void) | ||
422 | { | ||
423 | return gm20b_runcontrol_whitelist_ranges_count; | ||
424 | } | ||
425 | |||
426 | const u32 *gm20b_get_qctl_whitelist(void) | ||
427 | { | ||
428 | return gm20b_qctl_whitelist; | ||
429 | } | ||
430 | |||
431 | int gm20b_get_qctl_whitelist_count(void) | ||
432 | { | ||
433 | return gm20b_qctl_whitelist_count; | ||
434 | } | ||
435 | |||
436 | const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void) | ||
437 | { | ||
438 | return gm20b_qctl_whitelist_ranges; | ||
439 | } | ||
440 | |||
441 | int gm20b_get_qctl_whitelist_ranges_count(void) | ||
442 | { | ||
443 | return gm20b_qctl_whitelist_ranges_count; | ||
444 | } | ||
445 | |||
446 | int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s) | ||
447 | { | ||
448 | /* Not needed on gm20b */ | ||
449 | return 0; | ||
450 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.h b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h new file mode 100644 index 00000000..f0246e0e --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Tegra GM20B GPU Debugger Driver Register Ops | ||
4 | * | ||
5 | * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved. | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the "Software"), | ||
9 | * to deal in the Software without restriction, including without limitation | ||
10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
11 | * and/or sell copies of the Software, and to permit persons to whom the | ||
12 | * Software is furnished to do so, subject to the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice shall be included in | ||
15 | * all copies or substantial portions of the Software. | ||
16 | * | ||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
22 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
23 | * DEALINGS IN THE SOFTWARE. | ||
24 | */ | ||
25 | #ifndef __REGOPS_GM20B_H_ | ||
26 | #define __REGOPS_GM20B_H_ | ||
27 | |||
28 | struct dbg_session_gk20a; | ||
29 | |||
30 | const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void); | ||
31 | int gm20b_get_global_whitelist_ranges_count(void); | ||
32 | const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void); | ||
33 | int gm20b_get_context_whitelist_ranges_count(void); | ||
34 | const u32 *gm20b_get_runcontrol_whitelist(void); | ||
35 | int gm20b_get_runcontrol_whitelist_count(void); | ||
36 | const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void); | ||
37 | int gm20b_get_runcontrol_whitelist_ranges_count(void); | ||
38 | const u32 *gm20b_get_qctl_whitelist(void); | ||
39 | int gm20b_get_qctl_whitelist_count(void); | ||
40 | const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void); | ||
41 | int gm20b_get_qctl_whitelist_ranges_count(void); | ||
42 | int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s); | ||
43 | |||
44 | #endif /* __REGOPS_GM20B_H_ */ | ||
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.c b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c new file mode 100644 index 00000000..ce4d4fab --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * GM20B THERMAL | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | |||
27 | #include "therm_gm20b.h" | ||
28 | |||
29 | #include <nvgpu/hw/gm20b/hw_therm_gm20b.h> | ||
30 | |||
31 | int gm20b_init_therm_setup_hw(struct gk20a *g) | ||
32 | { | ||
33 | u32 v; | ||
34 | |||
35 | gk20a_dbg_fn(""); | ||
36 | |||
37 | /* program NV_THERM registers */ | ||
38 | gk20a_writel(g, therm_use_a_r(), therm_use_a_ext_therm_0_enable_f() | | ||
39 | therm_use_a_ext_therm_1_enable_f() | | ||
40 | therm_use_a_ext_therm_2_enable_f()); | ||
41 | gk20a_writel(g, therm_evt_ext_therm_0_r(), | ||
42 | therm_evt_ext_therm_0_slow_factor_f(0x2)); | ||
43 | gk20a_writel(g, therm_evt_ext_therm_1_r(), | ||
44 | therm_evt_ext_therm_1_slow_factor_f(0x6)); | ||
45 | gk20a_writel(g, therm_evt_ext_therm_2_r(), | ||
46 | therm_evt_ext_therm_2_slow_factor_f(0xe)); | ||
47 | |||
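/*
 * Gradual slowdown tables: table 0 steps the slowdown factor through
 * 1.5x, 2x, 4x and 8x; table 1 holds all steps at 8x.
 */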
48 | gk20a_writel(g, therm_grad_stepping_table_r(0), | ||
49 | therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) | | ||
50 | therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) | | ||
51 | therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) | | ||
52 | therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
53 | therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f())); | ||
54 | gk20a_writel(g, therm_grad_stepping_table_r(1), | ||
55 | therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
56 | therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
57 | therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
58 | therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) | | ||
59 | therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f())); | ||
60 | |||
61 | v = gk20a_readl(g, therm_clk_timing_r(0)); | ||
62 | v |= therm_clk_timing_grad_slowdown_enabled_f(); | ||
63 | gk20a_writel(g, therm_clk_timing_r(0), v); | ||
64 | |||
65 | v = gk20a_readl(g, therm_config2_r()); | ||
66 | v |= therm_config2_grad_enable_f(1); | ||
67 | v |= therm_config2_slowdown_factor_extended_f(1); | ||
68 | gk20a_writel(g, therm_config2_r(), v); | ||
69 | |||
70 | gk20a_writel(g, therm_grad_stepping1_r(), | ||
71 | therm_grad_stepping1_pdiv_duration_f(32)); | ||
72 | |||
73 | v = gk20a_readl(g, therm_grad_stepping0_r()); | ||
74 | v |= therm_grad_stepping0_feature_enable_f(); | ||
75 | gk20a_writel(g, therm_grad_stepping0_r(), v); | ||
76 | |||
77 | return 0; | ||
78 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.h b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h new file mode 100644 index 00000000..df0b4219 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * GM20B THERMAL | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | #ifndef THERM_GM20B_H | ||
25 | #define THERM_GM20B_H | ||
26 | |||
27 | struct gk20a; | ||
28 | int gm20b_init_therm_setup_hw(struct gk20a *g); | ||
29 | |||
30 | #endif /* THERM_GM20B_H */ | ||