path: root/drivers/gpu/nvgpu/gm20b
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c           | 1444
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.h           |   60
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.c           |   65
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.h           |   33
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.c           | 1605
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.h           |   95
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.c            |  195
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.h            |   40
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c          |  223
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.h          |   39
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c |  731
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h |  100
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c        |   72
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h        |   36
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c            | 1527
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.h            |  137
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |  708
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.h           |   31
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c           |  487
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.h           |   49
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |   86
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.h            |   43
-rw-r--r--  drivers/gpu/nvgpu/gm20b/pmu_gm20b.c           |  283
-rw-r--r--  drivers/gpu/nvgpu/gm20b/pmu_gm20b.h           |   37
-rw-r--r--  drivers/gpu/nvgpu/gm20b/regops_gm20b.c        |  450
-rw-r--r--  drivers/gpu/nvgpu/gm20b/regops_gm20b.h        |   44
-rw-r--r--  drivers/gpu/nvgpu/gm20b/therm_gm20b.c         |   78
-rw-r--r--  drivers/gpu/nvgpu/gm20b/therm_gm20b.h         |   30
28 files changed, 8728 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
new file mode 100644
index 00000000..a39cdf2c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -0,0 +1,1444 @@
1/*
2 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/types.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/gmmu.h>
26#include <nvgpu/timers.h>
27#include <nvgpu/nvgpu_common.h>
28#include <nvgpu/kmem.h>
29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/acr/nvgpu_acr.h>
31#include <nvgpu/firmware.h>
32#include <nvgpu/pmu.h>
33#include <nvgpu/falcon.h>
34#include <nvgpu/enabled.h>
35#include <nvgpu/mm.h>
36
37#include "gk20a/gk20a.h"
38#include "gk20a/pmu_gk20a.h"
39#include "mm_gm20b.h"
40#include "acr_gm20b.h"
41
42#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
43
44/*Defines*/
45#define gm20b_dbg_pmu(fmt, arg...) \
46 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
47
48typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata);
49
50/*Externs*/
51
52/*Forwards*/
53static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
54static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
55static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
56static int lsfm_discover_ucode_images(struct gk20a *g,
57 struct ls_flcn_mgr *plsfm);
58static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
59 struct flcn_ucode_img *ucode_image, u32 falcon_id);
60static void lsfm_free_ucode_img_res(struct gk20a *g,
61 struct flcn_ucode_img *p_img);
62static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g,
63 struct flcn_ucode_img *p_img);
64static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
65static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
66 struct nvgpu_mem *nonwpr);
67static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm);
68
69/*Globals*/
70static get_ucode_details pmu_acr_supp_ucode_list[] = {
71 pmu_ucode_details,
72 fecs_ucode_details,
73 gpccs_ucode_details,
74};
75
76/*Once in LS mode, only cpuctl_alias is accessible*/
77static void start_gm20b_pmu(struct gk20a *g)
78{
79 /*disable irqs for hs falcon booting as we will poll for halt*/
80 nvgpu_mutex_acquire(&g->pmu.isr_mutex);
81 pmu_enable_irq(&g->pmu, true);
82 g->pmu.isr_enabled = true;
83 nvgpu_mutex_release(&g->pmu.isr_mutex);
84 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(),
85 pwr_falcon_cpuctl_startcpu_f(1));
86}
87
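/* Query the WPR carveout base and size via the FB HAL. */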
88void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
89{
90 g->ops.fb.read_wpr_info(g, inf);
91}
92
93bool gm20b_is_pmu_supported(struct gk20a *g)
94{
95 return true;
96}
97
98static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
99{
100 struct nvgpu_firmware *pmu_fw, *pmu_desc, *pmu_sig;
101 struct nvgpu_pmu *pmu = &g->pmu;
102 struct lsf_ucode_desc *lsf_desc;
103 int err;
104 gm20b_dbg_pmu("requesting PMU ucode in GM20B\n");
105 pmu_fw = nvgpu_request_firmware(g, GM20B_PMU_UCODE_IMAGE, 0);
106 if (!pmu_fw) {
107 nvgpu_err(g, "failed to load pmu ucode!!");
108 return -ENOENT;
109 }
110 g->acr.pmu_fw = pmu_fw;
111	gm20b_dbg_pmu("Loaded PMU ucode for blob preparation");
112
113 gm20b_dbg_pmu("requesting PMU ucode desc in GM20B\n");
114 pmu_desc = nvgpu_request_firmware(g, GM20B_PMU_UCODE_DESC, 0);
115 if (!pmu_desc) {
116 nvgpu_err(g, "failed to load pmu ucode desc!!");
117 err = -ENOENT;
118 goto release_img_fw;
119 }
120 pmu_sig = nvgpu_request_firmware(g, GM20B_PMU_UCODE_SIG, 0);
121 if (!pmu_sig) {
122 nvgpu_err(g, "failed to load pmu sig!!");
123 err = -ENOENT;
124 goto release_desc;
125 }
126 pmu->desc = (struct pmu_ucode_desc *)pmu_desc->data;
127 pmu->ucode_image = (u32 *)pmu_fw->data;
128 g->acr.pmu_desc = pmu_desc;
129
130 err = nvgpu_init_pmu_fw_support(pmu);
131 if (err) {
132 gm20b_dbg_pmu("failed to set function pointers\n");
133 goto release_sig;
134 }
135
136 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
137 if (!lsf_desc) {
138 err = -ENOMEM;
139 goto release_sig;
140 }
141 memcpy(lsf_desc, (void *)pmu_sig->data, sizeof(struct lsf_ucode_desc));
142 lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
143
144 p_img->desc = pmu->desc;
145 p_img->data = pmu->ucode_image;
146 p_img->data_size = pmu->desc->image_size;
147 p_img->fw_ver = NULL;
148 p_img->header = NULL;
149 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
150 gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n");
151 nvgpu_release_firmware(g, pmu_sig);
152 return 0;
153release_sig:
154 nvgpu_release_firmware(g, pmu_sig);
155release_desc:
156 nvgpu_release_firmware(g, pmu_desc);
157 g->acr.pmu_desc = NULL;
158release_img_fw:
159 nvgpu_release_firmware(g, pmu_fw);
160 g->acr.pmu_fw = NULL;
161 return err;
162}
163
164static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
165{
166 struct lsf_ucode_desc *lsf_desc;
167 struct nvgpu_firmware *fecs_sig;
168 int err;
169
170 fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0);
171 if (!fecs_sig) {
172 nvgpu_err(g, "failed to load fecs sig");
173 return -ENOENT;
174 }
175 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
176 if (!lsf_desc) {
177 err = -ENOMEM;
178 goto rel_sig;
179 }
180 memcpy(lsf_desc, (void *)fecs_sig->data, sizeof(struct lsf_ucode_desc));
181 lsf_desc->falcon_id = LSF_FALCON_ID_FECS;
182
183 p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
184 if (p_img->desc == NULL) {
185 err = -ENOMEM;
186 goto free_lsf_desc;
187 }
188
189 p_img->desc->bootloader_start_offset =
190 g->ctxsw_ucode_info.fecs.boot.offset;
191 p_img->desc->bootloader_size =
192 ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
193 p_img->desc->bootloader_imem_offset =
194 g->ctxsw_ucode_info.fecs.boot_imem_offset;
195 p_img->desc->bootloader_entry_point =
196 g->ctxsw_ucode_info.fecs.boot_entry;
197
198 p_img->desc->image_size =
199 ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
200 ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
201 ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
202 p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
203 ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
204 p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
205 p_img->desc->app_imem_offset = 0;
206 p_img->desc->app_imem_entry = 0;
207 p_img->desc->app_dmem_offset = 0;
208 p_img->desc->app_resident_code_offset = 0;
209 p_img->desc->app_resident_code_size =
210 g->ctxsw_ucode_info.fecs.code.size;
211 p_img->desc->app_resident_data_offset =
212 g->ctxsw_ucode_info.fecs.data.offset -
213 g->ctxsw_ucode_info.fecs.code.offset;
214 p_img->desc->app_resident_data_size =
215 g->ctxsw_ucode_info.fecs.data.size;
216 p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
217 p_img->data_size = p_img->desc->image_size;
218
219 p_img->fw_ver = NULL;
220 p_img->header = NULL;
221 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
222 gm20b_dbg_pmu("fecs fw loaded\n");
223 nvgpu_release_firmware(g, fecs_sig);
224 return 0;
225free_lsf_desc:
226 nvgpu_kfree(g, lsf_desc);
227rel_sig:
228 nvgpu_release_firmware(g, fecs_sig);
229 return err;
230}
231static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
232{
233 struct lsf_ucode_desc *lsf_desc;
234 struct nvgpu_firmware *gpccs_sig;
235 int err;
236
237 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS))
238 return -ENOENT;
239
240 gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, 0);
241 if (!gpccs_sig) {
242 nvgpu_err(g, "failed to load gpccs sig");
243 return -ENOENT;
244 }
245 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
246 if (!lsf_desc) {
247 err = -ENOMEM;
248 goto rel_sig;
249 }
250 memcpy(lsf_desc, (void *)gpccs_sig->data,
251 sizeof(struct lsf_ucode_desc));
252 lsf_desc->falcon_id = LSF_FALCON_ID_GPCCS;
253
254 p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
255 if (p_img->desc == NULL) {
256 err = -ENOMEM;
257 goto free_lsf_desc;
258 }
259
260 p_img->desc->bootloader_start_offset =
261 0;
262 p_img->desc->bootloader_size =
263 ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
264 p_img->desc->bootloader_imem_offset =
265 g->ctxsw_ucode_info.gpccs.boot_imem_offset;
266 p_img->desc->bootloader_entry_point =
267 g->ctxsw_ucode_info.gpccs.boot_entry;
268
269 p_img->desc->image_size =
270 ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
271 ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
272 ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
273 p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
274 + ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
275 p_img->desc->app_start_offset = p_img->desc->bootloader_size;
276 p_img->desc->app_imem_offset = 0;
277 p_img->desc->app_imem_entry = 0;
278 p_img->desc->app_dmem_offset = 0;
279 p_img->desc->app_resident_code_offset = 0;
280 p_img->desc->app_resident_code_size =
281 ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
282 p_img->desc->app_resident_data_offset =
283 ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
284 ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
285 p_img->desc->app_resident_data_size =
286 ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
287 p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
288 g->ctxsw_ucode_info.gpccs.boot.offset);
289 p_img->data_size = ALIGN(p_img->desc->image_size, 256);
290 p_img->fw_ver = NULL;
291 p_img->header = NULL;
292 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
293 gm20b_dbg_pmu("gpccs fw loaded\n");
294 nvgpu_release_firmware(g, gpccs_sig);
295 return 0;
296free_lsf_desc:
297 nvgpu_kfree(g, lsf_desc);
298rel_sig:
299 nvgpu_release_firmware(g, gpccs_sig);
300 return err;
301}
302
303bool gm20b_is_lazy_bootstrap(u32 falcon_id)
304{
305 bool enable_status = false;
306
307 switch (falcon_id) {
308 case LSF_FALCON_ID_FECS:
309 enable_status = false;
310 break;
311 case LSF_FALCON_ID_GPCCS:
312 enable_status = false;
313 break;
314 default:
315 break;
316 }
317
318 return enable_status;
319}
320
321bool gm20b_is_priv_load(u32 falcon_id)
322{
323 bool enable_status = false;
324
325 switch (falcon_id) {
326 case LSF_FALCON_ID_FECS:
327 enable_status = false;
328 break;
329 case LSF_FALCON_ID_GPCCS:
330 enable_status = false;
331 break;
332 default:
333 break;
334 }
335
336 return enable_status;
337}
338
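/* Allocate system memory to back the non-WPR ucode blob. */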
339int gm20b_alloc_blob_space(struct gk20a *g,
340 size_t size, struct nvgpu_mem *mem)
341{
342 int err;
343
344 err = nvgpu_dma_alloc_sys(g, size, mem);
345
346 return err;
347}
348
349int prepare_ucode_blob(struct gk20a *g)
350{
351
352 int err;
353 struct ls_flcn_mgr lsfm_l, *plsfm;
354 struct nvgpu_pmu *pmu = &g->pmu;
355 struct wpr_carveout_info wpr_inf;
356
357 if (g->acr.ucode_blob.cpu_va) {
358		/*Recovery case, we do not need to form the
359		non-WPR blob of ucodes*/
360 err = nvgpu_init_pmu_fw_support(pmu);
361 if (err) {
362 gm20b_dbg_pmu("failed to set function pointers\n");
363 return err;
364 }
365 return 0;
366 }
367 plsfm = &lsfm_l;
368 memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr));
369 gm20b_dbg_pmu("fetching GMMU regs\n");
370 g->ops.fb.vpr_info_fetch(g);
371 gr_gk20a_init_ctxsw_ucode(g);
372
373 g->ops.pmu.get_wpr(g, &wpr_inf);
374 gm20b_dbg_pmu("wpr carveout base:%llx\n", wpr_inf.wpr_base);
375 gm20b_dbg_pmu("wpr carveout size :%llx\n", wpr_inf.size);
376
377 /* Discover all managed falcons*/
378 err = lsfm_discover_ucode_images(g, plsfm);
379 gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
380 if (err)
381 goto free_sgt;
382
383 if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
384 /* Generate WPR requirements*/
385 err = lsf_gen_wpr_requirements(g, plsfm);
386 if (err)
387 goto free_sgt;
388
389 /*Alloc memory to hold ucode blob contents*/
390 err = g->ops.pmu.alloc_blob_space(g, plsfm->wpr_size
391 , &g->acr.ucode_blob);
392 if (err)
393 goto free_sgt;
394
395 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
396 plsfm->managed_flcn_cnt, plsfm->wpr_size);
397 lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
398 } else {
399 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
400 }
401 gm20b_dbg_pmu("prepare ucode blob return 0\n");
402 free_acr_resources(g, plsfm);
403free_sgt:
404 return err;
405}
406
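/* Return nonzero if the given falcon is masked off from LSFM management. */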
407static u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm,
408 u32 falcon_id)
409{
410 return (plsfm->disable_mask >> falcon_id) & 0x1;
411}
412
413/* Discover all managed falcon ucode images */
414static int lsfm_discover_ucode_images(struct gk20a *g,
415 struct ls_flcn_mgr *plsfm)
416{
417 struct nvgpu_pmu *pmu = &g->pmu;
418 struct flcn_ucode_img ucode_img;
419 u32 falcon_id;
420 u32 i;
421 int status;
422
423 /* LSFM requires a secure PMU, discover it first.*/
424 /* Obtain the PMU ucode image and add it to the list if required*/
425 memset(&ucode_img, 0, sizeof(ucode_img));
426 status = pmu_ucode_details(g, &ucode_img);
427 if (status)
428 return status;
429
430	 * The falcon_id is formed by grabbing the static base
431	 * falcon_id from the image and adding the
432	 * engine-designated falcon instance.*/
433 pmu->pmu_mode |= PMU_SECURE_MODE;
434 falcon_id = ucode_img.lsf_desc->falcon_id +
435 ucode_img.flcn_inst;
436
437 if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) {
438 pmu->falcon_id = falcon_id;
439 if (lsfm_add_ucode_img(g, plsfm, &ucode_img,
440 pmu->falcon_id) == 0)
441 pmu->pmu_mode |= PMU_LSFM_MANAGED;
442
443 plsfm->managed_flcn_cnt++;
444 } else {
445 gm20b_dbg_pmu("id not managed %d\n",
446 ucode_img.lsf_desc->falcon_id);
447 }
448
449 /*Free any ucode image resources if not managing this falcon*/
450 if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) {
451 gm20b_dbg_pmu("pmu is not LSFM managed\n");
452 lsfm_free_ucode_img_res(g, &ucode_img);
453 }
454
455 /* Enumerate all constructed falcon objects,
456 as we need the ucode image info and total falcon count.*/
457
458	/*0th index is always the PMU, which is already handled in the
459	earlier if condition*/
460 for (i = 1; i < (MAX_SUPPORTED_LSFM); i++) {
461 memset(&ucode_img, 0, sizeof(ucode_img));
462 if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) {
463 if (ucode_img.lsf_desc != NULL) {
464 /* We have engine sigs, ensure that this falcon
465 is aware of the secure mode expectations
466 (ACR status)*/
467
468				/* falcon_id is formed by grabbing the static
469				base falcon_id from the image and adding the
470				engine-designated falcon instance. */
471 falcon_id = ucode_img.lsf_desc->falcon_id +
472 ucode_img.flcn_inst;
473
474 if (!lsfm_falcon_disabled(g, plsfm,
475 falcon_id)) {
476 /* Do not manage non-FB ucode*/
477 if (lsfm_add_ucode_img(g,
478 plsfm, &ucode_img, falcon_id)
479 == 0)
480 plsfm->managed_flcn_cnt++;
481 } else {
482 gm20b_dbg_pmu("not managed %d\n",
483 ucode_img.lsf_desc->falcon_id);
484 lsfm_free_nonpmu_ucode_img_res(g,
485 &ucode_img);
486 }
487 }
488 } else {
489 /* Consumed all available falcon objects */
490 gm20b_dbg_pmu("Done checking for ucodes %d\n", i);
491 break;
492 }
493 }
494 return 0;
495}
496
497
498int gm20b_pmu_populate_loader_cfg(struct gk20a *g,
499 void *lsfm, u32 *p_bl_gen_desc_size)
500{
501 struct wpr_carveout_info wpr_inf;
502 struct nvgpu_pmu *pmu = &g->pmu;
503 struct lsfm_managed_ucode_img *p_lsfm =
504 (struct lsfm_managed_ucode_img *)lsfm;
505 struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
506 struct loader_config *ldr_cfg = &(p_lsfm->bl_gen_desc.loader_cfg);
507 u64 addr_base;
508 struct pmu_ucode_desc *desc;
509 u64 addr_code, addr_data;
510 u32 addr_args;
511
512	if (p_img->desc == NULL) /*This means it's a header-based ucode,
513				and so we do not fill the BL gen desc structure*/
514 return -EINVAL;
515 desc = p_img->desc;
516 /*
517 Calculate physical and virtual addresses for various portions of
518 the PMU ucode image
519 Calculate the 32-bit addresses for the application code, application
520 data, and bootloader code. These values are all based on IM_BASE.
521 The 32-bit addresses will be the upper 32-bits of the virtual or
522 physical addresses of each respective segment.
523 */
524 addr_base = p_lsfm->lsb_header.ucode_off;
525 g->ops.pmu.get_wpr(g, &wpr_inf);
526 addr_base += wpr_inf.wpr_base;
527 gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
528 /*From linux*/
529 addr_code = u64_lo32((addr_base +
530 desc->app_start_offset +
531 desc->app_resident_code_offset) >> 8);
532 gm20b_dbg_pmu("app start %d app res code off %d\n",
533 desc->app_start_offset, desc->app_resident_code_offset);
534 addr_data = u64_lo32((addr_base +
535 desc->app_start_offset +
536 desc->app_resident_data_offset) >> 8);
537 gm20b_dbg_pmu("app res data offset%d\n",
538 desc->app_resident_data_offset);
539 gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
540
541 addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
542 gk20a_readl(g, pwr_falcon_hwcfg_r())))
543 << GK20A_PMU_DMEM_BLKSIZE2);
544 addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
545
546 gm20b_dbg_pmu("addr_args %x\n", addr_args);
547
548 /* Populate the loader_config state*/
549 ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
550 ldr_cfg->code_dma_base = addr_code;
551 ldr_cfg->code_dma_base1 = 0x0;
552 ldr_cfg->code_size_total = desc->app_size;
553 ldr_cfg->code_size_to_load = desc->app_resident_code_size;
554 ldr_cfg->code_entry_point = desc->app_imem_entry;
555 ldr_cfg->data_dma_base = addr_data;
556 ldr_cfg->data_dma_base1 = 0;
557 ldr_cfg->data_size = desc->app_resident_data_size;
558 ldr_cfg->overlay_dma_base = addr_code;
559 ldr_cfg->overlay_dma_base1 = 0x0;
560
561 /* Update the argc/argv members*/
562 ldr_cfg->argc = 1;
563 ldr_cfg->argv = addr_args;
564
565 *p_bl_gen_desc_size = sizeof(struct loader_config);
566 g->acr.pmu_args = addr_args;
567 return 0;
568}
569
570int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g,
571 void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
572{
573 struct wpr_carveout_info wpr_inf;
574 struct lsfm_managed_ucode_img *p_lsfm =
575 (struct lsfm_managed_ucode_img *)lsfm;
576 struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
577 struct flcn_bl_dmem_desc *ldr_cfg =
578 &(p_lsfm->bl_gen_desc.bl_dmem_desc);
579 u64 addr_base;
580 struct pmu_ucode_desc *desc;
581 u64 addr_code, addr_data;
582
583	if (p_img->desc == NULL) /*This means it's a header-based ucode,
584				and so we do not fill the BL gen desc structure*/
585 return -EINVAL;
586 desc = p_img->desc;
587
588 /*
589 Calculate physical and virtual addresses for various portions of
590 the PMU ucode image
591 Calculate the 32-bit addresses for the application code, application
592 data, and bootloader code. These values are all based on IM_BASE.
593 The 32-bit addresses will be the upper 32-bits of the virtual or
594 physical addresses of each respective segment.
595 */
596 addr_base = p_lsfm->lsb_header.ucode_off;
597 g->ops.pmu.get_wpr(g, &wpr_inf);
598 addr_base += wpr_inf.wpr_base;
599
600 gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
601 p_lsfm->wpr_header.falcon_id);
602 addr_code = u64_lo32((addr_base +
603 desc->app_start_offset +
604 desc->app_resident_code_offset) >> 8);
605 addr_data = u64_lo32((addr_base +
606 desc->app_start_offset +
607 desc->app_resident_data_offset) >> 8);
608
609 gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
610 (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
611 p_lsfm->wpr_header.falcon_id);
612
613 /* Populate the LOADER_CONFIG state */
614 memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc));
615 ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
616 ldr_cfg->code_dma_base = addr_code;
617 ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
618 ldr_cfg->data_dma_base = addr_data;
619 ldr_cfg->data_size = desc->app_resident_data_size;
620 ldr_cfg->code_entry_point = desc->app_imem_entry;
621 *p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc);
622 return 0;
623}
624
625/* Populate falcon boot loader generic desc.*/
626static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
627 struct lsfm_managed_ucode_img *pnode)
628{
629
630 struct nvgpu_pmu *pmu = &g->pmu;
631 if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
632 gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n");
633 g->ops.pmu.flcn_populate_bl_dmem_desc(g,
634 pnode, &pnode->bl_gen_desc_size,
635 pnode->wpr_header.falcon_id);
636 return 0;
637 }
638
639 if (pmu->pmu_mode & PMU_LSFM_MANAGED) {
640 gm20b_dbg_pmu("pmu write flcn bl gen desc\n");
641 if (pnode->wpr_header.falcon_id == pmu->falcon_id)
642 return g->ops.pmu.pmu_populate_loader_cfg(g, pnode,
643 &pnode->bl_gen_desc_size);
644 }
645
646 /* Failed to find the falcon requested. */
647 return -ENOENT;
648}
649
650/* Initialize WPR contents */
651static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
652 struct nvgpu_mem *ucode)
653{
654 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
655 struct lsf_wpr_header last_wpr_hdr;
656 u32 i;
657
658 /* The WPR array is at the base of the WPR */
659 pnode = plsfm->ucode_img_list;
660 memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header));
661 i = 0;
662
663 /*
664 * Walk the managed falcons, flush WPR and LSB headers to FB.
665 * flush any bl args to the storage area relative to the
666 * ucode image (appended on the end as a DMEM area).
667 */
668 while (pnode) {
669 /* Flush WPR header to memory*/
670 nvgpu_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
671 &pnode->wpr_header, sizeof(pnode->wpr_header));
672
673 gm20b_dbg_pmu("wpr header");
674 gm20b_dbg_pmu("falconid :%d",
675 pnode->wpr_header.falcon_id);
676 gm20b_dbg_pmu("lsb_offset :%x",
677 pnode->wpr_header.lsb_offset);
678 gm20b_dbg_pmu("bootstrap_owner :%d",
679 pnode->wpr_header.bootstrap_owner);
680 gm20b_dbg_pmu("lazy_bootstrap :%d",
681 pnode->wpr_header.lazy_bootstrap);
682 gm20b_dbg_pmu("status :%d",
683 pnode->wpr_header.status);
684
685 /*Flush LSB header to memory*/
686 nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
687 &pnode->lsb_header, sizeof(pnode->lsb_header));
688
689 gm20b_dbg_pmu("lsb header");
690 gm20b_dbg_pmu("ucode_off :%x",
691 pnode->lsb_header.ucode_off);
692 gm20b_dbg_pmu("ucode_size :%x",
693 pnode->lsb_header.ucode_size);
694 gm20b_dbg_pmu("data_size :%x",
695 pnode->lsb_header.data_size);
696 gm20b_dbg_pmu("bl_code_size :%x",
697 pnode->lsb_header.bl_code_size);
698 gm20b_dbg_pmu("bl_imem_off :%x",
699 pnode->lsb_header.bl_imem_off);
700 gm20b_dbg_pmu("bl_data_off :%x",
701 pnode->lsb_header.bl_data_off);
702 gm20b_dbg_pmu("bl_data_size :%x",
703 pnode->lsb_header.bl_data_size);
704 gm20b_dbg_pmu("app_code_off :%x",
705 pnode->lsb_header.app_code_off);
706 gm20b_dbg_pmu("app_code_size :%x",
707 pnode->lsb_header.app_code_size);
708 gm20b_dbg_pmu("app_data_off :%x",
709 pnode->lsb_header.app_data_off);
710 gm20b_dbg_pmu("app_data_size :%x",
711 pnode->lsb_header.app_data_size);
712 gm20b_dbg_pmu("flags :%x",
713 pnode->lsb_header.flags);
714
715 /*If this falcon has a boot loader and related args,
716 * flush them.*/
717 if (!pnode->ucode_img.header) {
718 /*Populate gen bl and flush to memory*/
719 lsfm_fill_flcn_bl_gen_desc(g, pnode);
720 nvgpu_mem_wr_n(g, ucode,
721 pnode->lsb_header.bl_data_off,
722 &pnode->bl_gen_desc,
723 pnode->bl_gen_desc_size);
724 }
725 /*Copying of ucode*/
726 nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
727 pnode->ucode_img.data,
728 pnode->ucode_img.data_size);
729 pnode = pnode->next;
730 i++;
731 }
732
733 /* Tag the terminator WPR header with an invalid falcon ID. */
734 last_wpr_hdr.falcon_id = LSF_FALCON_ID_INVALID;
735 nvgpu_mem_wr_n(g, ucode,
736 plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header),
737 &last_wpr_hdr,
738 sizeof(struct lsf_wpr_header));
739}
740
741/*!
742 * lsfm_parse_no_loader_ucode: parses UCODE header of falcon
743 *
744 * @param[in] p_ucodehdr : UCODE header
745 * @param[out] lsb_hdr : updates values in LSB header
746 *
747 * @return 0
748 */
749static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr,
750 struct lsf_lsb_header *lsb_hdr)
751{
752
753 u32 code_size = 0;
754 u32 data_size = 0;
755 u32 i = 0;
756 u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
757
758	/* Let's calculate the code size */
759 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
760 for (i = 0; i < total_apps; i++) {
761 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND
762 (total_apps, i)];
763 }
764 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)];
765
766 /* Calculate data size*/
767 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
768 for (i = 0; i < total_apps; i++) {
769 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND
770 (total_apps, i)];
771 }
772
773 lsb_hdr->ucode_size = code_size;
774 lsb_hdr->data_size = data_size;
775 lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
776 lsb_hdr->bl_imem_off = 0;
777 lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND];
778 lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
779 return 0;
780}
781
782/*!
783 * @brief lsfm_fill_static_lsb_hdr_info
784 * Populate static LSB header information using the provided ucode image
785 */
786static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
787 u32 falcon_id, struct lsfm_managed_ucode_img *pnode)
788{
789
790 struct nvgpu_pmu *pmu = &g->pmu;
791 u32 full_app_size = 0;
792 u32 data = 0;
793
794 if (pnode->ucode_img.lsf_desc)
795 memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc,
796 sizeof(struct lsf_ucode_desc));
797 pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
798
799 /* The remainder of the LSB depends on the loader usage */
800 if (pnode->ucode_img.header) {
801 /* Does not use a loader */
802 pnode->lsb_header.data_size = 0;
803 pnode->lsb_header.bl_code_size = 0;
804 pnode->lsb_header.bl_data_off = 0;
805 pnode->lsb_header.bl_data_size = 0;
806
807 lsfm_parse_no_loader_ucode(pnode->ucode_img.header,
808 &(pnode->lsb_header));
809
810 /* Load the first 256 bytes of IMEM. */
811 /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX.
812 True for all method based falcons */
813 data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE |
814 NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
815 pnode->lsb_header.flags = data;
816 } else {
817	} else {
		/* Uses a loader, i.e. it has a desc */
818 pnode->lsb_header.data_size = 0;
819
820		/* The loader code size is already aligned (padded) such that
821		the code following it is aligned, but the size in the image
822		desc is not; round it up to a 256-byte alignment. */
823 pnode->lsb_header.bl_code_size = ALIGN(
824 pnode->ucode_img.desc->bootloader_size,
825 LSF_BL_CODE_SIZE_ALIGNMENT);
826 full_app_size = ALIGN(pnode->ucode_img.desc->app_size,
827 LSF_BL_CODE_SIZE_ALIGNMENT) +
828 pnode->lsb_header.bl_code_size;
829 pnode->lsb_header.ucode_size = ALIGN(
830 pnode->ucode_img.desc->app_resident_data_offset,
831 LSF_BL_CODE_SIZE_ALIGNMENT) +
832 pnode->lsb_header.bl_code_size;
833 pnode->lsb_header.data_size = full_app_size -
834 pnode->lsb_header.ucode_size;
835		/* Though the BL is located at the 0th offset of the image, the VA
836		is different to make sure that it doesn't collide with the actual
837		OS VA range */
838 pnode->lsb_header.bl_imem_off =
839 pnode->ucode_img.desc->bootloader_imem_offset;
840
841 /* TODO: OBJFLCN should export properties using which the below
842 flags should be populated.*/
843 pnode->lsb_header.flags = 0;
844
845 if (falcon_id == pmu->falcon_id) {
846 data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
847 pnode->lsb_header.flags = data;
848 }
849
850 if (g->ops.pmu.is_priv_load(falcon_id)) {
851 pnode->lsb_header.flags |=
852 NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
853 }
854 }
855}
856
857/* Add a ucode image to the list of managed ucode images. */
858static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
859 struct flcn_ucode_img *ucode_image, u32 falcon_id)
860{
861
862 struct lsfm_managed_ucode_img *pnode;
863 pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img));
864 if (pnode == NULL)
865 return -ENOMEM;
866
867 /* Keep a copy of the ucode image info locally */
868 memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img));
869
870 /* Fill in static WPR header info*/
871 pnode->wpr_header.falcon_id = falcon_id;
872 pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
873 pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY;
874
875 pnode->wpr_header.lazy_bootstrap =
876 g->ops.pmu.is_lazy_bootstrap(falcon_id);
877
878 /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by
879 Android */
880 /* Fill in static LSB header info elsewhere */
881 lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode);
882 pnode->next = plsfm->ucode_img_list;
883 plsfm->ucode_img_list = pnode;
884 return 0;
885}
886
887/* Free any ucode image structure resources. */
888static void lsfm_free_ucode_img_res(struct gk20a *g,
889 struct flcn_ucode_img *p_img)
890{
891 if (p_img->lsf_desc != NULL) {
892 nvgpu_kfree(g, p_img->lsf_desc);
893 p_img->lsf_desc = NULL;
894 }
895}
896
897/* Free any ucode image structure resources. */
898static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g,
899 struct flcn_ucode_img *p_img)
900{
901 if (p_img->lsf_desc != NULL) {
902 nvgpu_kfree(g, p_img->lsf_desc);
903 p_img->lsf_desc = NULL;
904 }
905 if (p_img->desc != NULL) {
906 nvgpu_kfree(g, p_img->desc);
907 p_img->desc = NULL;
908 }
909}
910
911static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm)
912{
913 u32 cnt = plsfm->managed_flcn_cnt;
914 struct lsfm_managed_ucode_img *mg_ucode_img;
915 while (cnt) {
916 mg_ucode_img = plsfm->ucode_img_list;
917 if (mg_ucode_img->ucode_img.lsf_desc->falcon_id ==
918 LSF_FALCON_ID_PMU)
919 lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img);
920 else
921 lsfm_free_nonpmu_ucode_img_res(g,
922 &mg_ucode_img->ucode_img);
923 plsfm->ucode_img_list = mg_ucode_img->next;
924 nvgpu_kfree(g, mg_ucode_img);
925 cnt--;
926 }
927}
928
929/* Generate WPR requirements for ACR allocation request */
930static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
931{
932 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
933 u32 wpr_offset;
934
935 /* Calculate WPR size required */
936
937 /* Start with an array of WPR headers at the base of the WPR.
938 The expectation here is that the secure falcon will do a single DMA
939 read of this array and cache it internally so it's OK to pack these.
940 Also, we add 1 to the falcon count to indicate the end of the array.*/
941 wpr_offset = sizeof(struct lsf_wpr_header) *
942 (plsfm->managed_flcn_cnt+1);
943
944 /* Walk the managed falcons, accounting for the LSB structs
945 as well as the ucode images. */
946 while (pnode) {
947 /* Align, save off, and include an LSB header size */
948 wpr_offset = ALIGN(wpr_offset,
949 LSF_LSB_HEADER_ALIGNMENT);
950 pnode->wpr_header.lsb_offset = wpr_offset;
951 wpr_offset += sizeof(struct lsf_lsb_header);
952
953 /* Align, save off, and include the original (static)
954 ucode image size */
955 wpr_offset = ALIGN(wpr_offset,
956 LSF_UCODE_DATA_ALIGNMENT);
957 pnode->lsb_header.ucode_off = wpr_offset;
958 wpr_offset += pnode->ucode_img.data_size;
959
960 /* For falcons that use a boot loader (BL), we append a loader
961 desc structure on the end of the ucode image and consider this
962 the boot loader data. The host will then copy the loader desc
963 args to this space within the WPR region (before locking down)
964 and the HS bin will then copy them to DMEM 0 for the loader. */
965 if (!pnode->ucode_img.header) {
966			/* Track the size for LSB details filled in later.
967			Note that at this point we don't know what kind of
968			boot loader desc is needed, so we just take the size of the
969			generic one, which is the largest it will ever be.
970			*/
971 /* Align (size bloat) and save off generic
972 descriptor size*/
973 pnode->lsb_header.bl_data_size = ALIGN(
974 sizeof(pnode->bl_gen_desc),
975 LSF_BL_DATA_SIZE_ALIGNMENT);
976
977 /*Align, save off, and include the additional BL data*/
978 wpr_offset = ALIGN(wpr_offset,
979 LSF_BL_DATA_ALIGNMENT);
980 pnode->lsb_header.bl_data_off = wpr_offset;
981 wpr_offset += pnode->lsb_header.bl_data_size;
982 } else {
983 /* bl_data_off is already assigned in static
984 information. But that is from start of the image */
985 pnode->lsb_header.bl_data_off +=
986 (wpr_offset - pnode->ucode_img.data_size);
987 }
988
989 /* Finally, update ucode surface size to include updates */
990 pnode->full_ucode_size = wpr_offset -
991 pnode->lsb_header.ucode_off;
992 if (pnode->wpr_header.falcon_id != LSF_FALCON_ID_PMU) {
993 pnode->lsb_header.app_code_off =
994 pnode->lsb_header.bl_code_size;
995 pnode->lsb_header.app_code_size =
996 pnode->lsb_header.ucode_size -
997 pnode->lsb_header.bl_code_size;
998 pnode->lsb_header.app_data_off =
999 pnode->lsb_header.ucode_size;
1000 pnode->lsb_header.app_data_size =
1001 pnode->lsb_header.data_size;
1002 }
1003 pnode = pnode->next;
1004 }
1005 plsfm->wpr_size = wpr_offset;
1006 return 0;
1007}
1008
1009/*Loads ACR bin to FB mem and bootstraps PMU with bootloader code
1010 * start and end are addresses of ucode blob in non-WPR region*/
1011int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1012{
1013 struct mm_gk20a *mm = &g->mm;
1014 struct vm_gk20a *vm = mm->pmu.vm;
1015 int err = 0;
1016 u64 *acr_dmem;
1017 u32 img_size_in_bytes = 0;
1018 u32 status, size;
1019 u64 start;
1020 struct acr_desc *acr = &g->acr;
1021 struct nvgpu_firmware *acr_fw = acr->acr_fw;
1022 struct flcn_bl_dmem_desc *bl_dmem_desc = &acr->bl_dmem_desc;
1023 u32 *acr_ucode_header_t210_load;
1024 u32 *acr_ucode_data_t210_load;
1025
1026 start = nvgpu_mem_get_addr(g, &acr->ucode_blob);
1027 size = acr->ucode_blob.size;
1028
1029 gm20b_dbg_pmu("");
1030
1031 if (!acr_fw) {
1032 /*First time init case*/
1033 acr_fw = nvgpu_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE, 0);
1034 if (!acr_fw) {
1035 nvgpu_err(g, "pmu ucode get fail");
1036 return -ENOENT;
1037 }
1038 acr->acr_fw = acr_fw;
1039 acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
1040 acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
1041 acr->hsbin_hdr->header_offset);
1042 acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
1043 acr->hsbin_hdr->data_offset);
1044 acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
1045 acr->fw_hdr->hdr_offset);
1046 img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
1047
1048		/* Let's patch the signatures first. */
1049 if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
1050 (u32 *)(acr_fw->data +
1051 acr->fw_hdr->sig_prod_offset),
1052 (u32 *)(acr_fw->data +
1053 acr->fw_hdr->sig_dbg_offset),
1054 (u32 *)(acr_fw->data +
1055 acr->fw_hdr->patch_loc),
1056 (u32 *)(acr_fw->data +
1057 acr->fw_hdr->patch_sig)) < 0) {
1058 nvgpu_err(g, "patch signatures fail");
1059 err = -1;
1060 goto err_release_acr_fw;
1061 }
1062 err = nvgpu_dma_alloc_map_sys(vm, img_size_in_bytes,
1063 &acr->acr_ucode);
1064 if (err) {
1065 err = -ENOMEM;
1066 goto err_release_acr_fw;
1067 }
1068
1069 acr_dmem = (u64 *)
1070 &(((u8 *)acr_ucode_data_t210_load)[
1071 acr_ucode_header_t210_load[2]]);
1072 acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)(
1073 acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
1074 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
1075 start;
1076 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
1077 size;
1078 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
1079 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
1080
1081 nvgpu_mem_wr_n(g, &acr->acr_ucode, 0,
1082 acr_ucode_data_t210_load, img_size_in_bytes);
1083 /*
1084 * In order to execute this binary, we will be using
1085 * a bootloader which will load this image into PMU IMEM/DMEM.
1086 * Fill up the bootloader descriptor for PMU HAL to use..
1087 * TODO: Use standard descriptor which the generic bootloader is
1088 * checked in.
1089 */
1090
1091 bl_dmem_desc->signature[0] = 0;
1092 bl_dmem_desc->signature[1] = 0;
1093 bl_dmem_desc->signature[2] = 0;
1094 bl_dmem_desc->signature[3] = 0;
1095 bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
1096 bl_dmem_desc->code_dma_base =
1097 (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8));
1098 bl_dmem_desc->code_dma_base1 = 0x0;
1099 bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0];
1100 bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
1101 bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
1102 bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6];
1103 bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */
1104 bl_dmem_desc->data_dma_base =
1105 bl_dmem_desc->code_dma_base +
1106 ((acr_ucode_header_t210_load[2]) >> 8);
1107 bl_dmem_desc->data_dma_base1 = 0x0;
1108 bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];
1109 } else
1110 acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0;
1111 status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
1112 if (status != 0) {
1113 err = status;
1114 goto err_free_ucode_map;
1115 }
1116 return 0;
1117err_free_ucode_map:
1118 nvgpu_dma_unmap_free(vm, &acr->acr_ucode);
1119err_release_acr_fw:
1120 nvgpu_release_firmware(g, acr_fw);
1121 acr->acr_fw = NULL;
1122 return err;
1123}
1124
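/* Check the SCP control status register to see whether the PMU is in debug (non-production) mode. */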
1125static u8 pmu_is_debug_mode_en(struct gk20a *g)
1126{
1127 u32 ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r());
1128 return pwr_pmu_scpctl_stat_debug_mode_v(ctl_stat);
1129}
1130
1131/*
1132 * @brief Patch signatures into ucode image
1133 */
1134int acr_ucode_patch_sig(struct gk20a *g,
1135 unsigned int *p_img,
1136 unsigned int *p_prod_sig,
1137 unsigned int *p_dbg_sig,
1138 unsigned int *p_patch_loc,
1139 unsigned int *p_patch_ind)
1140{
1141 unsigned int i, *p_sig;
1142 gm20b_dbg_pmu("");
1143
1144 if (!pmu_is_debug_mode_en(g)) {
1145 p_sig = p_prod_sig;
1146 gm20b_dbg_pmu("PRODUCTION MODE\n");
1147 } else {
1148 p_sig = p_dbg_sig;
1149 gm20b_dbg_pmu("DEBUG MODE\n");
1150 }
1151
1152 /* Patching logic:*/
1153 for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) {
1154 p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)];
1155 p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1];
1156 p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2];
1157 p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3];
1158 }
1159 return 0;
1160}
1161
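/* Set up the PMU instance block, copy the bootloader DMEM descriptor, load the bootloader at the top of IMEM, and start the falcon at the bootloader's entry tag. */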
1162static int bl_bootstrap(struct nvgpu_pmu *pmu,
1163 struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz)
1164{
1165 struct gk20a *g = gk20a_from_pmu(pmu);
1166 struct acr_desc *acr = &g->acr;
1167 struct mm_gk20a *mm = &g->mm;
1168 u32 virt_addr = 0;
1169 struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc;
1170 u32 dst;
1171
1172 gk20a_dbg_fn("");
1173 gk20a_writel(g, pwr_falcon_itfen_r(),
1174 gk20a_readl(g, pwr_falcon_itfen_r()) |
1175 pwr_falcon_itfen_ctxen_enable_f());
1176 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1177 pwr_pmu_new_instblk_ptr_f(
1178 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
1179 pwr_pmu_new_instblk_valid_f(1) |
1180 pwr_pmu_new_instblk_target_sys_coh_f());
1181
1182 /*copy bootloader interface structure to dmem*/
1183 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
1184 sizeof(struct flcn_bl_dmem_desc), 0);
1185
1186 /* copy bootloader to TOP of IMEM */
1187 dst = (pwr_falcon_hwcfg_imem_size_v(
1188 gk20a_readl(g, pwr_falcon_hwcfg_r())) << 8) - bl_sz;
1189
1190 nvgpu_flcn_copy_to_imem(pmu->flcn, dst,
1191 (u8 *)(acr->hsbl_ucode.cpu_va), bl_sz, 0, 0,
1192 pmu_bl_gm10x_desc->bl_start_tag);
1193
1194 gm20b_dbg_pmu("Before starting falcon with BL\n");
1195
1196 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
1197
1198 nvgpu_flcn_bootstrap(pmu->flcn, virt_addr);
1199
1200 return 0;
1201}
1202
1203int gm20b_init_nspmu_setup_hw1(struct gk20a *g)
1204{
1205 struct nvgpu_pmu *pmu = &g->pmu;
1206 int err = 0;
1207
1208 gk20a_dbg_fn("");
1209
1210 nvgpu_mutex_acquire(&pmu->isr_mutex);
1211 nvgpu_flcn_reset(pmu->flcn);
1212 pmu->isr_enabled = true;
1213 nvgpu_mutex_release(&pmu->isr_mutex);
1214
1215 /* setup apertures - virtual */
1216 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1217 pwr_fbif_transcfg_mem_type_virtual_f());
1218 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1219 pwr_fbif_transcfg_mem_type_virtual_f());
1220 /* setup apertures - physical */
1221 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1222 pwr_fbif_transcfg_mem_type_physical_f() |
1223 pwr_fbif_transcfg_target_local_fb_f());
1224 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1225 pwr_fbif_transcfg_mem_type_physical_f() |
1226 pwr_fbif_transcfg_target_coherent_sysmem_f());
1227 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1228 pwr_fbif_transcfg_mem_type_physical_f() |
1229 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1230
1231 err = g->ops.pmu.pmu_nsbootstrap(pmu);
1232
1233 return err;
1234}
1235
1236int gm20b_init_pmu_setup_hw1(struct gk20a *g,
1237 void *desc, u32 bl_sz)
1238{
1239
1240 struct nvgpu_pmu *pmu = &g->pmu;
1241 int err;
1242
1243 gk20a_dbg_fn("");
1244
1245 nvgpu_mutex_acquire(&pmu->isr_mutex);
1246 nvgpu_flcn_reset(pmu->flcn);
1247 pmu->isr_enabled = true;
1248 nvgpu_mutex_release(&pmu->isr_mutex);
1249
1250 /* setup apertures - virtual */
1251 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1252 pwr_fbif_transcfg_mem_type_physical_f() |
1253 pwr_fbif_transcfg_target_local_fb_f());
1254 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1255 pwr_fbif_transcfg_mem_type_virtual_f());
1256 /* setup apertures - physical */
1257 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1258 pwr_fbif_transcfg_mem_type_physical_f() |
1259 pwr_fbif_transcfg_target_local_fb_f());
1260 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1261 pwr_fbif_transcfg_mem_type_physical_f() |
1262 pwr_fbif_transcfg_target_coherent_sysmem_f());
1263 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1264 pwr_fbif_transcfg_mem_type_physical_f() |
1265 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1266
1267 /*Copying pmu cmdline args*/
1268 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
1269 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK));
1270 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1);
1271 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
1272 pmu, GK20A_PMU_TRACE_BUFSIZE);
1273 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu);
1274 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx(
1275 pmu, GK20A_PMU_DMAIDX_VIRT);
1276 nvgpu_flcn_copy_to_dmem(pmu->flcn, g->acr.pmu_args,
1277 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
1278 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
1279 /*disable irqs for hs falcon booting as we will poll for halt*/
1280 nvgpu_mutex_acquire(&pmu->isr_mutex);
1281 pmu_enable_irq(pmu, false);
1282 pmu->isr_enabled = false;
1283 nvgpu_mutex_release(&pmu->isr_mutex);
1284 /*Clearing mailbox register used to reflect capabilities*/
1285 gk20a_writel(g, pwr_falcon_mailbox1_r(), 0);
1286 err = bl_bootstrap(pmu, desc, bl_sz);
1287 if (err)
1288 return err;
1289 return 0;
1290}
1291
1292/*
1293* Executes a generic bootloader and waits for the PMU to halt.
1294* This BL will be used for those binaries that are loaded
1295* and executed at times other than RM PMU Binary execution.
1296*
1297* @param[in] g gk20a pointer
1298* @param[in] desc Bootloader descriptor
1299* @param[in] dma_idx DMA Index
1300* @param[in] b_wait_for_halt Wait for PMU to HALT
1301*/
1302int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1303{
1304 struct mm_gk20a *mm = &g->mm;
1305 struct vm_gk20a *vm = mm->pmu.vm;
1306 int err = 0;
1307 u32 bl_sz;
1308 struct acr_desc *acr = &g->acr;
1309 struct nvgpu_firmware *hsbl_fw = acr->hsbl_fw;
1310 struct hsflcn_bl_desc *pmu_bl_gm10x_desc;
1311 u32 *pmu_bl_gm10x = NULL;
1312 gm20b_dbg_pmu("");
1313
1314 if (!hsbl_fw) {
1315 hsbl_fw = nvgpu_request_firmware(g,
1316 GM20B_HSBIN_PMU_BL_UCODE_IMAGE, 0);
1317 if (!hsbl_fw) {
1318 nvgpu_err(g, "pmu ucode load fail");
1319 return -ENOENT;
1320 }
1321 acr->hsbl_fw = hsbl_fw;
1322 acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data;
1323 acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data +
1324 acr->bl_bin_hdr->header_offset);
1325 pmu_bl_gm10x_desc = acr->pmu_hsbl_desc;
1326 pmu_bl_gm10x = (u32 *)(hsbl_fw->data +
1327 acr->bl_bin_hdr->data_offset);
1328 bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size,
1329 256);
1330 acr->hsbl_ucode.size = bl_sz;
1331 gm20b_dbg_pmu("Executing Generic Bootloader\n");
1332
1333		/*TODO: verify in code that PMU enable and
1334		scrubbing etc. are done*/
1335 /*TODO in code verify that gmmu vm init is done*/
1336 err = nvgpu_dma_alloc_flags_sys(g,
1337 NVGPU_DMA_READ_ONLY, bl_sz, &acr->hsbl_ucode);
1338 if (err) {
1339 nvgpu_err(g, "failed to allocate memory");
1340 goto err_done;
1341 }
1342
1343 acr->hsbl_ucode.gpu_va = nvgpu_gmmu_map(vm,
1344 &acr->hsbl_ucode,
1345 bl_sz,
1346 0, /* flags */
1347 gk20a_mem_flag_read_only, false,
1348 acr->hsbl_ucode.aperture);
1349 if (!acr->hsbl_ucode.gpu_va) {
1350 nvgpu_err(g, "failed to map pmu ucode memory!!");
1351 goto err_free_ucode;
1352 }
1353
1354 nvgpu_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz);
1355 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1356 }
1357 /*
1358 * Disable interrupts to avoid kernel hitting breakpoint due
1359 * to PMU halt
1360 */
1361
1362 if (g->ops.pmu.falcon_clear_halt_interrupt_status(g,
1363 gk20a_get_gr_idle_timeout(g)))
1364 goto err_unmap_bl;
1365
1366 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1367 pwr_falcon_mmu_phys_sec_r()));
1368 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1369
1370 g->ops.pmu.init_falcon_setup_hw(g, desc, acr->hsbl_ucode.size);
1371
1372 /* Poll for HALT */
1373 if (b_wait_for_halt) {
1374 err = g->ops.pmu.falcon_wait_for_halt(g,
1375 ACR_COMPLETION_TIMEOUT_MS);
1376 if (err == 0) {
1377 /* Clear the HALT interrupt */
1378 if (g->ops.pmu.falcon_clear_halt_interrupt_status(g,
1379 gk20a_get_gr_idle_timeout(g)))
1380 goto err_unmap_bl;
1381 }
1382 else
1383 goto err_unmap_bl;
1384 }
1385 gm20b_dbg_pmu("after waiting for halt, err %x\n", err);
1386 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1387 pwr_falcon_mmu_phys_sec_r()));
1388 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1389 start_gm20b_pmu(g);
1390 return 0;
1391err_unmap_bl:
1392 nvgpu_gmmu_unmap(vm, &acr->hsbl_ucode, acr->hsbl_ucode.gpu_va);
1393err_free_ucode:
1394 nvgpu_dma_free(g, &acr->hsbl_ucode);
1395err_done:
1396 nvgpu_release_firmware(g, hsbl_fw);
1397 return err;
1398}
1399
1400/*!
1401* Wait for PMU to halt
1402* @param[in] g GPU object pointer
1403* @param[in] timeout_ms Timeout in msec for PMU to halt
1404* @return '0' if PMU halts
1405*/
1406int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms)
1407{
1408 struct nvgpu_pmu *pmu = &g->pmu;
1409 u32 data = 0;
1410 int ret = -EBUSY;
1411
1412 ret = nvgpu_flcn_wait_for_halt(pmu->flcn, timeout_ms);
1413 if (ret) {
1414 nvgpu_err(g, "ACR boot timed out");
1415 return ret;
1416 }
1417
1418 g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r());
1419 gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
1420 data = gk20a_readl(g, pwr_falcon_mailbox0_r());
1421 if (data) {
1422 nvgpu_err(g, "ACR boot failed, err %x", data);
1423 ret = -EAGAIN;
1424 }
1425
1426 return ret;
1427}
1428
1429/*!
1430* Wait for PMU halt interrupt status to be cleared
1431* @param[in] g GPU object pointer
1432* @param[in] timeout_ms Timeout in msec for halt to clear
1433* @return '0' if PMU halt irq status is clear
1434*/
1435int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms)
1436{
1437 struct nvgpu_pmu *pmu = &g->pmu;
1438 int status = 0;
1439
1440 if (nvgpu_flcn_clear_halt_intr_status(pmu->flcn, timeout_ms))
1441 status = -EBUSY;
1442
1443 return status;
1444}
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
new file mode 100644
index 00000000..9d261aae
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
@@ -0,0 +1,60 @@
1/*
2 * GM20B ACR
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef __ACR_GM20B_H_
26#define __ACR_GM20B_H_
27
28#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode_image.bin"
29#define GM20B_PMU_UCODE_DESC "gpmu_ucode_desc.bin"
30#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
31#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
32#define GM20B_PMU_UCODE_SIG "pmu_sig.bin"
33#define GM20B_FECS_UCODE_SIG "fecs_sig.bin"
34#define T18x_GPCCS_UCODE_SIG "gpccs_sig.bin"
35
36bool gm20b_is_pmu_supported(struct gk20a *g);
37int prepare_ucode_blob(struct gk20a *g);
38int gm20b_bootstrap_hs_flcn(struct gk20a *g);
39bool gm20b_is_lazy_bootstrap(u32 falcon_id);
40bool gm20b_is_priv_load(u32 falcon_id);
41void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
42int gm20b_alloc_blob_space(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
43int gm20b_pmu_populate_loader_cfg(struct gk20a *g,
44 void *lsfm, u32 *p_bl_gen_desc_size);
45int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g,
46 void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid);
47int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms);
48int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout);
49int gm20b_init_pmu_setup_hw1(struct gk20a *g, void *desc, u32 bl_sz);
50
51int gm20b_pmu_setup_sw(struct gk20a *g);
52int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt);
53int gm20b_init_nspmu_setup_hw1(struct gk20a *g);
54int acr_ucode_patch_sig(struct gk20a *g,
55 unsigned int *p_img,
56 unsigned int *p_prod_sig,
57 unsigned int *p_dbg_sig,
58 unsigned int *p_patch_loc,
59 unsigned int *p_patch_ind);
60#endif /*__ACR_GM20B_H_*/
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
new file mode 100644
index 00000000..34c8d4b7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -0,0 +1,65 @@
1/*
2 * GM20B BUS
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/timers.h>
26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h>
28
29#include "bus_gm20b.h"
30#include "gk20a/gk20a.h"
31#include "gk20a/bus_gk20a.h"
32
33#include <nvgpu/hw/gm20b/hw_bus_gm20b.h>
34
35int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
36{
37 struct nvgpu_timeout timeout;
38 int err = 0;
39 u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
40 u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
41
42 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
43
44 gk20a_writel(g, bus_bar1_block_r(),
45 nvgpu_aperture_mask(g, bar1_inst,
46 bus_bar1_block_target_sys_mem_ncoh_f(),
47 bus_bar1_block_target_vid_mem_f()) |
48 bus_bar1_block_mode_virtual_f() |
49 bus_bar1_block_ptr_f(ptr_v));
50 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
51 do {
52 u32 val = gk20a_readl(g, bus_bind_status_r());
53 u32 pending = bus_bind_status_bar1_pending_v(val);
54 u32 outstanding = bus_bind_status_bar1_outstanding_v(val);
55 if (!pending && !outstanding)
56 break;
57
58 nvgpu_udelay(5);
59 } while (!nvgpu_timeout_expired(&timeout));
60
61 if (nvgpu_timeout_peek_expired(&timeout))
62 err = -EINVAL;
63
64 return err;
65}
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.h b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h
new file mode 100644
index 00000000..961b906a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h
@@ -0,0 +1,33 @@
1/*
2 * GM20B BUS
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVGPU_GM20B_BUS
26#define _NVGPU_GM20B_BUS
27
28struct gk20a;
29struct nvgpu_mem;
30
31int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst);
32
33#endif
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
new file mode 100644
index 00000000..61d3b6f5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
@@ -0,0 +1,1605 @@
1/*
2 * GM20B Clocks
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "clk_gm20b.h"
27
28#include <nvgpu/soc.h>
29#include <nvgpu/fuse.h>
30#include <nvgpu/bug.h>
31
32#include <nvgpu/hw/gm20b/hw_trim_gm20b.h>
33#include <nvgpu/hw/gm20b/hw_timer_gm20b.h>
34#include <nvgpu/hw/gm20b/hw_therm_gm20b.h>
35#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
36
37#define gk20a_dbg_clk(fmt, arg...) \
38 gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
39
40#define DFS_DET_RANGE 6 /* -2^6 ... 2^6-1 */
41#define SDM_DIN_RANGE 12 /* -2^12 ... 2^12-1 */
42#define DFS_TESTOUT_DET BIT(0)
43#define DFS_EXT_CAL_EN BIT(9)
44#define DFS_EXT_STROBE BIT(16)
45
46#define BOOT_GPU_UV_B1 1000000 /* gpu rail boot voltage 1.0V */
47#define BOOT_GPU_UV_C1 800000 /* gpu rail boot voltage 0.8V */
48#define ADC_SLOPE_UV 10000 /* default ADC detection slope 10mV */
49
50#define DVFS_SAFE_MARGIN 10 /* 10% */
51
52static struct pll_parms gpc_pll_params_b1 = {
53 128000, 2600000, /* freq */
54 1300000, 2600000, /* vco */
55 12000, 38400, /* u */
56 1, 255, /* M */
57 8, 255, /* N */
58 1, 31, /* PL */
59 -165230, 214007, /* DFS_COEFF */
60 0, 0, /* ADC char coeff - to be read from fuses */
61 0x7 << 3, /* vco control in NA mode */
62 500, /* Locking and ramping timeout */
63 40, /* Lock delay in NA mode */
64 5, /* IDDQ mode exit delay */
65};
66
67static struct pll_parms gpc_pll_params_c1 = {
68 76800, 2600000, /* freq */
69 1300000, 2600000, /* vco */
70 19200, 38400, /* u */
71 1, 255, /* M */
72 8, 255, /* N */
73 1, 31, /* PL */
74 -172550, 195374, /* DFS_COEFF */
75 0, 0, /* ADC char coeff - to be read from fuses */
76 (0x1 << 3) | 0x7, /* vco control in NA mode */
77 500, /* Locking and ramping timeout */
78 40, /* Lock delay in NA mode */
79 5, /* IDDQ mode exit delay */
80 0x3 << 10, /* DFS control settings */
81};
82
83static struct pll_parms gpc_pll_params;
84
85static void clk_setup_slide(struct gk20a *g, u32 clk_u);
86
87#define DUMP_REG(addr_func) \
88do { \
89 addr = trim_sys_##addr_func##_r(); \
90 data = gk20a_readl(g, addr); \
91 pr_info(#addr_func "[0x%x] = 0x%x\n", addr, data); \
92} while (0)
93
94static void dump_gpc_pll(struct gk20a *g, struct pll *gpll, u32 last_cfg)
95{
96 u32 addr, data;
97
98 pr_info("**** GPCPLL DUMP ****");
99 pr_info("gpcpll s/w M=%u N=%u P=%u\n", gpll->M, gpll->N, gpll->PL);
100 pr_info("gpcpll_cfg_last = 0x%x\n", last_cfg);
101 DUMP_REG(gpcpll_cfg);
102 DUMP_REG(gpcpll_coeff);
103 DUMP_REG(sel_vco);
104 pr_info("\n");
105}
106
107#define PLDIV_GLITCHLESS 1
108
109#if PLDIV_GLITCHLESS
110/*
111 * Post divider transition is glitchless only if there is a common "1" in
112 * the binary representation of the old and new settings.
113 */
114static u32 get_interim_pldiv(struct gk20a *g, u32 old_pl, u32 new_pl)
115{
116 u32 pl;
117
118 if ((g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) || (old_pl & new_pl))
119 return 0;
120
121 pl = old_pl | BIT(ffs(new_pl) - 1); /* pl never 0 */
122 new_pl |= BIT(ffs(old_pl) - 1);
123
124 return min(pl, new_pl);
125}
126#endif
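/*
 * Illustrative trace of get_interim_pldiv() (hypothetical PL values): going
 * from PL = 1 (0b00001) to PL = 4 (0b00100) has no common "1" bit, so the
 * function returns min(1 | BIT(ffs(4) - 1), 4 | BIT(ffs(1) - 1)) =
 * min(5, 5) = 5 (0b00101), which shares a bit with both the old and the new
 * setting and makes the two-step 1 -> 5 -> 4 transition glitchless.
 */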
127
128/* Calculate and update M/N/PL as well as pll->freq
129 ref_clk_f = clk_in_f;
130 u_f = ref_clk_f / M;
131 vco_f = u_f * N = ref_clk_f * N / M;
132 PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
133 gpcclk = gpc2clk / 2; */
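/*
 * Worked example of the formulas above (hypothetical coefficients, assuming
 * the 38400 kHz reference used in the parameter tables): with M = 1, N = 52,
 * PL = 1 (div 1), vco_f = 38400 * 52 / 1 = 1996800 kHz, so gpc2clk is
 * 1996800 kHz and gpcclk is 998400 kHz (~1 GHz), and the VCO stays within
 * the 1300000..2600000 kHz range.
 */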
134static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
135 struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
136{
137 u32 min_vco_f, max_vco_f;
138 u32 best_M, best_N;
139 u32 low_PL, high_PL, best_PL;
140 u32 m, n, n2;
141 u32 target_vco_f, vco_f;
142 u32 ref_clk_f, target_clk_f, u_f;
143 u32 delta, lwv, best_delta = ~0;
144 u32 pl;
145
146 BUG_ON(target_freq == NULL);
147
148 gk20a_dbg_fn("request target freq %d MHz", *target_freq);
149
150 ref_clk_f = pll->clk_in;
151 target_clk_f = *target_freq;
152 max_vco_f = pll_params->max_vco;
153 min_vco_f = pll_params->min_vco;
154 best_M = pll_params->max_M;
155 best_N = pll_params->min_N;
156 best_PL = pll_params->min_PL;
157
158 target_vco_f = target_clk_f + target_clk_f / 50;
159 if (max_vco_f < target_vco_f)
160 max_vco_f = target_vco_f;
161
162 /* Set PL search boundaries. */
163 high_PL = nvgpu_div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
164 high_PL = min(high_PL, pll_params->max_PL);
165 high_PL = max(high_PL, pll_params->min_PL);
166
167 low_PL = nvgpu_div_to_pl(min_vco_f / target_vco_f);
168 low_PL = min(low_PL, pll_params->max_PL);
169 low_PL = max(low_PL, pll_params->min_PL);
170
171 gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
172 low_PL, nvgpu_pl_to_div(low_PL), high_PL, nvgpu_pl_to_div(high_PL));
173
174 for (pl = low_PL; pl <= high_PL; pl++) {
175 target_vco_f = target_clk_f * nvgpu_pl_to_div(pl);
176
177 for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
178 u_f = ref_clk_f / m;
179
180 if (u_f < pll_params->min_u)
181 break;
182 if (u_f > pll_params->max_u)
183 continue;
184
185 n = (target_vco_f * m) / ref_clk_f;
186 n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;
187
188 if (n > pll_params->max_N)
189 break;
190
191 for (; n <= n2; n++) {
192 if (n < pll_params->min_N)
193 continue;
194 if (n > pll_params->max_N)
195 break;
196
197 vco_f = ref_clk_f * n / m;
198
199 if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
200 lwv = (vco_f + (nvgpu_pl_to_div(pl) / 2))
201 / nvgpu_pl_to_div(pl);
202 delta = abs(lwv - target_clk_f);
203
204 if (delta < best_delta) {
205 best_delta = delta;
206 best_M = m;
207 best_N = n;
208 best_PL = pl;
209
210 if (best_delta == 0 ||
211 /* 0.45% for non best fit */
212 (!best_fit && (vco_f / best_delta > 218))) {
213 goto found_match;
214 }
215
216 gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
217 delta, m, n, pl);
218 }
219 }
220 }
221 }
222 }
223
224found_match:
225 BUG_ON(best_delta == ~0U);
226
227 if (best_fit && best_delta != 0)
228 gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
229 target_clk_f);
230
231 pll->M = best_M;
232 pll->N = best_N;
233 pll->PL = best_PL;
234
235 /* save current frequency */
236 pll->freq = ref_clk_f * pll->N / (pll->M * nvgpu_pl_to_div(pll->PL));
237
238 *target_freq = pll->freq;
239
240 gk20a_dbg_clk("actual target freq %d kHz, M %d, N %d, PL %d(div%d)",
241 *target_freq, pll->M, pll->N, pll->PL, nvgpu_pl_to_div(pll->PL));
242
243 gk20a_dbg_fn("done");
244
245 return 0;
246}
247
248/* GPCPLL NA/DVFS mode methods */
249
250static inline int fuse_get_gpcpll_adc_rev(u32 val)
251{
252 return (val >> 30) & 0x3;
253}
254
255static inline int fuse_get_gpcpll_adc_slope_uv(u32 val)
256{
257 /* Integer part in mV * 1000 + fractional part in uV */
258 return ((val >> 24) & 0x3f) * 1000 + ((val >> 14) & 0x3ff);
259}
260
261static inline int fuse_get_gpcpll_adc_intercept_uv(u32 val)
262{
263 /* Integer part in mV * 1000 + fractional part in 100uV */
264 return ((val >> 4) & 0x3ff) * 1000 + ((val >> 0) & 0xf) * 100;
265}
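/*
 * Decoding sketch for the helpers above (hypothetical fuse value, not real
 * calibration data): if bits [29:24] = 10 and bits [23:14] = 500, the slope
 * is 10 * 1000 + 500 = 10500 uV; if bits [13:4] = 600 and bits [3:0] = 5,
 * the intercept is 600 * 1000 + 5 * 100 = 600500 uV.
 */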
266
267static int nvgpu_fuse_calib_gpcpll_get_adc(struct gk20a *g,
268 int *slope_uv, int *intercept_uv)
269{
270 u32 val;
271 int ret;
272
273 ret = nvgpu_tegra_fuse_read_reserved_calib(g, &val);
274 if (ret)
275 return ret;
276
277 if (!fuse_get_gpcpll_adc_rev(val))
278 return -EINVAL;
279
280 *slope_uv = fuse_get_gpcpll_adc_slope_uv(val);
281 *intercept_uv = fuse_get_gpcpll_adc_intercept_uv(val);
282 return 0;
283}
284
285#ifdef CONFIG_TEGRA_USE_NA_GPCPLL
286static bool nvgpu_fuse_can_use_na_gpcpll(struct gk20a *g)
287{
288 return nvgpu_tegra_get_gpu_speedo_id(g);
289}
290#endif
291
292/*
293 * Read ADC characteristic parameters from fuses.
294 * Determine calibration settings.
295 */
296static int clk_config_calibration_params(struct gk20a *g)
297{
298 int slope, offs;
299 struct pll_parms *p = &gpc_pll_params;
300
301 if (!nvgpu_fuse_calib_gpcpll_get_adc(g, &slope, &offs)) {
302 p->uvdet_slope = slope;
303 p->uvdet_offs = offs;
304 }
305
306 if (!p->uvdet_slope || !p->uvdet_offs) {
307 /*
308 * If ADC conversion slope/offset parameters are not fused
309 * (non-production config), report error, but allow to use
310 * boot internal calibration with default slope.
311 */
312 nvgpu_err(g, "ADC coeff are not fused");
313 return -EINVAL;
314 }
315 return 0;
316}
317
318/*
319 * Determine DFS_COEFF for the requested voltage. Always select external
320 * calibration override equal to the voltage, and set maximum detection
321 * limit "0" (to make sure that PLL output remains under F/V curve when
322 * voltage increases).
323 */
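/*
 * Rough sanity check of the coefficient math below (illustrative arithmetic
 * only, not a characterized operating point): with the B1 parameters above
 * (coeff_slope = -165230, coeff_offs = 214007) and mv = 1000,
 * coeff = DIV_ROUND_CLOSEST(1000 * -165230, 1000) + 214007 = 48777, and
 * DIV_ROUND_CLOSEST(48777, 1000) = 49, so dfs_coeff is about 49 before the
 * coeff_max clamp is applied.
 */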
324static void clk_config_dvfs_detection(int mv, struct na_dvfs *d)
325{
326 u32 coeff, coeff_max;
327 struct pll_parms *p = &gpc_pll_params;
328
329 coeff_max = trim_sys_gpcpll_dvfs0_dfs_coeff_v(
330 trim_sys_gpcpll_dvfs0_dfs_coeff_m());
331 coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
332 coeff = DIV_ROUND_CLOSEST(coeff, 1000);
333 coeff = min(coeff, coeff_max);
334 d->dfs_coeff = coeff;
335
336 d->dfs_ext_cal = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs,
337 p->uvdet_slope);
338 BUG_ON(abs(d->dfs_ext_cal) >= (1 << DFS_DET_RANGE));
339 d->uv_cal = p->uvdet_offs + d->dfs_ext_cal * p->uvdet_slope;
340 d->dfs_det_max = 0;
341}
342
343/*
344 * Solve equation for integer and fractional part of the effective NDIV:
345 *
346 * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) +
347 * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE
348 *
349 * The SDM_DIN LSB is finally shifted out, since it is not accessible by s/w.
350 */
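/*
 * Numeric sketch of the conversion below (hypothetical inputs): with
 * n_eff = 52 and a detection delta of zero (voltage equal to the calibration
 * level), n = 52 << 6 = 3328, so n_int = 52 and rem = 0; sdm_din then starts
 * as (0 << 7) - (1 << 12) = -4096, i.e. -0.5 in n_eff units, cancelling the
 * "+ 1/2" term, and the value left in the register field after the final
 * shift and mask is 0xf0.
 */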
351static void clk_config_dvfs_ndiv(int mv, u32 n_eff, struct na_dvfs *d)
352{
353 int n, det_delta;
354 u32 rem, rem_range;
355 struct pll_parms *p = &gpc_pll_params;
356
357 det_delta = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs,
358 p->uvdet_slope);
359 det_delta -= d->dfs_ext_cal;
360 det_delta = min(det_delta, d->dfs_det_max);
361 det_delta = det_delta * d->dfs_coeff;
362
363 n = (int)(n_eff << DFS_DET_RANGE) - det_delta;
364 BUG_ON((n < 0) || (n > (int)(p->max_N << DFS_DET_RANGE)));
365 d->n_int = ((u32)n) >> DFS_DET_RANGE;
366
367 rem = ((u32)n) & ((1 << DFS_DET_RANGE) - 1);
368 rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
369 d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE);
370 d->sdm_din = (d->sdm_din >> BITS_PER_BYTE) & 0xff;
371}
372
373/* Voltage dependent configuration */
374static void clk_config_dvfs(struct gk20a *g, struct pll *gpll)
375{
376 struct na_dvfs *d = &gpll->dvfs;
377
378 d->mv = g->ops.clk.predict_mv_at_hz_cur_tfloor(&g->clk,
379 rate_gpc2clk_to_gpu(gpll->freq));
380
381 clk_config_dvfs_detection(d->mv, d);
382 clk_config_dvfs_ndiv(d->mv, gpll->N, d);
383}
384
385/* Update DVFS detection settings in flight */
386static void clk_set_dfs_coeff(struct gk20a *g, u32 dfs_coeff)
387{
388 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
389 data |= DFS_EXT_STROBE;
390 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
391
392 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
393 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
394 trim_sys_gpcpll_dvfs0_dfs_coeff_f(dfs_coeff));
395 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
396
397 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
398 nvgpu_udelay(1);
399 data &= ~DFS_EXT_STROBE;
400 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
401}
402
403static void __maybe_unused clk_set_dfs_det_max(struct gk20a *g, u32 dfs_det_max)
404{
405 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
406 data |= DFS_EXT_STROBE;
407 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
408
409 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
410 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
411 trim_sys_gpcpll_dvfs0_dfs_det_max_f(dfs_det_max));
412 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
413
414 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
415 nvgpu_udelay(1);
416 data &= ~DFS_EXT_STROBE;
417 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
418}
419
420static void clk_set_dfs_ext_cal(struct gk20a *g, u32 dfs_det_cal)
421{
422 u32 data, ctrl;
423
424 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
425 data &= ~(BIT(DFS_DET_RANGE + 1) - 1);
426 data |= dfs_det_cal & (BIT(DFS_DET_RANGE + 1) - 1);
427 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
428
429 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
430 nvgpu_udelay(1);
431 ctrl = trim_sys_gpcpll_dvfs1_dfs_ctrl_v(data);
432 if (~ctrl & DFS_EXT_CAL_EN) {
433 data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(),
434 trim_sys_gpcpll_dvfs1_dfs_ctrl_f(
435 ctrl | DFS_EXT_CAL_EN | DFS_TESTOUT_DET));
436 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
437 }
438}
439
440static void clk_setup_dvfs_detection(struct gk20a *g, struct pll *gpll)
441{
442 struct na_dvfs *d = &gpll->dvfs;
443
444 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
445 data |= DFS_EXT_STROBE;
446 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
447
448 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
449 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
450 trim_sys_gpcpll_dvfs0_dfs_coeff_f(d->dfs_coeff));
451 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
452 trim_sys_gpcpll_dvfs0_dfs_det_max_f(d->dfs_det_max));
453 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
454
455 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
456 nvgpu_udelay(1);
457 data &= ~DFS_EXT_STROBE;
458 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
459
460 clk_set_dfs_ext_cal(g, d->dfs_ext_cal);
461}
462
463/* Enable NA/DVFS mode */
464static int clk_enable_pll_dvfs(struct gk20a *g)
465{
466 u32 data, cfg = 0;
467 int delay = gpc_pll_params.iddq_exit_delay; /* iddq & calib delay */
468 struct pll_parms *p = &gpc_pll_params;
469 bool calibrated = p->uvdet_slope && p->uvdet_offs;
470
471 /* Enable NA DVFS */
472 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
473 data |= trim_sys_gpcpll_dvfs1_en_dfs_m();
474 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
475
476 /* Set VCO_CTRL */
477 if (p->vco_ctrl) {
478 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
479 data = set_field(data, trim_sys_gpcpll_cfg3_vco_ctrl_m(),
480 trim_sys_gpcpll_cfg3_vco_ctrl_f(p->vco_ctrl));
481 gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
482 }
483
484 /* Set NA mode DFS control */
485 if (p->dfs_ctrl) {
486 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
487 data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(),
488 trim_sys_gpcpll_dvfs1_dfs_ctrl_f(p->dfs_ctrl));
489 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
490 }
491
492 /*
493 * If calibration parameters are known (either from fuses, or from
494 * internal calibration on boot) - use them. Internal calibration is
495 * started anyway; it will complete, but results will not be used.
496 */
497 if (calibrated) {
498 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
499 data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
500 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
501 }
502
503 /* Exit IDDQ mode */
504 data = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
505 data = set_field(data, trim_sys_gpcpll_cfg_iddq_m(),
506 trim_sys_gpcpll_cfg_iddq_power_on_v());
507 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), data);
508 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
509 nvgpu_udelay(delay);
510
511 /*
512 * Dynamic ramp setup based on update rate, which in DVFS mode on GM20b
513 * is always 38.4 MHz, the same as reference clock rate.
514 */
515 clk_setup_slide(g, g->clk.gpc_pll.clk_in);
516
517 if (calibrated)
518 return 0;
519
520 /*
521 * If calibration parameters are not fused, start internal calibration,
522 * wait for completion, and use results along with default slope to
523 * calculate ADC offset during boot.
524 */
525 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
526 data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
527 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
528
529 /* C1 PLL must be enabled to read internal calibration results */
530 if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) {
531 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
532 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
533 trim_sys_gpcpll_cfg_enable_yes_f());
534 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
535 }
536
537 /* Wait for internal calibration done (spec < 2us). */
538 do {
539 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
540 if (trim_sys_gpcpll_dvfs1_dfs_cal_done_v(data))
541 break;
542 nvgpu_udelay(1);
543 delay--;
544 } while (delay > 0);
545
546 /* Read calibration results */
547 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
548 data = trim_sys_gpcpll_cfg3_dfs_testout_v(data);
549
550 if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) {
551 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
552 trim_sys_gpcpll_cfg_enable_no_f());
553 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
554 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
555 }
556
557 if (delay <= 0) {
558 nvgpu_err(g, "GPCPLL calibration timeout");
559 return -ETIMEDOUT;
560 }
561
562 p->uvdet_offs = g->clk.pll_poweron_uv - data * ADC_SLOPE_UV;
563 p->uvdet_slope = ADC_SLOPE_UV;
564 return 0;
565}
566
567/* GPCPLL slide methods */
568static void clk_setup_slide(struct gk20a *g, u32 clk_u)
569{
570 u32 data, step_a, step_b;
571
572 switch (clk_u) {
573 case 12000:
574 case 12800:
575 case 13000: /* only on FPGA */
576 step_a = 0x2B;
577 step_b = 0x0B;
578 break;
579 case 19200:
580 step_a = 0x12;
581 step_b = 0x08;
582 break;
583 case 38400:
584 step_a = 0x04;
585 step_b = 0x05;
586 break;
587 default:
588 nvgpu_err(g, "Unexpected reference rate %u kHz", clk_u);
589 BUG();
590 }
591
592 /* setup */
593 data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
594 data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
595 trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
596 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
597 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
598 data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
599 trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
600 gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
601}
602
603static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll)
604{
605 u32 data, coeff;
606 u32 nold, sdm_old;
607 int ramp_timeout = gpc_pll_params.lock_timeout;
608
609 /* get old coefficients */
610 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
611 nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);
612
613 /* do nothing if NDIV is same */
614 if (gpll->mode == GPC_PLL_MODE_DVFS) {
615 /* in DVFS mode check both integer and fraction */
616 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
617 sdm_old = trim_sys_gpcpll_cfg2_sdm_din_v(coeff);
618 if ((gpll->dvfs.n_int == nold) &&
619 (gpll->dvfs.sdm_din == sdm_old))
620 return 0;
621 } else {
622 if (gpll->N == nold)
623 return 0;
624
625 /* dynamic ramp setup based on update rate */
626 clk_setup_slide(g, gpll->clk_in / gpll->M);
627 }
628
629 /* pll slowdown mode */
630 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
631 data = set_field(data,
632 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
633 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
634 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
635
636 /* new ndiv ready for ramp */
637 if (gpll->mode == GPC_PLL_MODE_DVFS) {
638 /* in DVFS mode SDM is updated via "new" field */
639 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
640 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_new_m(),
641 trim_sys_gpcpll_cfg2_sdm_din_new_f(gpll->dvfs.sdm_din));
642 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
643
644 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
645 coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
646 trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int));
647 nvgpu_udelay(1);
648 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
649 } else {
650 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
651 coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
652 trim_sys_gpcpll_coeff_ndiv_f(gpll->N));
653 nvgpu_udelay(1);
654 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
655 }
656
657 /* dynamic ramp to new ndiv */
658 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
659 data = set_field(data,
660 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
661 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
662 nvgpu_udelay(1);
663 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
664
665 do {
666 nvgpu_udelay(1);
667 ramp_timeout--;
668 data = gk20a_readl(
669 g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
670 if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
671 break;
672 } while (ramp_timeout > 0);
673
674 if ((gpll->mode == GPC_PLL_MODE_DVFS) && (ramp_timeout > 0)) {
675 /* in DVFS mode complete SDM update */
676 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
677 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
678 trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
679 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
680 }
681
682 /* exit slowdown mode */
683 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
684 data = set_field(data,
685 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
686 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
687 data = set_field(data,
688 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
689 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
690 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
691 gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
692
693 if (ramp_timeout <= 0) {
694 nvgpu_err(g, "gpcpll dynamic ramp timeout");
695 return -ETIMEDOUT;
696 }
697 return 0;
698}
699
700/* GPCPLL bypass methods */
701static int clk_change_pldiv_under_bypass(struct gk20a *g, struct pll *gpll)
702{
703 u32 data, coeff;
704
705 /* put PLL in bypass before programming it */
706 data = gk20a_readl(g, trim_sys_sel_vco_r());
707 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
708 trim_sys_sel_vco_gpc2clk_out_bypass_f());
709 gk20a_writel(g, trim_sys_sel_vco_r(), data);
710
711 /* change PLDIV */
712 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
713 nvgpu_udelay(1);
714 coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
715 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL));
716 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
717
718 /* put PLL back on vco */
719 data = gk20a_readl(g, trim_sys_sel_vco_r());
720 nvgpu_udelay(1);
721 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
722 trim_sys_sel_vco_gpc2clk_out_vco_f());
723 gk20a_writel(g, trim_sys_sel_vco_r(), data);
724
725 return 0;
726}
727
728static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll)
729{
730 u32 data, cfg, coeff, timeout;
731
732 /* put PLL in bypass before programming it */
733 data = gk20a_readl(g, trim_sys_sel_vco_r());
734 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
735 trim_sys_sel_vco_gpc2clk_out_bypass_f());
736 gk20a_writel(g, trim_sys_sel_vco_r(), data);
737
738 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
739 nvgpu_udelay(1);
740 if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
741 /* get out from IDDQ (1st power up) */
742 cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
743 trim_sys_gpcpll_cfg_iddq_power_on_v());
744 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
745 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
746 nvgpu_udelay(gpc_pll_params.iddq_exit_delay);
747 } else {
748 /* clear SYNC_MODE before disabling PLL */
749 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
750 trim_sys_gpcpll_cfg_sync_mode_disable_f());
751 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
752 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
753
754 /* disable running PLL before changing coefficients */
755 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
756 trim_sys_gpcpll_cfg_enable_no_f());
757 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
758 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
759 }
760
761 /* change coefficients */
762 if (gpll->mode == GPC_PLL_MODE_DVFS) {
763 clk_setup_dvfs_detection(g, gpll);
764
765 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
766 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
767 trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
768 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
769
770 coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
771 trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int) |
772 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
773 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
774 } else {
775 coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
776 trim_sys_gpcpll_coeff_ndiv_f(gpll->N) |
777 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
778 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
779 }
780
781 /* enable PLL after changing coefficients */
782 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
783 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
784 trim_sys_gpcpll_cfg_enable_yes_f());
785 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
786
787 /* just delay in DVFS mode (lock cannot be used) */
788 if (gpll->mode == GPC_PLL_MODE_DVFS) {
789 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
790 nvgpu_udelay(gpc_pll_params.na_lock_delay);
791 gk20a_dbg_clk("NA config_pll under bypass: %u (%u) kHz %d mV",
792 gpll->freq, gpll->freq / 2,
793 (trim_sys_gpcpll_cfg3_dfs_testout_v(
794 gk20a_readl(g, trim_sys_gpcpll_cfg3_r()))
795 * gpc_pll_params.uvdet_slope
796 + gpc_pll_params.uvdet_offs) / 1000);
797 goto pll_locked;
798 }
799
800 /* lock pll */
801 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
802	if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()) {
803 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
804 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
805 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
806 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
807 }
808
809 /* wait pll lock */
810 timeout = gpc_pll_params.lock_timeout + 1;
811 do {
812 nvgpu_udelay(1);
813 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
814 if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
815 goto pll_locked;
816 } while (--timeout > 0);
817
818 /* PLL is messed up. What can we do here? */
819 dump_gpc_pll(g, gpll, cfg);
820 BUG();
821 return -EBUSY;
822
823pll_locked:
824 gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
825 trim_sys_gpcpll_cfg_r(), cfg);
826
827 /* set SYNC_MODE for glitchless switch out of bypass */
828 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
829 trim_sys_gpcpll_cfg_sync_mode_enable_f());
830 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
831 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
832
833 /* put PLL back on vco */
834 data = gk20a_readl(g, trim_sys_sel_vco_r());
835 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
836 trim_sys_sel_vco_gpc2clk_out_vco_f());
837 gk20a_writel(g, trim_sys_sel_vco_r(), data);
838
839 return 0;
840}
841
842/*
843 * Change GPCPLL frequency:
844 * - in legacy (non-DVFS) mode
845 * - in DVFS mode at constant DVFS detection settings, matching current/lower
846 * voltage; the same procedure can be used in this case, since maximum DVFS
847 * detection limit makes sure that PLL output remains under F/V curve when
848 * voltage increases arbitrarily.
849 */
850static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new,
851 int allow_slide)
852{
853 u32 cfg, coeff, data;
854 bool can_slide, pldiv_only;
855 struct pll gpll;
856
857 gk20a_dbg_fn("");
858
859 if (!nvgpu_platform_is_silicon(g))
860 return 0;
861
862 /* get old coefficients */
863 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
864 gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
865 gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff);
866 gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff);
867 gpll.clk_in = gpll_new->clk_in;
868
869 /* combine target dvfs with old coefficients */
870 gpll.dvfs = gpll_new->dvfs;
871 gpll.mode = gpll_new->mode;
872
873 /* do NDIV slide if there is no change in M and PL */
874 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
875 can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);
876
877 if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL))
878 return clk_slide_gpc_pll(g, gpll_new);
879
880 /* slide down to NDIV_LO */
881 if (can_slide) {
882 int ret;
883 gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
884 gpll.clk_in);
885 if (gpll.mode == GPC_PLL_MODE_DVFS)
886 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
887 ret = clk_slide_gpc_pll(g, &gpll);
888 if (ret)
889 return ret;
890 }
891 pldiv_only = can_slide && (gpll_new->M == gpll.M);
892
893 /*
894 * Split FO-to-bypass jump in halfs by setting out divider 1:2.
895 * (needed even if PLDIV_GLITCHLESS is set, since 1:1 <=> 1:2 direct
896 * transition is not really glitch-less - see get_interim_pldiv
897 * function header).
898 */
899 if ((gpll_new->PL < 2) || (gpll.PL < 2)) {
900 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
901 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
902 trim_sys_gpc2clk_out_vcodiv_f(2));
903 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
904 /* Intentional 2nd write to assure linear divider operation */
905 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
906 gk20a_readl(g, trim_sys_gpc2clk_out_r());
907 nvgpu_udelay(2);
908 }
909
910#if PLDIV_GLITCHLESS
911 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
912 if (pldiv_only) {
913 /* Insert interim PLDIV state if necessary */
914 u32 interim_pl = get_interim_pldiv(g, gpll_new->PL, gpll.PL);
915 if (interim_pl) {
916 coeff = set_field(coeff,
917 trim_sys_gpcpll_coeff_pldiv_m(),
918 trim_sys_gpcpll_coeff_pldiv_f(interim_pl));
919 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
920 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
921 }
922 goto set_pldiv; /* path A: no need to bypass */
923 }
924
925 /* path B: bypass if either M changes or PLL is disabled */
926#endif
927 /*
928 * Program and lock pll under bypass. On exit PLL is out of bypass,
929 * enabled, and locked. VCO is at vco_min if sliding is allowed.
930 * Otherwise it is at VCO target (and therefore last slide call below
931 * is effectively NOP). PL is set to target. Output divider is engaged
932 * at 1:2 if either entry, or exit PL setting is 1:1.
933 */
934 gpll = *gpll_new;
935 if (allow_slide) {
936 gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco,
937 gpll_new->clk_in);
938 if (gpll.mode == GPC_PLL_MODE_DVFS)
939 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
940 }
941 if (pldiv_only)
942 clk_change_pldiv_under_bypass(g, &gpll);
943 else
944 clk_lock_gpc_pll_under_bypass(g, &gpll);
945
946#if PLDIV_GLITCHLESS
947 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
948
949set_pldiv:
950 /* coeff must be current from either path A or B */
951 if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) {
952 coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
953 trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL));
954 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
955 }
956#endif
957 /* restore out divider 1:1 */
958 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
959 if ((data & trim_sys_gpc2clk_out_vcodiv_m()) !=
960 trim_sys_gpc2clk_out_vcodiv_by1_f()) {
961 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
962 trim_sys_gpc2clk_out_vcodiv_by1_f());
963 nvgpu_udelay(2);
964 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
965 /* Intentional 2nd write to assure linear divider operation */
966 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
967 gk20a_readl(g, trim_sys_gpc2clk_out_r());
968 }
969
970 /* slide up to target NDIV */
971 return clk_slide_gpc_pll(g, gpll_new);
972}
973
974/* Find GPCPLL config safe at DVFS coefficient = 0, matching target frequency */
975static void clk_config_pll_safe_dvfs(struct gk20a *g, struct pll *gpll)
976{
977 u32 nsafe, nmin;
978
979 if (gpll->freq > g->clk.dvfs_safe_max_freq)
980 gpll->freq = gpll->freq * (100 - DVFS_SAFE_MARGIN) / 100;
981
982 nmin = DIV_ROUND_UP(gpll->M * gpc_pll_params.min_vco, gpll->clk_in);
983 nsafe = gpll->M * gpll->freq / gpll->clk_in;
984
985 /*
986 * If safe frequency is above VCOmin, it can be used in safe PLL config
987 * as is. Since safe frequency is below both old and new frequencies,
988 * in this case all three configurations have the same post divider 1:1, and
989 * direct old=>safe=>new n-sliding will be used for transitions.
990 *
991 * Otherwise, if safe frequency is below VCO min, post-divider in safe
992 * configuration (and possibly in old and/or new configurations) is
993 * above 1:1, and each of the old=>safe and safe=>new transitions includes
994 * sliding to/from VCOmin, as well as divider changes. To avoid extra
995 * dynamic ramps from VCOmin during old=>safe transition and to VCOmin
996 * during safe=>new transition, select nmin as safe NDIV, and set safe
997 * post divider to assure PLL output is below safe frequency
998 */
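	/*
	 * Example of the fallback below (illustrative numbers only): with
	 * clk_in = 38400 kHz, M = 1 and min_vco = 1300000 kHz, nmin = 34;
	 * a safe gpc2clk target of 600000 kHz gives nsafe = 15 < nmin, so
	 * PL = DIV_ROUND_UP(34 * 38400, 600000) = 3 and the safe output
	 * becomes 38400 * 34 / 3 = 435200 kHz, below the safe limit.
	 */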
999 if (nsafe < nmin) {
1000 gpll->PL = DIV_ROUND_UP(nmin * gpll->clk_in,
1001 gpll->M * gpll->freq);
1002 nsafe = nmin;
1003 }
1004 gpll->N = nsafe;
1005 clk_config_dvfs_ndiv(gpll->dvfs.mv, gpll->N, &gpll->dvfs);
1006
1007 gk20a_dbg_clk("safe freq %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d",
1008 gpll->freq, gpll->M, gpll->N, gpll->PL, nvgpu_pl_to_div(gpll->PL),
1009 gpll->dvfs.mv, gpll->dvfs.uv_cal / 1000, gpll->dvfs.dfs_coeff);
1010}
1011
1012/* Change GPCPLL frequency and DVFS detection settings in DVFS mode */
1013static int clk_program_na_gpc_pll(struct gk20a *g, struct pll *gpll_new,
1014 int allow_slide)
1015{
1016 int ret;
1017 struct pll gpll_safe;
1018 struct pll *gpll_old = &g->clk.gpc_pll_last;
1019
1020 BUG_ON(gpll_new->M != 1); /* the only MDIV in NA mode */
1021 clk_config_dvfs(g, gpll_new);
1022
1023 /*
1024 * In cases below no intermediate steps in PLL DVFS configuration are
1025 * necessary because either
1026 * - PLL DVFS will be configured under bypass directly to target, or
1027 * - voltage is not changing, so DVFS detection settings are the same
1028 */
1029 if (!allow_slide || !gpll_new->enabled ||
1030 (gpll_old->dvfs.mv == gpll_new->dvfs.mv))
1031 return clk_program_gpc_pll(g, gpll_new, allow_slide);
1032
1033 /*
1034 * Interim step for changing DVFS detection settings: low enough
1035 * frequency to be safe at DVFS coeff = 0.
1036 *
1037 * 1. If voltage is increasing:
1038 * - safe frequency target matches the lowest - old - frequency
1039 * - DVFS settings are still old
1040 * - Voltage already increased to new level by tegra DVFS, but maximum
1041 * detection limit assures PLL output remains under F/V curve
1042 *
1043 * 2. If voltage is decreasing:
1044 * - safe frequency target matches the lowest - new - frequency
1045 * - DVFS settings are still old
1046 * - Voltage is also old, it will be lowered by tegra DVFS afterwards
1047 *
1048 * Interim step can be skipped if old frequency is below safe minimum,
1049 * i.e., it is low enough to be safe at any voltage in operating range
1050 * with zero DVFS coefficient.
1051 */
1052 if (gpll_old->freq > g->clk.dvfs_safe_max_freq) {
1053 if (gpll_old->dvfs.mv < gpll_new->dvfs.mv) {
1054 gpll_safe = *gpll_old;
1055 gpll_safe.dvfs.mv = gpll_new->dvfs.mv;
1056 } else {
1057 gpll_safe = *gpll_new;
1058 gpll_safe.dvfs = gpll_old->dvfs;
1059 }
1060 clk_config_pll_safe_dvfs(g, &gpll_safe);
1061
1062 ret = clk_program_gpc_pll(g, &gpll_safe, 1);
1063 if (ret) {
1064 nvgpu_err(g, "Safe dvfs program fail");
1065 return ret;
1066 }
1067 }
1068
1069 /*
1070 * DVFS detection settings transition:
1071 * - Set DVFS coefficient zero (safe, since already at frequency safe
1072 * at DVFS coeff = 0 for the lowest of the old/new end-points)
1073 * - Set calibration level to new voltage (safe, since DVFS coeff = 0)
1074 * - Set DVFS coefficient to match new voltage (safe, since already at
1075 * frequency safe at DVFS coeff = 0 for the lowest of the old/new
1076 * end-points).
1077 */
1078 clk_set_dfs_coeff(g, 0);
1079 clk_set_dfs_ext_cal(g, gpll_new->dvfs.dfs_ext_cal);
1080 clk_set_dfs_coeff(g, gpll_new->dvfs.dfs_coeff);
1081
1082 gk20a_dbg_clk("config_pll %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d",
1083 gpll_new->freq, gpll_new->M, gpll_new->N, gpll_new->PL,
1084 nvgpu_pl_to_div(gpll_new->PL),
1085 max(gpll_new->dvfs.mv, gpll_old->dvfs.mv),
1086 gpll_new->dvfs.uv_cal / 1000, gpll_new->dvfs.dfs_coeff);
1087
1088 /* Finally set target rate (with DVFS detection settings already new) */
1089 return clk_program_gpc_pll(g, gpll_new, 1);
1090}
1091
1092static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
1093{
1094 u32 cfg, coeff;
1095 struct clk_gk20a *clk = &g->clk;
1096 struct pll gpll = clk->gpc_pll;
1097
1098 /* slide to VCO min */
1099 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1100 if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
1101 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
1102 gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
1103 gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
1104 gpll.clk_in);
1105 if (gpll.mode == GPC_PLL_MODE_DVFS)
1106 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
1107 clk_slide_gpc_pll(g, &gpll);
1108 }
1109
1110 /* put PLL in bypass before disabling it */
1111 cfg = gk20a_readl(g, trim_sys_sel_vco_r());
1112 cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
1113 trim_sys_sel_vco_gpc2clk_out_bypass_f());
1114 gk20a_writel(g, trim_sys_sel_vco_r(), cfg);
1115
1116 /* clear SYNC_MODE before disabling PLL */
1117 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1118 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
1119 trim_sys_gpcpll_cfg_sync_mode_disable_f());
1120 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
1121
1122 /* disable PLL */
1123 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1124 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
1125 trim_sys_gpcpll_cfg_enable_no_f());
1126 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
1127 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1128
1129 clk->gpc_pll.enabled = false;
1130 clk->gpc_pll_last.enabled = false;
1131 return 0;
1132}
1133
1134struct pll_parms *gm20b_get_gpc_pll_parms(void)
1135{
1136 return &gpc_pll_params;
1137}
1138
1139int gm20b_init_clk_setup_sw(struct gk20a *g)
1140{
1141 struct clk_gk20a *clk = &g->clk;
1142 unsigned long safe_rate;
1143 int err;
1144
1145 gk20a_dbg_fn("");
1146
1147 err = nvgpu_mutex_init(&clk->clk_mutex);
1148 if (err)
1149 return err;
1150
1151 if (clk->sw_ready) {
1152 gk20a_dbg_fn("skip init");
1153 return 0;
1154 }
1155
1156 if (clk->gpc_pll.id == GM20B_GPC_PLL_C1) {
1157 gpc_pll_params = gpc_pll_params_c1;
1158 if (!clk->pll_poweron_uv)
1159 clk->pll_poweron_uv = BOOT_GPU_UV_C1;
1160 } else {
1161 gpc_pll_params = gpc_pll_params_b1;
1162 if (!clk->pll_poweron_uv)
1163 clk->pll_poweron_uv = BOOT_GPU_UV_B1;
1164 }
1165
1166 clk->gpc_pll.clk_in = g->ops.clk.get_ref_clock_rate(g) / KHZ;
1167 if (clk->gpc_pll.clk_in == 0) {
1168 nvgpu_err(g, "GPCPLL reference clock is zero");
1169 err = -EINVAL;
1170 goto fail;
1171 }
1172
1173 safe_rate = g->ops.clk.get_fmax_at_vmin_safe(clk);
1174 safe_rate = safe_rate * (100 - DVFS_SAFE_MARGIN) / 100;
1175 clk->dvfs_safe_max_freq = rate_gpu_to_gpc2clk(safe_rate);
1176 clk->gpc_pll.PL = (clk->dvfs_safe_max_freq == 0) ? 0 :
1177 DIV_ROUND_UP(gpc_pll_params.min_vco, clk->dvfs_safe_max_freq);
1178
1179 /* Initial freq: low enough to be safe at Vmin (default 1/3 VCO min) */
1180 clk->gpc_pll.M = 1;
1181 clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
1182 clk->gpc_pll.clk_in);
1183 clk->gpc_pll.PL = max(clk->gpc_pll.PL, 3U);
1184 clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
1185 clk->gpc_pll.freq /= nvgpu_pl_to_div(clk->gpc_pll.PL);
1186
1187 /*
1188 * All production parts should have ADC fuses burnt. Therefore, check
1189 * ADC fuses always, regardless of whether NA mode is selected; and if
1190 * NA mode is indeed selected, and part can support it, switch to NA
1191 * mode even when ADC calibration is not fused; less accurate s/w
1192 * self-calibration will be used for those parts.
1193 */
1194 clk_config_calibration_params(g);
1195#ifdef CONFIG_TEGRA_USE_NA_GPCPLL
1196 if (nvgpu_fuse_can_use_na_gpcpll(g)) {
1197 /* NA mode is supported only at max update rate 38.4 MHz */
1198 BUG_ON(clk->gpc_pll.clk_in != gpc_pll_params.max_u);
1199 clk->gpc_pll.mode = GPC_PLL_MODE_DVFS;
1200 gpc_pll_params.min_u = gpc_pll_params.max_u;
1201 }
1202#endif
1203
1204 clk->sw_ready = true;
1205
1206 gk20a_dbg_fn("done");
1207 nvgpu_info(g,
1208 "GPCPLL initial settings:%s M=%u, N=%u, P=%u (id = %u)",
1209 clk->gpc_pll.mode == GPC_PLL_MODE_DVFS ? " NA mode," : "",
1210 clk->gpc_pll.M, clk->gpc_pll.N, clk->gpc_pll.PL,
1211 clk->gpc_pll.id);
1212 return 0;
1213
1214fail:
1215 nvgpu_mutex_destroy(&clk->clk_mutex);
1216 return err;
1217}
1218
1219
1220static int set_pll_freq(struct gk20a *g, int allow_slide);
1221static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq);
1222
1223int gm20b_clk_prepare(struct clk_gk20a *clk)
1224{
1225 int ret = 0;
1226
1227 nvgpu_mutex_acquire(&clk->clk_mutex);
1228 if (!clk->gpc_pll.enabled && clk->clk_hw_on)
1229 ret = set_pll_freq(clk->g, 1);
1230 nvgpu_mutex_release(&clk->clk_mutex);
1231 return ret;
1232}
1233
1234void gm20b_clk_unprepare(struct clk_gk20a *clk)
1235{
1236 nvgpu_mutex_acquire(&clk->clk_mutex);
1237 if (clk->gpc_pll.enabled && clk->clk_hw_on)
1238 clk_disable_gpcpll(clk->g, 1);
1239 nvgpu_mutex_release(&clk->clk_mutex);
1240}
1241
1242int gm20b_clk_is_prepared(struct clk_gk20a *clk)
1243{
1244 return clk->gpc_pll.enabled && clk->clk_hw_on;
1245}
1246
1247unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate)
1248{
1249 return rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1250}
1251
1252int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate,
1253 unsigned long parent_rate)
1254{
1255 u32 old_freq;
1256 int ret = -ENODATA;
1257
1258 nvgpu_mutex_acquire(&clk->clk_mutex);
1259 old_freq = clk->gpc_pll.freq;
1260 ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq);
1261 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
1262 ret = set_pll_freq(clk->g, 1);
1263 nvgpu_mutex_release(&clk->clk_mutex);
1264
1265 return ret;
1266}
1267
1268long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate,
1269 unsigned long *parent_rate)
1270{
1271 u32 freq;
1272 struct pll tmp_pll;
1273 unsigned long maxrate;
1274 struct gk20a *g = clk->g;
1275
1276 maxrate = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK);
1277 if (rate > maxrate)
1278 rate = maxrate;
1279
1280 nvgpu_mutex_acquire(&clk->clk_mutex);
1281 freq = rate_gpu_to_gpc2clk(rate);
1282 if (freq > gpc_pll_params.max_freq)
1283 freq = gpc_pll_params.max_freq;
1284 else if (freq < gpc_pll_params.min_freq)
1285 freq = gpc_pll_params.min_freq;
1286
1287 tmp_pll = clk->gpc_pll;
1288 clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true);
1289 nvgpu_mutex_release(&clk->clk_mutex);
1290
1291 return rate_gpc2clk_to_gpu(tmp_pll.freq);
1292}
1293
1294static int gm20b_init_clk_setup_hw(struct gk20a *g)
1295{
1296 u32 data;
1297
1298 gk20a_dbg_fn("");
1299
1300 /* LDIV: Div4 mode (required); both bypass and vco ratios 1:1 */
1301 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
1302 data = set_field(data,
1303 trim_sys_gpc2clk_out_sdiv14_m() |
1304 trim_sys_gpc2clk_out_vcodiv_m() |
1305 trim_sys_gpc2clk_out_bypdiv_m(),
1306 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
1307 trim_sys_gpc2clk_out_vcodiv_by1_f() |
1308 trim_sys_gpc2clk_out_bypdiv_f(0));
1309 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
1310
1311 /*
1312 * Clear global bypass control; PLL is still under bypass, since SEL_VCO
1313 * is cleared by default.
1314 */
1315 data = gk20a_readl(g, trim_sys_bypassctrl_r());
1316 data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
1317 trim_sys_bypassctrl_gpcpll_vco_f());
1318 gk20a_writel(g, trim_sys_bypassctrl_r(), data);
1319
1320 /* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */
1321 data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_r());
1322 if (!fuse_ctrl_opt_ram_svop_pdp_data_v(data)) {
1323 data = set_field(data, fuse_ctrl_opt_ram_svop_pdp_data_m(),
1324 fuse_ctrl_opt_ram_svop_pdp_data_f(0x2));
1325 gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_r(), data);
1326 data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_override_r());
1327 data = set_field(data,
1328 fuse_ctrl_opt_ram_svop_pdp_override_data_m(),
1329 fuse_ctrl_opt_ram_svop_pdp_override_data_yes_f());
1330 gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_override_r(), data);
1331 }
1332
1333 /* Disable idle slow down */
1334 data = gk20a_readl(g, therm_clk_slowdown_r(0));
1335 data = set_field(data, therm_clk_slowdown_idle_factor_m(),
1336 therm_clk_slowdown_idle_factor_disabled_f());
1337 gk20a_writel(g, therm_clk_slowdown_r(0), data);
1338 gk20a_readl(g, therm_clk_slowdown_r(0));
1339
1340 if (g->clk.gpc_pll.mode == GPC_PLL_MODE_DVFS)
1341		return clk_enable_pll_dvfs(g);
1342
1343 return 0;
1344}
1345
1346static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
1347{
1348 struct clk_gk20a *clk = &g->clk;
1349
1350 if (freq > gpc_pll_params.max_freq)
1351 freq = gpc_pll_params.max_freq;
1352 else if (freq < gpc_pll_params.min_freq)
1353 freq = gpc_pll_params.min_freq;
1354
1355 if (freq != old_freq) {
1356 /* gpc_pll.freq is changed to new value here */
1357 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
1358 &freq, true)) {
1359 nvgpu_err(g, "failed to set pll target for %d", freq);
1360 return -EINVAL;
1361 }
1362 }
1363 return 0;
1364}
1365
1366static int set_pll_freq(struct gk20a *g, int allow_slide)
1367{
1368 struct clk_gk20a *clk = &g->clk;
1369 int err = 0;
1370
1371 gk20a_dbg_fn("last freq: %dMHz, target freq %dMHz",
1372 clk->gpc_pll_last.freq, clk->gpc_pll.freq);
1373
1374 /* If programming with dynamic sliding failed, re-try under bypass */
1375 if (clk->gpc_pll.mode == GPC_PLL_MODE_DVFS) {
1376 err = clk_program_na_gpc_pll(g, &clk->gpc_pll, allow_slide);
1377 if (err && allow_slide)
1378 err = clk_program_na_gpc_pll(g, &clk->gpc_pll, 0);
1379 } else {
1380 err = clk_program_gpc_pll(g, &clk->gpc_pll, allow_slide);
1381 if (err && allow_slide)
1382 err = clk_program_gpc_pll(g, &clk->gpc_pll, 0);
1383 }
1384
1385 if (!err) {
1386 clk->gpc_pll.enabled = true;
1387 clk->gpc_pll_last = clk->gpc_pll;
1388 return 0;
1389 }
1390
1391 /*
1392 * Just report error but not restore PLL since dvfs could already change
1393 * voltage even when programming failed.
1394 */
1395 nvgpu_err(g, "failed to set pll to %d", clk->gpc_pll.freq);
1396 return err;
1397}
1398
1399int gm20b_init_clk_support(struct gk20a *g)
1400{
1401 struct clk_gk20a *clk = &g->clk;
1402	int err;
1403
1404 gk20a_dbg_fn("");
1405
1406 nvgpu_mutex_acquire(&clk->clk_mutex);
1407 clk->clk_hw_on = true;
1408
1409 err = gm20b_init_clk_setup_hw(g);
1410 nvgpu_mutex_release(&clk->clk_mutex);
1411 if (err)
1412 return err;
1413
1414 /* FIXME: this effectively prevents host level clock gating */
1415 err = g->ops.clk.prepare_enable(&g->clk);
1416 if (err)
1417 return err;
1418
1419 /* The prev call may not enable PLL if gbus is unbalanced - force it */
1420 nvgpu_mutex_acquire(&clk->clk_mutex);
1421 if (!clk->gpc_pll.enabled)
1422 err = set_pll_freq(g, 1);
1423 nvgpu_mutex_release(&clk->clk_mutex);
1424 if (err)
1425 return err;
1426
1427 if (!clk->debugfs_set && g->ops.clk.init_debugfs) {
1428 err = g->ops.clk.init_debugfs(g);
1429 if (err)
1430 return err;
1431 clk->debugfs_set = true;
1432 }
1433
1434 return err;
1435}
1436
1437int gm20b_suspend_clk_support(struct gk20a *g)
1438{
1439 int ret = 0;
1440
1441 g->ops.clk.disable_unprepare(&g->clk);
1442
1443 /* The prev call may not disable PLL if gbus is unbalanced - force it */
1444 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1445 if (g->clk.gpc_pll.enabled)
1446 ret = clk_disable_gpcpll(g, 1);
1447 g->clk.clk_hw_on = false;
1448 nvgpu_mutex_release(&g->clk.clk_mutex);
1449
1450 nvgpu_mutex_destroy(&g->clk.clk_mutex);
1451
1452 return ret;
1453}
1454
1455int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val)
1456{
1457 struct gk20a *g = clk->g;
1458 struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
1459 u32 det_out;
1460 int err;
1461
1462 if (clk->gpc_pll.mode != GPC_PLL_MODE_DVFS)
1463 return -ENOSYS;
1464
1465 err = gk20a_busy(g);
1466 if (err)
1467 return err;
1468
1469 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1470
1471 det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
1472 det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out);
1473 *val = div64_u64((u64)det_out * gpc_pll_params->uvdet_slope +
1474 gpc_pll_params->uvdet_offs, 1000ULL);
1475
1476 nvgpu_mutex_release(&g->clk.clk_mutex);
1477
1478 gk20a_idle(g);
1479 return 0;
1480}
1481
1482int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val)
1483{
1484 struct gk20a *g = clk->g;
1485 u32 clk_slowdown, clk_slowdown_save;
1486 int err;
1487
1488	u32 ncycle = 800; /* count GPCCLK over ncycle cycles of clk_in */
1489 u64 freq = clk->gpc_pll.clk_in;
1490 u32 count1, count2;
1491
1492 err = gk20a_busy(g);
1493 if (err)
1494 return err;
1495
1496 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1497
1498 /* Disable clock slowdown during measurements */
1499 clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
1500 clk_slowdown = set_field(clk_slowdown_save,
1501 therm_clk_slowdown_idle_factor_m(),
1502 therm_clk_slowdown_idle_factor_disabled_f());
1503 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
1504 gk20a_readl(g, therm_clk_slowdown_r(0));
1505
1506 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
1507 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
1508 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
1509 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
1510 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
1511 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
1512 /* start */
1513
1514	/* It should take less than 25us to finish 800 cycles of the 38.4 MHz
1515	 * reference clock, but a delay longer than 100us is required here.
1516 */
1517 gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
1518 nvgpu_udelay(200);
1519
1520 count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
1521 nvgpu_udelay(100);
1522 count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
1523 freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
1524 do_div(freq, ncycle);
1525 *val = freq;
1526
1527 /* Restore clock slowdown */
1528 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
1529 nvgpu_mutex_release(&g->clk.clk_mutex);
1530
1531 gk20a_idle(g);
1532
1533 if (count1 != count2)
1534 return -EBUSY;
1535
1536 return 0;
1537}
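/*
 * Back-of-the-envelope check of the counter math above (hypothetical counter
 * reading): with clk_in = 38400 kHz, ncycle = 800 and count2 = 20833,
 * freq = 38400 * 20833 / 800 = 999984 kHz, i.e. the counter reports a GPCCLK
 * of roughly 1 GHz.
 */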
1538
1539int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val)
1540{
1541 if (((reg < trim_sys_gpcpll_cfg_r()) ||
1542 (reg > trim_sys_gpcpll_dvfs2_r())) &&
1543 (reg != trim_sys_sel_vco_r()) &&
1544 (reg != trim_sys_gpc2clk_out_r()) &&
1545 (reg != trim_sys_bypassctrl_r()))
1546 return -EPERM;
1547
1548 if (reg == trim_sys_gpcpll_dvfs2_r())
1549 reg = trim_gpc_bcast_gpcpll_dvfs2_r();
1550
1551 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1552 if (!g->clk.clk_hw_on) {
1553 nvgpu_mutex_release(&g->clk.clk_mutex);
1554 return -EINVAL;
1555 }
1556 gk20a_writel(g, reg, val);
1557 nvgpu_mutex_release(&g->clk.clk_mutex);
1558
1559 return 0;
1560}
1561
1562int gm20b_clk_get_pll_debug_data(struct gk20a *g,
1563 struct nvgpu_clk_pll_debug_data *d)
1564{
1565 u32 reg;
1566
1567 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1568 if (!g->clk.clk_hw_on) {
1569 nvgpu_mutex_release(&g->clk.clk_mutex);
1570 return -EINVAL;
1571 }
1572
1573 d->trim_sys_bypassctrl_reg = trim_sys_bypassctrl_r();
1574 d->trim_sys_bypassctrl_val = gk20a_readl(g, trim_sys_bypassctrl_r());
1575 d->trim_sys_sel_vco_reg = trim_sys_sel_vco_r();
1576 d->trim_sys_sel_vco_val = gk20a_readl(g, trim_sys_sel_vco_r());
1577 d->trim_sys_gpc2clk_out_reg = trim_sys_gpc2clk_out_r();
1578 d->trim_sys_gpc2clk_out_val = gk20a_readl(g, trim_sys_gpc2clk_out_r());
1579 d->trim_sys_gpcpll_cfg_reg = trim_sys_gpcpll_cfg_r();
1580 d->trim_sys_gpcpll_dvfs2_reg = trim_gpc_bcast_gpcpll_dvfs2_r();
1581
1582 reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1583 d->trim_sys_gpcpll_cfg_val = reg;
1584 d->trim_sys_gpcpll_cfg_enabled = trim_sys_gpcpll_cfg_enable_v(reg);
1585 d->trim_sys_gpcpll_cfg_locked = trim_sys_gpcpll_cfg_pll_lock_v(reg);
1586 d->trim_sys_gpcpll_cfg_sync_on = trim_sys_gpcpll_cfg_sync_mode_v(reg);
1587
1588 reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
1589 d->trim_sys_gpcpll_coeff_val = reg;
1590 d->trim_sys_gpcpll_coeff_mdiv = trim_sys_gpcpll_coeff_mdiv_v(reg);
1591 d->trim_sys_gpcpll_coeff_ndiv = trim_sys_gpcpll_coeff_ndiv_v(reg);
1592 d->trim_sys_gpcpll_coeff_pldiv = trim_sys_gpcpll_coeff_pldiv_v(reg);
1593
1594 reg = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
1595 d->trim_sys_gpcpll_dvfs0_val = reg;
1596 d->trim_sys_gpcpll_dvfs0_dfs_coeff =
1597 trim_sys_gpcpll_dvfs0_dfs_coeff_v(reg);
1598 d->trim_sys_gpcpll_dvfs0_dfs_det_max =
1599 trim_sys_gpcpll_dvfs0_dfs_det_max_v(reg);
1600 d->trim_sys_gpcpll_dvfs0_dfs_dc_offset =
1601 trim_sys_gpcpll_dvfs0_dfs_dc_offset_v(reg);
1602
1603 nvgpu_mutex_release(&g->clk.clk_mutex);
1604 return 0;
1605}
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
new file mode 100644
index 00000000..e814ac70
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
@@ -0,0 +1,95 @@
1/*
2 * GM20B Clocks
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef _NVHOST_CLK_GM20B_H_
25#define _NVHOST_CLK_GM20B_H_
26
27#include <nvgpu/lock.h>
28
29struct gk20a;
30struct clk_gk20a;
31
32struct nvgpu_clk_pll_debug_data {
33 u32 trim_sys_sel_vco_reg;
34 u32 trim_sys_sel_vco_val;
35
36 u32 trim_sys_gpc2clk_out_reg;
37 u32 trim_sys_gpc2clk_out_val;
38
39 u32 trim_sys_bypassctrl_reg;
40 u32 trim_sys_bypassctrl_val;
41
42 u32 trim_sys_gpcpll_cfg_reg;
43 u32 trim_sys_gpcpll_dvfs2_reg;
44
45 u32 trim_sys_gpcpll_cfg_val;
46 bool trim_sys_gpcpll_cfg_enabled;
47 bool trim_sys_gpcpll_cfg_locked;
48 bool trim_sys_gpcpll_cfg_sync_on;
49
50 u32 trim_sys_gpcpll_coeff_val;
51 u32 trim_sys_gpcpll_coeff_mdiv;
52 u32 trim_sys_gpcpll_coeff_ndiv;
53 u32 trim_sys_gpcpll_coeff_pldiv;
54
55 u32 trim_sys_gpcpll_dvfs0_val;
56 u32 trim_sys_gpcpll_dvfs0_dfs_coeff;
57 u32 trim_sys_gpcpll_dvfs0_dfs_det_max;
58 u32 trim_sys_gpcpll_dvfs0_dfs_dc_offset;
59};
60
61int gm20b_init_clk_setup_sw(struct gk20a *g);
62
63int gm20b_clk_prepare(struct clk_gk20a *clk);
64void gm20b_clk_unprepare(struct clk_gk20a *clk);
65int gm20b_clk_is_prepared(struct clk_gk20a *clk);
66unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate);
67int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate,
68 unsigned long parent_rate);
69long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate,
70 unsigned long *parent_rate);
71struct pll_parms *gm20b_get_gpc_pll_parms(void);
72#ifdef CONFIG_DEBUG_FS
73int gm20b_clk_init_debugfs(struct gk20a *g);
74#endif
75
76int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val);
77int gm20b_init_clk_support(struct gk20a *g);
78int gm20b_suspend_clk_support(struct gk20a *g);
79int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val);
80int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val);
81int gm20b_clk_get_pll_debug_data(struct gk20a *g,
82 struct nvgpu_clk_pll_debug_data *d);
83
84/* 1:1 match between post divider settings and divisor value */
85static inline u32 nvgpu_pl_to_div(u32 pl)
86{
87 return pl;
88}
89
90static inline u32 nvgpu_div_to_pl(u32 div)
91{
92 return div;
93}
94
95#endif /* _NVHOST_CLK_GM20B_H_ */
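The two helpers at the end of this header state that on GM20B the PL field of the GPCPLL coefficient register is the post-divider value itself, with no lookup table. Assuming the conventional relation gpc2clk = ref / M * N / PL-divisor (an assumption for this sketch, not quoted from this file), a frequency estimate from the debug coefficients looks like the following; the function name is illustrative.

/* Sketch only: estimate GPC2CLK in Hz from the PLL coefficients, using the
 * 1:1 PL-to-divisor mapping declared above. Integer truncation is ignored. */
static inline unsigned long example_gpc2clk_hz(unsigned long ref_hz,
		u32 mdiv, u32 ndiv, u32 pl)
{
	u32 div = nvgpu_pl_to_div(pl);	/* identity on GM20B */

	if (mdiv == 0 || div == 0)
		return 0;

	return ref_hz / mdiv * ndiv / div;
}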
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
new file mode 100644
index 00000000..1f8cc326
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -0,0 +1,195 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/fb_gk20a.h"
27#include "gm20b/fb_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
30#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
31#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
32#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
33
34#define VPR_INFO_FETCH_WAIT (5)
35#define WPR_INFO_ADDR_ALIGNMENT 0x0000000c
36
37void fb_gm20b_init_fs_state(struct gk20a *g)
38{
39 gk20a_dbg_info("initialize gm20b fb");
40
41 gk20a_writel(g, fb_fbhub_num_active_ltcs_r(),
42 g->ltc_count);
43}
44
45void gm20b_fb_set_mmu_page_size(struct gk20a *g)
46{
47 /* set large page size in fb */
48 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
49 fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
50 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
51}
52
53bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g)
54{
55 	/* enable use of the full comp tag line in fb */
56 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
57 fb_mmu_ctrl |= fb_mmu_ctrl_use_full_comp_tag_line_true_f();
58 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
59
60 return true;
61}
62
63unsigned int gm20b_fb_compression_page_size(struct gk20a *g)
64{
65 return SZ_128K;
66}
67
68unsigned int gm20b_fb_compressible_page_size(struct gk20a *g)
69{
70 return SZ_64K;
71}
72
73void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
74{
75 u32 val;
76
77 /* print vpr and wpr info */
78 val = gk20a_readl(g, fb_mmu_vpr_info_r());
79 val &= ~0x3;
80 val |= fb_mmu_vpr_info_index_addr_lo_v();
81 gk20a_writel(g, fb_mmu_vpr_info_r(), val);
82 nvgpu_err(g, "VPR: %08x %08x %08x %08x",
83 gk20a_readl(g, fb_mmu_vpr_info_r()),
84 gk20a_readl(g, fb_mmu_vpr_info_r()),
85 gk20a_readl(g, fb_mmu_vpr_info_r()),
86 gk20a_readl(g, fb_mmu_vpr_info_r()));
87
88 val = gk20a_readl(g, fb_mmu_wpr_info_r());
89 val &= ~0xf;
90 val |= (fb_mmu_wpr_info_index_allow_read_v());
91 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
92 nvgpu_err(g, "WPR: %08x %08x %08x %08x %08x %08x",
93 gk20a_readl(g, fb_mmu_wpr_info_r()),
94 gk20a_readl(g, fb_mmu_wpr_info_r()),
95 gk20a_readl(g, fb_mmu_wpr_info_r()),
96 gk20a_readl(g, fb_mmu_wpr_info_r()),
97 gk20a_readl(g, fb_mmu_wpr_info_r()),
98 gk20a_readl(g, fb_mmu_wpr_info_r()));
99
100}
101
102static int gm20b_fb_vpr_info_fetch_wait(struct gk20a *g,
103 unsigned int msec)
104{
105 struct nvgpu_timeout timeout;
106
107 nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER);
108
109 do {
110 u32 val;
111
112 val = gk20a_readl(g, fb_mmu_vpr_info_r());
113 if (fb_mmu_vpr_info_fetch_v(val) ==
114 fb_mmu_vpr_info_fetch_false_v())
115 return 0;
116
117 } while (!nvgpu_timeout_expired(&timeout));
118
119 return -ETIMEDOUT;
120}
121
122int gm20b_fb_vpr_info_fetch(struct gk20a *g)
123{
124 if (gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
125 return -ETIME;
126 }
127
128 gk20a_writel(g, fb_mmu_vpr_info_r(),
129 fb_mmu_vpr_info_fetch_true_v());
130
131 return gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
132}
133
134void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
135{
136 u32 val = 0;
137 u64 wpr_start = 0;
138 u64 wpr_end = 0;
139
140 val = gk20a_readl(g, fb_mmu_wpr_info_r());
141 val &= ~0xF;
142 val |= fb_mmu_wpr_info_index_wpr1_addr_lo_v();
143 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
144
145 val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4;
146 wpr_start = hi32_lo32_to_u64(
147 (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)),
148 (val << WPR_INFO_ADDR_ALIGNMENT));
149
150 val = gk20a_readl(g, fb_mmu_wpr_info_r());
151 val &= ~0xF;
152 val |= fb_mmu_wpr_info_index_wpr1_addr_hi_v();
153 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
154
155 val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4;
156 wpr_end = hi32_lo32_to_u64(
157 (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)),
158 (val << WPR_INFO_ADDR_ALIGNMENT));
159
160 inf->wpr_base = wpr_start;
161 inf->nonwpr_base = 0;
162 inf->size = (wpr_end - wpr_start);
163}
164
165bool gm20b_fb_debug_mode_enabled(struct gk20a *g)
166{
167 u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
168 return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) ==
169 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
170}
171
172void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable)
173{
174 u32 reg_val, fb_debug_ctrl, gpc_debug_ctrl;
175
176 if (enable) {
177 fb_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
178 gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f();
179 g->mmu_debug_ctrl = true;
180 } else {
181 fb_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
182 gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f();
183 g->mmu_debug_ctrl = false;
184 }
185
186 reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
187 reg_val = set_field(reg_val,
188 fb_mmu_debug_ctrl_debug_m(), fb_debug_ctrl);
189 gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
190
191 reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
192 reg_val = set_field(reg_val,
193 gr_gpcs_pri_mmu_debug_ctrl_debug_m(), gpc_debug_ctrl);
194 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val);
195}
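In gm20b_fb_read_wpr_info() above, each read of fb_mmu_wpr_info_r() returns the selected WPR address in the upper 28 bits (the low 4 bits carry the index), with the address stored in 4 KB units; hence the shift right by 4 followed by the hi32_lo32_to_u64() reconstruction using WPR_INFO_ADDR_ALIGNMENT (12). The same unpacking, written as a standalone sketch with no register access (the function name is illustrative):

/* "val" is the WPR_INFO register value already shifted right by 4, i.e. the
 * carveout address in 4 KB units. The result is the byte address, equivalent
 * to (u64)val << WPR_INFO_ADDR_ALIGNMENT. */
static inline u64 example_wpr_field_to_bytes(u32 val)
{
	u32 hi = val >> (32 - WPR_INFO_ADDR_ALIGNMENT);
	u32 lo = val << WPR_INFO_ADDR_ALIGNMENT;	/* truncated to 32 bits */

	return ((u64)hi << 32) | lo;
}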
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h
new file mode 100644
index 00000000..32d36f57
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h
@@ -0,0 +1,40 @@
1/*
2 * GM20B FB
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_FB
26#define _NVHOST_GM20B_FB
27struct gk20a;
28
29void fb_gm20b_init_fs_state(struct gk20a *g);
30void gm20b_fb_set_mmu_page_size(struct gk20a *g);
31bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g);
32unsigned int gm20b_fb_compression_page_size(struct gk20a *g);
33unsigned int gm20b_fb_compressible_page_size(struct gk20a *g);
34void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g);
35void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
36int gm20b_fb_vpr_info_fetch(struct gk20a *g);
37bool gm20b_fb_debug_mode_enabled(struct gk20a *g);
38void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable);
39
40#endif
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
new file mode 100644
index 00000000..0762e8bd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -0,0 +1,223 @@
1/*
2 * GM20B Fifo
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/fifo_gk20a.h"
27
28#include "fifo_gm20b.h"
29
30#include <nvgpu/timers.h>
31#include <nvgpu/log.h>
32#include <nvgpu/atomic.h>
33#include <nvgpu/barrier.h>
34#include <nvgpu/mm.h>
35
36#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
38#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
39#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
40#include <nvgpu/hw/gm20b/hw_pbdma_gm20b.h>
41
42void channel_gm20b_bind(struct channel_gk20a *c)
43{
44 struct gk20a *g = c->g;
45
46 u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block)
47 >> ram_in_base_shift_v();
48
49 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
50 c->chid, inst_ptr);
51
52
53 gk20a_writel(g, ccsr_channel_inst_r(c->chid),
54 ccsr_channel_inst_ptr_f(inst_ptr) |
55 nvgpu_aperture_mask(g, &c->inst_block,
56 ccsr_channel_inst_target_sys_mem_ncoh_f(),
57 ccsr_channel_inst_target_vid_mem_f()) |
58 ccsr_channel_inst_bind_true_f());
59
60 gk20a_writel(g, ccsr_channel_r(c->chid),
61 (gk20a_readl(g, ccsr_channel_r(c->chid)) &
62 ~ccsr_channel_enable_set_f(~0)) |
63 ccsr_channel_enable_set_true_f());
64 nvgpu_smp_wmb();
65 nvgpu_atomic_set(&c->bound, true);
66}
67
68static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
69{
70 u32 fault_id = ~0;
71 struct fifo_engine_info_gk20a *engine_info;
72
73 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
74
75 if (engine_info) {
76 fault_id = engine_info->fault_id;
77 } else {
78 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
79 }
80 return fault_id;
81}
82
83void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
84 unsigned long engine_ids)
85{
86 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
87 unsigned long engine_id;
88 int ret = -EBUSY;
89 struct nvgpu_timeout timeout;
90
91 /* trigger faults for all bad engines */
92 for_each_set_bit(engine_id, &engine_ids, 32) {
93 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
94 nvgpu_err(g, "faulting unknown engine %ld", engine_id);
95 } else {
96 u32 mmu_id = gm20b_engine_id_to_mmu_id(g,
97 engine_id);
98 if (mmu_id != (u32)~0)
99 gk20a_writel(g, fifo_trigger_mmu_fault_r(mmu_id),
100 fifo_trigger_mmu_fault_enable_f(1));
101 }
102 }
103
104 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
105 NVGPU_TIMER_CPU_TIMER);
106
107 /* Wait for MMU fault to trigger */
108 do {
109 if (gk20a_readl(g, fifo_intr_0_r()) &
110 fifo_intr_0_mmu_fault_pending_f()) {
111 ret = 0;
112 break;
113 }
114
115 nvgpu_usleep_range(delay, delay * 2);
116 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
117 } while (!nvgpu_timeout_expired(&timeout));
118
119 if (ret)
120 nvgpu_err(g, "mmu fault timeout");
121
122 /* release mmu fault trigger */
123 for_each_set_bit(engine_id, &engine_ids, 32)
124 gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
125}
126
127u32 gm20b_fifo_get_num_fifos(struct gk20a *g)
128{
129 return ccsr_channel__size_1_v();
130}
131
132void gm20b_device_info_data_parse(struct gk20a *g,
133 u32 table_entry, u32 *inst_id,
134 u32 *pri_base, u32 *fault_id)
135{
136 if (top_device_info_data_type_v(table_entry) ==
137 top_device_info_data_type_enum2_v()) {
138 if (pri_base) {
139 *pri_base =
140 (top_device_info_data_pri_base_v(table_entry)
141 << top_device_info_data_pri_base_align_v());
142 }
143 if (fault_id && (top_device_info_data_fault_id_v(table_entry) ==
144 top_device_info_data_fault_id_valid_v())) {
145 *fault_id =
146 top_device_info_data_fault_id_enum_v(table_entry);
147 }
148 } else
149 nvgpu_err(g, "unknown device_info_data %d",
150 top_device_info_data_type_v(table_entry));
151}
152
153void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f)
154{
155 	/*
156 	 * These are all errors which indicate that something has gone
157 	 * seriously wrong in the device.
158 	 */
159 f->intr.pbdma.device_fatal_0 =
160 pbdma_intr_0_memreq_pending_f() |
161 pbdma_intr_0_memack_timeout_pending_f() |
162 pbdma_intr_0_memack_extra_pending_f() |
163 pbdma_intr_0_memdat_timeout_pending_f() |
164 pbdma_intr_0_memdat_extra_pending_f() |
165 pbdma_intr_0_memflush_pending_f() |
166 pbdma_intr_0_memop_pending_f() |
167 pbdma_intr_0_lbconnect_pending_f() |
168 pbdma_intr_0_lback_timeout_pending_f() |
169 pbdma_intr_0_lback_extra_pending_f() |
170 pbdma_intr_0_lbdat_timeout_pending_f() |
171 pbdma_intr_0_lbdat_extra_pending_f() |
172 pbdma_intr_0_pri_pending_f();
173
174 	/*
175 	 * These are data parsing or framing errors, or others that can be
176 	 * recovered from with intervention... or by just resetting the
177 	 * channel.
178 	 */
179 f->intr.pbdma.channel_fatal_0 =
180 pbdma_intr_0_gpfifo_pending_f() |
181 pbdma_intr_0_gpptr_pending_f() |
182 pbdma_intr_0_gpentry_pending_f() |
183 pbdma_intr_0_gpcrc_pending_f() |
184 pbdma_intr_0_pbptr_pending_f() |
185 pbdma_intr_0_pbentry_pending_f() |
186 pbdma_intr_0_pbcrc_pending_f() |
187 pbdma_intr_0_method_pending_f() |
188 pbdma_intr_0_methodcrc_pending_f() |
189 pbdma_intr_0_pbseg_pending_f() |
190 pbdma_intr_0_signature_pending_f();
191
192 /* Can be used for sw-methods, or represents a recoverable timeout. */
193 f->intr.pbdma.restartable_0 =
194 pbdma_intr_0_device_pending_f();
195}
196
197static void gm20b_fifo_set_ctx_reload(struct channel_gk20a *ch)
198{
199 struct gk20a *g = ch->g;
200 u32 channel = gk20a_readl(g, ccsr_channel_r(ch->chid));
201
202 gk20a_writel(g, ccsr_channel_r(ch->chid),
203 channel | ccsr_channel_force_ctx_reload_true_f());
204}
205
206void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch)
207{
208 struct gk20a *g = ch->g;
209 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
210 struct channel_gk20a *temp_ch;
211
212 /* If CTX_RELOAD is set on a channel, move it to some other channel */
213 if (gk20a_fifo_channel_status_is_ctx_reload(ch->g, ch->chid)) {
214 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
215 nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, channel_gk20a, ch_entry) {
216 if (temp_ch->chid != ch->chid) {
217 gm20b_fifo_set_ctx_reload(temp_ch);
218 break;
219 }
220 }
221 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
222 }
223}
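gm20b_fifo_trigger_mmu_fault() above waits for the fault interrupt with a bounded poll: nvgpu_timeout caps the total wait while the sleep interval doubles each pass up to GR_IDLE_CHECK_MAX. The same idiom, separated out as a hedged sketch (the helper name and the "done" predicate are illustrative, not driver API):

static int example_poll_with_backoff(struct gk20a *g,
		bool (*done)(struct gk20a *g), u32 timeout_ms)
{
	struct nvgpu_timeout timeout;
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;

	nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);

	do {
		if (done(g))
			return 0;

		nvgpu_usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (!nvgpu_timeout_expired(&timeout));

	return -ETIMEDOUT;
}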
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h
new file mode 100644
index 00000000..8d487358
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h
@@ -0,0 +1,39 @@
1/*
2 * GM20B Fifo
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_FIFO
26#define _NVHOST_GM20B_FIFO
27struct gk20a;
28
29void channel_gm20b_bind(struct channel_gk20a *c);
30void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
31 unsigned long engine_ids);
32u32 gm20b_fifo_get_num_fifos(struct gk20a *g);
33void gm20b_device_info_data_parse(struct gk20a *g,
34 u32 table_entry, u32 *inst_id,
35 u32 *pri_base, u32 *fault_id);
36void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f);
37void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch);
38
39#endif
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c
new file mode 100644
index 00000000..0ebb2d0d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c
@@ -0,0 +1,731 @@
1/*
2 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * This file is autogenerated. Do not edit.
23 */
24
25#ifndef __gm20b_gating_reglist_h__
26#define __gm20b_gating_reglist_h__
27
28#include "gm20b_gating_reglist.h"
29#include <nvgpu/enabled.h>
30
31struct gating_desc {
32 u32 addr;
33 u32 prod;
34 u32 disable;
35};
36/* slcg bus */
37static const struct gating_desc gm20b_slcg_bus[] = {
38 {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe},
39};
40
41/* slcg ce2 */
42static const struct gating_desc gm20b_slcg_ce2[] = {
43 {.addr = 0x00106f28, .prod = 0x00000000, .disable = 0x000007fe},
44};
45
46/* slcg chiplet */
47static const struct gating_desc gm20b_slcg_chiplet[] = {
48 {.addr = 0x0010c07c, .prod = 0x00000000, .disable = 0x00000007},
49 {.addr = 0x0010e07c, .prod = 0x00000000, .disable = 0x00000007},
50 {.addr = 0x0010d07c, .prod = 0x00000000, .disable = 0x00000007},
51 {.addr = 0x0010e17c, .prod = 0x00000000, .disable = 0x00000007},
52};
53
54/* slcg fb */
55static const struct gating_desc gm20b_slcg_fb[] = {
56 {.addr = 0x00100d14, .prod = 0x00000000, .disable = 0xfffffffe},
57 {.addr = 0x00100c9c, .prod = 0x00000000, .disable = 0x000001fe},
58};
59
60/* slcg fifo */
61static const struct gating_desc gm20b_slcg_fifo[] = {
62 {.addr = 0x000026ac, .prod = 0x00000100, .disable = 0x0001fffe},
63};
64
65/* slcg gr */
66static const struct gating_desc gm20b_slcg_gr[] = {
67 {.addr = 0x004041f4, .prod = 0x00000002, .disable = 0x03fffffe},
68 {.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe},
69 {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe},
70 {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
71 {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
72 {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
73 {.addr = 0x00405910, .prod = 0xfffffff0, .disable = 0xfffffffe},
74 {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
75 {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000007e},
76 {.addr = 0x0041a17c, .prod = 0x00020008, .disable = 0x0003fffe},
77 {.addr = 0x0041a894, .prod = 0x00000040, .disable = 0x0003fffe},
78 {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0007fffe},
79 {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
80 {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
81 {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
82 {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
83 {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
84 {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
85 {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
86 {.addr = 0x00418c74, .prod = 0xffffffc0, .disable = 0xfffffffe},
87 {.addr = 0x00418cf4, .prod = 0xfffffffc, .disable = 0xfffffffe},
88 {.addr = 0x00418d74, .prod = 0xffffffe0, .disable = 0xfffffffe},
89 {.addr = 0x00418f10, .prod = 0xffffffe0, .disable = 0xfffffffe},
90 {.addr = 0x00418e10, .prod = 0xfffffffe, .disable = 0xfffffffe},
91 {.addr = 0x00419024, .prod = 0x000001fe, .disable = 0x000001fe},
92 {.addr = 0x0041889c, .prod = 0x00000000, .disable = 0x000001fe},
93 {.addr = 0x00419d64, .prod = 0x00000000, .disable = 0x000001ff},
94 {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
95 {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
96 {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
97 {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
98 {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
99 {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
100 {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
101 {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
102 {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
103 {.addr = 0x0041986c, .prod = 0x00000104, .disable = 0x00fffffe},
104 {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
105 {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
106 {.addr = 0x00419c74, .prod = 0x0000001e, .disable = 0x0000001e},
107 {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
108 {.addr = 0x00419fdc, .prod = 0xffedff00, .disable = 0xfffffffe},
109 {.addr = 0x00419fe4, .prod = 0x00001b00, .disable = 0x00001ffe},
110 {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
111 {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
112 {.addr = 0x0041be2c, .prod = 0x04115fc0, .disable = 0xfffffffe},
113 {.addr = 0x0041bfec, .prod = 0xfffffff0, .disable = 0xfffffffe},
114 {.addr = 0x0041bed4, .prod = 0xfffffff6, .disable = 0xfffffffe},
115 {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
116 {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe},
117 {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
118 {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe},
119 {.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe},
120 {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe},
121 {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe},
122 {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe},
123 {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
124 {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff},
125};
126
127/* slcg ltc */
128static const struct gating_desc gm20b_slcg_ltc[] = {
129 {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0xfffffffe},
130 {.addr = 0x0017e35c, .prod = 0x00000000, .disable = 0xfffffffe},
131};
132
133/* slcg perf */
134static const struct gating_desc gm20b_slcg_perf[] = {
135 {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
136 {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
137 {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
138 {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
139};
140
141/* slcg PriRing */
142static const struct gating_desc gm20b_slcg_priring[] = {
143 {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
144};
145
146/* slcg pwr_csb */
147static const struct gating_desc gm20b_slcg_pwr_csb[] = {
148 {.addr = 0x0000017c, .prod = 0x00020008, .disable = 0x0003fffe},
149 {.addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f},
150 {.addr = 0x00000a74, .prod = 0x00000000, .disable = 0x00007ffe},
151 {.addr = 0x000016b8, .prod = 0x00000000, .disable = 0x0000000f},
152};
153
154/* slcg pmu */
155static const struct gating_desc gm20b_slcg_pmu[] = {
156 {.addr = 0x0010a17c, .prod = 0x00020008, .disable = 0x0003fffe},
157 {.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe},
158 {.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f},
159};
160
161/* therm gr */
162static const struct gating_desc gm20b_slcg_therm[] = {
163 {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
164};
165
166/* slcg Xbar */
167static const struct gating_desc gm20b_slcg_xbar[] = {
168 {.addr = 0x0013cbe4, .prod = 0x00000000, .disable = 0x1ffffffe},
169 {.addr = 0x0013cc04, .prod = 0x00000000, .disable = 0x1ffffffe},
170};
171
172/* blcg bus */
173static const struct gating_desc gm20b_blcg_bus[] = {
174 {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
175};
176
177/* blcg ctxsw prog */
178static const struct gating_desc gm20b_blcg_ctxsw_prog[] = {
179};
180
181/* blcg fb */
182static const struct gating_desc gm20b_blcg_fb[] = {
183 {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
184 {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
185 {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
186 {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
187 {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000},
188};
189
190/* blcg fifo */
191static const struct gating_desc gm20b_blcg_fifo[] = {
192 {.addr = 0x000026a4, .prod = 0x0000c242, .disable = 0x00000000},
193};
194
195/* blcg gr */
196static const struct gating_desc gm20b_blcg_gr[] = {
197 {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000},
198 {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
199 {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
200 {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000},
201 {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000},
202 {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000},
203 {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000},
204 {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000},
205 {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000},
206 {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000},
207 {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000},
208 {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
209 {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000},
210 {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000},
211 {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000},
212 {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
213 {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000},
214 {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000},
215 {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000},
216 {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000},
217 {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000},
218 {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000},
219 {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000},
220 {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000},
221 {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000},
222 {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
223 {.addr = 0x00418898, .prod = 0x00000042, .disable = 0x00000000},
224 {.addr = 0x00419a40, .prod = 0x00000042, .disable = 0x00000000},
225 {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000},
226 {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000},
227 {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000},
228 {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000},
229 {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000},
230 {.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000},
231 {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000},
232 {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000},
233 {.addr = 0x00419868, .prod = 0x00000042, .disable = 0x00000000},
234 {.addr = 0x00419cd4, .prod = 0x00000002, .disable = 0x00000000},
235 {.addr = 0x00419cdc, .prod = 0x00000002, .disable = 0x00000000},
236 {.addr = 0x00419c70, .prod = 0x00004044, .disable = 0x00000000},
237 {.addr = 0x00419fd0, .prod = 0x00004044, .disable = 0x00000000},
238 {.addr = 0x00419fd8, .prod = 0x00004046, .disable = 0x00000000},
239 {.addr = 0x00419fe0, .prod = 0x00004044, .disable = 0x00000000},
240 {.addr = 0x00419fe8, .prod = 0x00000042, .disable = 0x00000000},
241 {.addr = 0x00419ff0, .prod = 0x00004045, .disable = 0x00000000},
242 {.addr = 0x00419ff8, .prod = 0x00000002, .disable = 0x00000000},
243 {.addr = 0x00419f90, .prod = 0x00000002, .disable = 0x00000000},
244 {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000},
245 {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000},
246 {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000},
247 {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000},
248 {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000},
249 {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000},
250 {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000},
251 {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000},
252 {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000},
253 {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000},
254 {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000},
255 {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000},
256 {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000},
257 {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000},
258};
259
260/* blcg ltc */
261static const struct gating_desc gm20b_blcg_ltc[] = {
262 {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
263 {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
264 {.addr = 0x0017e3e0, .prod = 0x00000044, .disable = 0x00000000},
265 {.addr = 0x0017e3c8, .prod = 0x00000044, .disable = 0x00000000},
266};
267
268/* blcg pwr_csb */
269static const struct gating_desc gm20b_blcg_pwr_csb[] = {
270 {.addr = 0x00000a70, .prod = 0x00000045, .disable = 0x00000000},
271};
272
273/* blcg pmu */
274static const struct gating_desc gm20b_blcg_pmu[] = {
275 {.addr = 0x0010aa70, .prod = 0x00000045, .disable = 0x00000000},
276};
277
278/* blcg Xbar */
279static const struct gating_desc gm20b_blcg_xbar[] = {
280 {.addr = 0x0013cbe0, .prod = 0x00000042, .disable = 0x00000000},
281 {.addr = 0x0013cc00, .prod = 0x00000042, .disable = 0x00000000},
282};
283
284/* pg gr */
285static const struct gating_desc gm20b_pg_gr[] = {
286};
287
288/* inline functions */
289void gm20b_slcg_bus_load_gating_prod(struct gk20a *g,
290 bool prod)
291{
292 u32 i;
293 u32 size = sizeof(gm20b_slcg_bus) / sizeof(struct gating_desc);
294
295 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
296 return;
297
298 for (i = 0; i < size; i++) {
299 if (prod)
300 gk20a_writel(g, gm20b_slcg_bus[i].addr,
301 gm20b_slcg_bus[i].prod);
302 else
303 gk20a_writel(g, gm20b_slcg_bus[i].addr,
304 gm20b_slcg_bus[i].disable);
305 }
306}
307
308void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g,
309 bool prod)
310{
311 u32 i;
312 u32 size = sizeof(gm20b_slcg_ce2) / sizeof(struct gating_desc);
313
314 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
315 return;
316
317 for (i = 0; i < size; i++) {
318 if (prod)
319 gk20a_writel(g, gm20b_slcg_ce2[i].addr,
320 gm20b_slcg_ce2[i].prod);
321 else
322 gk20a_writel(g, gm20b_slcg_ce2[i].addr,
323 gm20b_slcg_ce2[i].disable);
324 }
325}
326
327void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g,
328 bool prod)
329{
330 u32 i;
331 u32 size = sizeof(gm20b_slcg_chiplet) / sizeof(struct gating_desc);
332
333 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
334 return;
335
336 for (i = 0; i < size; i++) {
337 if (prod)
338 gk20a_writel(g, gm20b_slcg_chiplet[i].addr,
339 gm20b_slcg_chiplet[i].prod);
340 else
341 gk20a_writel(g, gm20b_slcg_chiplet[i].addr,
342 gm20b_slcg_chiplet[i].disable);
343 }
344}
345
346void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
347 bool prod)
348{
349}
350
351void gm20b_slcg_fb_load_gating_prod(struct gk20a *g,
352 bool prod)
353{
354 u32 i;
355 u32 size = sizeof(gm20b_slcg_fb) / sizeof(struct gating_desc);
356
357 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
358 return;
359
360 for (i = 0; i < size; i++) {
361 if (prod)
362 gk20a_writel(g, gm20b_slcg_fb[i].addr,
363 gm20b_slcg_fb[i].prod);
364 else
365 gk20a_writel(g, gm20b_slcg_fb[i].addr,
366 gm20b_slcg_fb[i].disable);
367 }
368}
369
370void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g,
371 bool prod)
372{
373 u32 i;
374 u32 size = sizeof(gm20b_slcg_fifo) / sizeof(struct gating_desc);
375
376 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
377 return;
378
379 for (i = 0; i < size; i++) {
380 if (prod)
381 gk20a_writel(g, gm20b_slcg_fifo[i].addr,
382 gm20b_slcg_fifo[i].prod);
383 else
384 gk20a_writel(g, gm20b_slcg_fifo[i].addr,
385 gm20b_slcg_fifo[i].disable);
386 }
387}
388
389void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g,
390 bool prod)
391{
392 u32 i;
393 u32 size = sizeof(gm20b_slcg_gr) / sizeof(struct gating_desc);
394
395 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
396 return;
397
398 for (i = 0; i < size; i++) {
399 if (prod)
400 gk20a_writel(g, gm20b_slcg_gr[i].addr,
401 gm20b_slcg_gr[i].prod);
402 else
403 gk20a_writel(g, gm20b_slcg_gr[i].addr,
404 gm20b_slcg_gr[i].disable);
405 }
406}
407
408void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g,
409 bool prod)
410{
411 u32 i;
412 u32 size = sizeof(gm20b_slcg_ltc) / sizeof(struct gating_desc);
413
414 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
415 return;
416
417 for (i = 0; i < size; i++) {
418 if (prod)
419 gk20a_writel(g, gm20b_slcg_ltc[i].addr,
420 gm20b_slcg_ltc[i].prod);
421 else
422 gk20a_writel(g, gm20b_slcg_ltc[i].addr,
423 gm20b_slcg_ltc[i].disable);
424 }
425}
426
427void gm20b_slcg_perf_load_gating_prod(struct gk20a *g,
428 bool prod)
429{
430 u32 i;
431 u32 size = sizeof(gm20b_slcg_perf) / sizeof(struct gating_desc);
432
433 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
434 return;
435
436 for (i = 0; i < size; i++) {
437 if (prod)
438 gk20a_writel(g, gm20b_slcg_perf[i].addr,
439 gm20b_slcg_perf[i].prod);
440 else
441 gk20a_writel(g, gm20b_slcg_perf[i].addr,
442 gm20b_slcg_perf[i].disable);
443 }
444}
445
446void gm20b_slcg_priring_load_gating_prod(struct gk20a *g,
447 bool prod)
448{
449 u32 i;
450 u32 size = sizeof(gm20b_slcg_priring) / sizeof(struct gating_desc);
451
452 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
453 return;
454
455 for (i = 0; i < size; i++) {
456 if (prod)
457 gk20a_writel(g, gm20b_slcg_priring[i].addr,
458 gm20b_slcg_priring[i].prod);
459 else
460 gk20a_writel(g, gm20b_slcg_priring[i].addr,
461 gm20b_slcg_priring[i].disable);
462 }
463}
464
465void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
466 bool prod)
467{
468 u32 i;
469 u32 size = sizeof(gm20b_slcg_pwr_csb) / sizeof(struct gating_desc);
470
471 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
472 return;
473
474 for (i = 0; i < size; i++) {
475 if (prod)
476 gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr,
477 gm20b_slcg_pwr_csb[i].prod);
478 else
479 gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr,
480 gm20b_slcg_pwr_csb[i].disable);
481 }
482}
483
484void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g,
485 bool prod)
486{
487 u32 i;
488 u32 size = sizeof(gm20b_slcg_pmu) / sizeof(struct gating_desc);
489
490 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
491 return;
492
493 for (i = 0; i < size; i++) {
494 if (prod)
495 gk20a_writel(g, gm20b_slcg_pmu[i].addr,
496 gm20b_slcg_pmu[i].prod);
497 else
498 gk20a_writel(g, gm20b_slcg_pmu[i].addr,
499 gm20b_slcg_pmu[i].disable);
500 }
501}
502
503void gm20b_slcg_therm_load_gating_prod(struct gk20a *g,
504 bool prod)
505{
506 u32 i;
507 u32 size = sizeof(gm20b_slcg_therm) / sizeof(struct gating_desc);
508
509 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
510 return;
511
512 for (i = 0; i < size; i++) {
513 if (prod)
514 gk20a_writel(g, gm20b_slcg_therm[i].addr,
515 gm20b_slcg_therm[i].prod);
516 else
517 gk20a_writel(g, gm20b_slcg_therm[i].addr,
518 gm20b_slcg_therm[i].disable);
519 }
520}
521
522void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g,
523 bool prod)
524{
525 u32 i;
526 u32 size = sizeof(gm20b_slcg_xbar) / sizeof(struct gating_desc);
527
528 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
529 return;
530
531 for (i = 0; i < size; i++) {
532 if (prod)
533 gk20a_writel(g, gm20b_slcg_xbar[i].addr,
534 gm20b_slcg_xbar[i].prod);
535 else
536 gk20a_writel(g, gm20b_slcg_xbar[i].addr,
537 gm20b_slcg_xbar[i].disable);
538 }
539}
540
541void gm20b_blcg_bus_load_gating_prod(struct gk20a *g,
542 bool prod)
543{
544 u32 i;
545 u32 size = sizeof(gm20b_blcg_bus) / sizeof(struct gating_desc);
546
547 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
548 return;
549
550 for (i = 0; i < size; i++) {
551 if (prod)
552 gk20a_writel(g, gm20b_blcg_bus[i].addr,
553 gm20b_blcg_bus[i].prod);
554 else
555 gk20a_writel(g, gm20b_blcg_bus[i].addr,
556 gm20b_blcg_bus[i].disable);
557 }
558}
559
560void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
561 bool prod)
562{
563 u32 i;
564 u32 size = sizeof(gm20b_blcg_ctxsw_prog) / sizeof(struct gating_desc);
565
566 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
567 return;
568
569 for (i = 0; i < size; i++) {
570 if (prod)
571 gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr,
572 gm20b_blcg_ctxsw_prog[i].prod);
573 else
574 gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr,
575 gm20b_blcg_ctxsw_prog[i].disable);
576 }
577}
578
579void gm20b_blcg_fb_load_gating_prod(struct gk20a *g,
580 bool prod)
581{
582 u32 i;
583 u32 size = sizeof(gm20b_blcg_fb) / sizeof(struct gating_desc);
584
585 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
586 return;
587
588 for (i = 0; i < size; i++) {
589 if (prod)
590 gk20a_writel(g, gm20b_blcg_fb[i].addr,
591 gm20b_blcg_fb[i].prod);
592 else
593 gk20a_writel(g, gm20b_blcg_fb[i].addr,
594 gm20b_blcg_fb[i].disable);
595 }
596}
597
598void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g,
599 bool prod)
600{
601 u32 i;
602 u32 size = sizeof(gm20b_blcg_fifo) / sizeof(struct gating_desc);
603
604 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
605 return;
606
607 for (i = 0; i < size; i++) {
608 if (prod)
609 gk20a_writel(g, gm20b_blcg_fifo[i].addr,
610 gm20b_blcg_fifo[i].prod);
611 else
612 gk20a_writel(g, gm20b_blcg_fifo[i].addr,
613 gm20b_blcg_fifo[i].disable);
614 }
615}
616
617void gm20b_blcg_gr_load_gating_prod(struct gk20a *g,
618 bool prod)
619{
620 u32 i;
621 u32 size = sizeof(gm20b_blcg_gr) / sizeof(struct gating_desc);
622
623 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
624 return;
625
626 for (i = 0; i < size; i++) {
627 if (prod)
628 gk20a_writel(g, gm20b_blcg_gr[i].addr,
629 gm20b_blcg_gr[i].prod);
630 else
631 gk20a_writel(g, gm20b_blcg_gr[i].addr,
632 gm20b_blcg_gr[i].disable);
633 }
634}
635
636void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g,
637 bool prod)
638{
639 u32 i;
640 u32 size = sizeof(gm20b_blcg_ltc) / sizeof(struct gating_desc);
641
642 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
643 return;
644
645 for (i = 0; i < size; i++) {
646 if (prod)
647 gk20a_writel(g, gm20b_blcg_ltc[i].addr,
648 gm20b_blcg_ltc[i].prod);
649 else
650 gk20a_writel(g, gm20b_blcg_ltc[i].addr,
651 gm20b_blcg_ltc[i].disable);
652 }
653}
654
655void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
656 bool prod)
657{
658 u32 i;
659 u32 size = sizeof(gm20b_blcg_pwr_csb) / sizeof(struct gating_desc);
660
661 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
662 return;
663
664 for (i = 0; i < size; i++) {
665 if (prod)
666 gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr,
667 gm20b_blcg_pwr_csb[i].prod);
668 else
669 gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr,
670 gm20b_blcg_pwr_csb[i].disable);
671 }
672}
673
674void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g,
675 bool prod)
676{
677 u32 i;
678 u32 size = sizeof(gm20b_blcg_pmu) / sizeof(struct gating_desc);
679
680 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
681 return;
682
683 for (i = 0; i < size; i++) {
684 if (prod)
685 gk20a_writel(g, gm20b_blcg_pmu[i].addr,
686 gm20b_blcg_pmu[i].prod);
687 else
688 gk20a_writel(g, gm20b_blcg_pmu[i].addr,
689 gm20b_blcg_pmu[i].disable);
690 }
691}
692
693void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g,
694 bool prod)
695{
696 u32 i;
697 u32 size = sizeof(gm20b_blcg_xbar) / sizeof(struct gating_desc);
698
699 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
700 return;
701
702 for (i = 0; i < size; i++) {
703 if (prod)
704 gk20a_writel(g, gm20b_blcg_xbar[i].addr,
705 gm20b_blcg_xbar[i].prod);
706 else
707 gk20a_writel(g, gm20b_blcg_xbar[i].addr,
708 gm20b_blcg_xbar[i].disable);
709 }
710}
711
712void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g,
713 bool prod)
714{
715 u32 i;
716 u32 size = sizeof(gm20b_pg_gr) / sizeof(struct gating_desc);
717
718 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
719 return;
720
721 for (i = 0; i < size; i++) {
722 if (prod)
723 gk20a_writel(g, gm20b_pg_gr[i].addr,
724 gm20b_pg_gr[i].prod);
725 else
726 gk20a_writel(g, gm20b_pg_gr[i].addr,
727 gm20b_pg_gr[i].disable);
728 }
729}
730
731#endif /* __gm20b_gating_reglist_h__ */
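Every loader in this autogenerated file follows the same table-driven pattern: check the SLCG/BLCG capability flag, then walk a static gating_desc array and write either the prod or the disable value to each address. A consolidated sketch of that pattern (a hypothetical helper; the generated file intentionally keeps one loop per unit so it can be regenerated mechanically):

static void example_load_gating_table(struct gk20a *g,
		const struct gating_desc *descs, u32 count, bool prod)
{
	u32 i;

	for (i = 0; i < count; i++)
		gk20a_writel(g, descs[i].addr,
			prod ? descs[i].prod : descs[i].disable);
}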
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h
new file mode 100644
index 00000000..557f5689
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h
@@ -0,0 +1,100 @@
1/*
2 * drivers/video/tegra/host/gm20b/gm20b_gating_reglist.h
3 *
4 * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * This file is autogenerated. Do not edit.
25 */
26
27#include "gk20a/gk20a.h"
28
29void gm20b_slcg_bus_load_gating_prod(struct gk20a *g,
30 bool prod);
31
32void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g,
33 bool prod);
34
35void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g,
36 bool prod);
37
38void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
39 bool prod);
40
41void gm20b_slcg_fb_load_gating_prod(struct gk20a *g,
42 bool prod);
43
44void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g,
45 bool prod);
46
47void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g,
48 bool prod);
49
50void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g,
51 bool prod);
52
53void gm20b_slcg_perf_load_gating_prod(struct gk20a *g,
54 bool prod);
55
56void gm20b_slcg_priring_load_gating_prod(struct gk20a *g,
57 bool prod);
58
59void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
60 bool prod);
61
62void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g,
63 bool prod);
64
65void gm20b_slcg_therm_load_gating_prod(struct gk20a *g,
66 bool prod);
67
68void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g,
69 bool prod);
70
71void gm20b_blcg_bus_load_gating_prod(struct gk20a *g,
72 bool prod);
73
74void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
75 bool prod);
76
77void gm20b_blcg_fb_load_gating_prod(struct gk20a *g,
78 bool prod);
79
80void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g,
81 bool prod);
82
83void gm20b_blcg_gr_load_gating_prod(struct gk20a *g,
84 bool prod);
85
86void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g,
87 bool prod);
88
89void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
90 bool prod);
91
92void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g,
93 bool prod);
94
95void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g,
96 bool prod);
97
98void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g,
99 bool prod);
100
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
new file mode 100644
index 00000000..cd7433b3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
@@ -0,0 +1,72 @@
1/*
2 * drivers/video/tegra/host/gm20b/gr_ctx_gm20b.c
3 *
4 * GM20B Graphics Context
5 *
6 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27#include "gk20a/gk20a.h"
28#include "gr_ctx_gm20b.h"
29
30int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name)
31{
32 switch (index) {
33#ifdef GM20B_NETLIST_IMAGE_FW_NAME
34 case NETLIST_FINAL:
35 sprintf(name, GM20B_NETLIST_IMAGE_FW_NAME);
36 return 0;
37#endif
38#ifdef GK20A_NETLIST_IMAGE_A
39 case NETLIST_SLOT_A:
40 sprintf(name, GK20A_NETLIST_IMAGE_A);
41 return 0;
42#endif
43#ifdef GK20A_NETLIST_IMAGE_B
44 case NETLIST_SLOT_B:
45 sprintf(name, GK20A_NETLIST_IMAGE_B);
46 return 0;
47#endif
48#ifdef GK20A_NETLIST_IMAGE_C
49 case NETLIST_SLOT_C:
50 sprintf(name, GK20A_NETLIST_IMAGE_C);
51 return 0;
52#endif
53#ifdef GK20A_NETLIST_IMAGE_D
54 case NETLIST_SLOT_D:
55 sprintf(name, GK20A_NETLIST_IMAGE_D);
56 return 0;
57#endif
58 default:
59 return -1;
60 }
61
62 return -1;
63}
64
65bool gr_gm20b_is_firmware_defined(void)
66{
67#ifdef GM20B_NETLIST_IMAGE_FW_NAME
68 return true;
69#else
70 return false;
71#endif
72}
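gr_gm20b_get_netlist_name() above fills in the ctxsw firmware file name for a given netlist slot and returns -1 for slots that are not compiled in, so a caller can probe slots in order of preference. A hedged usage sketch follows (the function name, slot order, and -ENOENT choice are illustrative assumptions; the NETLIST_* constants come from gr_ctx_gk20a.h as used above):

static int example_pick_netlist(struct gk20a *g, char *name)
{
	const int slots[] = { NETLIST_FINAL, NETLIST_SLOT_A, NETLIST_SLOT_B,
			      NETLIST_SLOT_C, NETLIST_SLOT_D };
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(slots); i++) {
		if (gr_gm20b_get_netlist_name(g, slots[i], name) == 0)
			return 0;	/* "name" now holds a firmware file name */
	}

	return -ENOENT;
}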
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h
new file mode 100644
index 00000000..8712b353
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h
@@ -0,0 +1,36 @@
1/*
2 * GM20B Graphics Context
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef __GR_CTX_GM20B_H__
25#define __GR_CTX_GM20B_H__
26
27#include "gk20a/gr_ctx_gk20a.h"
28
29/* production netlist, one and only one from below */
30/*#undef GM20B_NETLIST_IMAGE_FW_NAME*/
31#define GM20B_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B
32
33int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name);
34bool gr_gm20b_is_firmware_defined(void);
35
36#endif /*__GR_CTX_GM20B_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
new file mode 100644
index 00000000..ef46c1ee
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -0,0 +1,1527 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <uapi/linux/nvgpu.h>
26
27#include <nvgpu/kmem.h>
28#include <nvgpu/log.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/debug.h>
31#include <nvgpu/fuse.h>
32
33#include "gk20a/gk20a.h"
34#include "gk20a/gr_gk20a.h"
35#include "common/linux/os_linux.h"
36
37#include "gr_gm20b.h"
38#include "pmu_gm20b.h"
39
40#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
41#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
42#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
43#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
44#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
45#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
46#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
47
48void gr_gm20b_init_gpc_mmu(struct gk20a *g)
49{
50 u32 temp;
51
52 gk20a_dbg_info("initialize gpc mmu");
53
54 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
55 /* Bypass MMU check for non-secure boot. For
56 		 * secure boot, this register write has no effect. */
57 gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
58 }
59 temp = gk20a_readl(g, fb_mmu_ctrl_r());
60 temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
61 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
62 gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() |
63 gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
64 gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
65 gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
66 gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
67 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
68 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
69 gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
70 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
71 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
72 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
73
74 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
75 gk20a_readl(g, fb_mmu_debug_ctrl_r()));
76 gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
77 gk20a_readl(g, fb_mmu_debug_wr_r()));
78 gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
79 gk20a_readl(g, fb_mmu_debug_rd_r()));
80
81 gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
82 gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
83}
84
85void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
86{
87 struct gr_gk20a *gr = &g->gr;
88
89 gr->bundle_cb_default_size =
90 gr_scc_bundle_cb_size_div_256b__prod_v();
91 gr->min_gpm_fifo_depth =
92 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
93 gr->bundle_cb_token_limit =
94 gr_pd_ab_dist_cfg2_token_limit_init_v();
95}
96
97void gr_gm20b_cb_size_default(struct gk20a *g)
98{
99 struct gr_gk20a *gr = &g->gr;
100
101 if (!gr->attrib_cb_default_size)
102 gr->attrib_cb_default_size =
103 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
104 gr->alpha_cb_default_size =
105 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
106}
107
108int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
109{
110 struct gr_gk20a *gr = &g->gr;
111 int size;
112
113 gr->attrib_cb_size = gr->attrib_cb_default_size
114 + (gr->attrib_cb_default_size >> 1);
115 gr->alpha_cb_size = gr->alpha_cb_default_size
116 + (gr->alpha_cb_default_size >> 1);
117
118 size = gr->attrib_cb_size *
119 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
120 gr->max_tpc_count;
121
122 size += gr->alpha_cb_size *
123 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
124 gr->max_tpc_count;
125
126 return size;
127}
128
129void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
130 struct channel_ctx_gk20a *ch_ctx,
131 u64 addr, bool patch)
132{
133 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
134 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
135 gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
136
137 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
138 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
139 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
140
141 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
142 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
143 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
144}
145
146void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
147 struct channel_ctx_gk20a *ch_ctx,
148 u64 addr, u64 size, bool patch)
149{
150 u32 data;
151
152 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
153 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
154
155 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
156 gr_scc_bundle_cb_size_div_256b_f(size) |
157 gr_scc_bundle_cb_size_valid_true_f(), patch);
158
159 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
160 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
161
162 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
163 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
164 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
165
166 /* data for state_limit */
167 data = (g->gr.bundle_cb_default_size *
168 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
169 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
170
171 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
172
173 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
174 g->gr.bundle_cb_token_limit, data);
175
176 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
177 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
178 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
179
180}
181
182int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
183 struct channel_gk20a *c, bool patch)
184{
185 struct gr_gk20a *gr = &g->gr;
186 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
187 u32 attrib_offset_in_chunk = 0;
188 u32 alpha_offset_in_chunk = 0;
189 u32 pd_ab_max_output;
190 u32 gpc_index, ppc_index;
191 u32 cbm_cfg_size1, cbm_cfg_size2;
192 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
193 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
194 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
195 GPU_LIT_NUM_PES_PER_GPC);
196
197 gk20a_dbg_fn("");
198
199 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
200 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
201 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
202 patch);
203
204 pd_ab_max_output = (gr->alpha_cb_default_size *
205 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
206 gr_pd_ab_dist_cfg1_max_output_granularity_v();
207
208 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
209 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
210 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
211
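	/* the alpha CB region follows the per-TPC beta (attrib) CBs within the context buffer chunk */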
212 alpha_offset_in_chunk = attrib_offset_in_chunk +
213 gr->tpc_count * gr->attrib_cb_size;
214
215 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
216 u32 temp = gpc_stride * gpc_index;
217 u32 temp2 = num_pes_per_gpc * gpc_index;
218 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
219 ppc_index++) {
220 cbm_cfg_size1 = gr->attrib_cb_default_size *
221 gr->pes_tpc_count[ppc_index][gpc_index];
222 cbm_cfg_size2 = gr->alpha_cb_default_size *
223 gr->pes_tpc_count[ppc_index][gpc_index];
224
225 gr_gk20a_ctx_patch_write(g, ch_ctx,
226 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
227 ppc_in_gpc_stride * ppc_index,
228 cbm_cfg_size1, patch);
229
230 gr_gk20a_ctx_patch_write(g, ch_ctx,
231 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
232 ppc_in_gpc_stride * ppc_index,
233 attrib_offset_in_chunk, patch);
234
235 attrib_offset_in_chunk += gr->attrib_cb_size *
236 gr->pes_tpc_count[ppc_index][gpc_index];
237
238 gr_gk20a_ctx_patch_write(g, ch_ctx,
239 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
240 ppc_in_gpc_stride * ppc_index,
241 cbm_cfg_size2, patch);
242
243 gr_gk20a_ctx_patch_write(g, ch_ctx,
244 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
245 ppc_in_gpc_stride * ppc_index,
246 alpha_offset_in_chunk, patch);
247
248 alpha_offset_in_chunk += gr->alpha_cb_size *
249 gr->pes_tpc_count[ppc_index][gpc_index];
250
251 gr_gk20a_ctx_patch_write(g, ch_ctx,
252 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
253 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
254 gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3),
255 patch);
256 }
257 }
258
259 return 0;
260}
261
262void gr_gm20b_commit_global_pagepool(struct gk20a *g,
263 struct channel_ctx_gk20a *ch_ctx,
264 u64 addr, u32 size, bool patch)
265{
266 gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch);
267
268 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(),
269 gr_gpcs_swdx_rm_pagepool_total_pages_f(size) |
270 gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch);
271
272}
273
274void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data)
275{
276 u32 val;
277
278 gk20a_dbg_fn("");
279
280 val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r());
281 val = set_field(val, gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(),
282 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data));
283 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val);
284
285 gk20a_dbg_fn("done");
286}
287
288int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
289 u32 class_num, u32 offset, u32 data)
290{
291 gk20a_dbg_fn("");
292
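	/* sw method offsets arrive in 32-bit words; shift to a byte offset to compare with the class method defines */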
293 if (class_num == MAXWELL_COMPUTE_B) {
294 switch (offset << 2) {
295 case NVB1C0_SET_SHADER_EXCEPTIONS:
296 gk20a_gr_set_shader_exceptions(g, data);
297 break;
298 case NVB1C0_SET_RD_COALESCE:
299 gr_gm20b_set_rd_coalesce(g, data);
300 break;
301 default:
302 goto fail;
303 }
304 }
305
306 if (class_num == MAXWELL_B) {
307 switch (offset << 2) {
308 case NVB197_SET_SHADER_EXCEPTIONS:
309 gk20a_gr_set_shader_exceptions(g, data);
310 break;
311 case NVB197_SET_CIRCULAR_BUFFER_SIZE:
312 g->ops.gr.set_circular_buffer_size(g, data);
313 break;
314 case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
315 g->ops.gr.set_alpha_circular_buffer_size(g, data);
316 break;
317 case NVB197_SET_RD_COALESCE:
318 gr_gm20b_set_rd_coalesce(g, data);
319 break;
320 default:
321 goto fail;
322 }
323 }
324 return 0;
325
326fail:
327 return -EINVAL;
328}
329
330void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
331{
332 struct gr_gk20a *gr = &g->gr;
333 u32 gpc_index, ppc_index, stride, val;
334 u32 pd_ab_max_output;
335 u32 alpha_cb_size = data * 4;
336 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
337 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
338
339 gk20a_dbg_fn("");
340 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
341 return; */
342
343 if (alpha_cb_size > gr->alpha_cb_size)
344 alpha_cb_size = gr->alpha_cb_size;
345
346 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
347 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
348 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
349 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
350
351 pd_ab_max_output = alpha_cb_size *
352 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
353 gr_pd_ab_dist_cfg1_max_output_granularity_v();
354
355 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
356 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
357 gr_pd_ab_dist_cfg1_max_batches_init_f());
358
359 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
360 stride = gpc_stride * gpc_index;
361
362 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
363 ppc_index++) {
364
365 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
366 stride +
367 ppc_in_gpc_stride * ppc_index);
368
369 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
370 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
371 gr->pes_tpc_count[ppc_index][gpc_index]));
372
373 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
374 stride +
375 ppc_in_gpc_stride * ppc_index, val);
376 }
377 }
378}
379
380void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
381{
382 struct gr_gk20a *gr = &g->gr;
383 u32 gpc_index, ppc_index, stride, val;
384 u32 cb_size = data * 4;
385 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
386 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
387
388 gk20a_dbg_fn("");
389
390 if (cb_size > gr->attrib_cb_size)
391 cb_size = gr->attrib_cb_size;
392
393 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
394 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
395 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
396 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
397
398 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
399 stride = gpc_stride * gpc_index;
400
401 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
402 ppc_index++) {
403
404 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
405 stride +
406 ppc_in_gpc_stride * ppc_index);
407
408 val = set_field(val,
409 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
410 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
411 gr->pes_tpc_count[ppc_index][gpc_index]));
412
413 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
414 stride +
415 ppc_in_gpc_stride * ppc_index, val);
416
417 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
418 ppc_index + gpc_index));
419
420 val = set_field(val,
421 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
422 gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
423 gr->gpc_ppc_count[gpc_index]));
424 val = set_field(val,
425 gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
426 gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
427 gr->gpc_ppc_count[gpc_index])/3));
428
429 gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
430 ppc_index + gpc_index), val);
431 }
432 }
433}
434
435void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
436{
437 /* setup sm warp esr report masks */
438 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
439 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
440 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
441 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
442 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
443 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
444 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
445 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
446 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
447 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
448 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
449 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
450 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
451 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
452 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
453 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
454 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
455 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
456 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
457 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() |
458 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() |
459 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
460 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
461
462 /* setup sm global esr report mask */
463 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
464 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
465 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
466}
467
468bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
469{
470 bool valid = false;
471
472 switch (class_num) {
473 case MAXWELL_COMPUTE_B:
474 case MAXWELL_B:
475 case FERMI_TWOD_A:
476 case KEPLER_DMA_COPY_A:
477 case MAXWELL_DMA_COPY_A:
478 valid = true;
479 break;
480
481 default:
482 break;
483 }
484
485 return valid;
486}
487
488bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
489{
490 if (class_num == MAXWELL_B)
491 return true;
492 else
493 return false;
494}
495
496bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
497{
498 if (class_num == MAXWELL_COMPUTE_B)
499 return true;
500 else
501 return false;
502}
503
504
505/* Following are the blocks of registers that the ucode
506 * stores in the extended region. */
507/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
508static const u32 _num_sm_dsm_perf_regs;
509/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
510static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
511static u32 *_sm_dsm_perf_regs;
512static u32 _sm_dsm_perf_ctrl_regs[2];
513
514void gr_gm20b_init_sm_dsm_reg_info(void)
515{
516 if (_sm_dsm_perf_ctrl_regs[0] != 0)
517 return;
518
519 _sm_dsm_perf_ctrl_regs[0] =
520 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
521 _sm_dsm_perf_ctrl_regs[1] =
522 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
523}
524
525void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
526 u32 *num_sm_dsm_perf_regs,
527 u32 **sm_dsm_perf_regs,
528 u32 *perf_register_stride)
529{
530 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
531 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
532 *perf_register_stride = 0;
533}
534
535void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
536 u32 *num_sm_dsm_perf_ctrl_regs,
537 u32 **sm_dsm_perf_ctrl_regs,
538 u32 *ctrl_register_stride)
539{
540 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
541 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
542
543 *ctrl_register_stride =
544 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
545}
546
547u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
548{
549 u32 val;
550 struct gr_gk20a *gr = &g->gr;
551
552	/* NV_FUSE_STATUS_OPT_TPC_GPC has a bit set per disabled TPC; invert it to get the enabled-TPC mask */
553 val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index));
554
555 return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
556}
557
558void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
559{
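	/* route fuse writes through the SW override path before programming the per-TPC disables below */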
560 nvgpu_tegra_fuse_write_bypass(g, 0x1);
561 nvgpu_tegra_fuse_write_access_sw(g, 0x0);
562
563 if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) {
564 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
565 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1);
566 } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) {
567 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
568 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
569 } else {
570 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
571 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
572 }
573}
574
575void gr_gm20b_load_tpc_mask(struct gk20a *g)
576{
577 u32 pes_tpc_mask = 0, fuse_tpc_mask;
578 u32 gpc, pes;
579 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
580
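	/* accumulate the per-PES TPC masks into one flat mask, one bit field per GPC */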
581 for (gpc = 0; gpc < g->gr.gpc_count; gpc++)
582 for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
583 pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
584 num_tpc_per_gpc * gpc;
585 }
586
587 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
588 if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask &&
589 fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) {
590 u32 val = g->tpc_fs_mask_user;
591 val &= (0x1U << g->gr.max_tpc_count) - 1U;
592		/* disabling an arbitrary TPC can cause channel timeouts, so keep only the requested number of TPCs, packed from TPC0 */
593 val = (0x1U << hweight32(val)) - 1U;
594 gk20a_writel(g, gr_fe_tpc_fs_r(), val);
595 } else {
596 gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
597 }
598}
599
600void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
601 u32 gpc, u32 tpc, u32 smid)
602{
603 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
604 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
605 u32 gpc_offset = gpc_stride * gpc;
606 u32 tpc_offset = tpc_in_gpc_stride * tpc;
607
608 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
609 gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
610 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
611 gr_gpc0_gpm_pd_sm_id_id_f(smid));
612 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
613 gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
614}
615
616int gr_gm20b_load_smid_config(struct gk20a *g)
617{
618 u32 *tpc_sm_id;
619 u32 i, j;
620 u32 tpc_index, gpc_index;
621
622 tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
623 if (!tpc_sm_id)
624 return -ENOMEM;
625
626	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. */
627 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
628 u32 reg = 0;
629 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
630 gr_cwd_gpc_tpc_id_tpc0_s();
631
632 for (j = 0; j < 4; j++) {
633 u32 sm_id = (i * 4) + j;
634 u32 bits;
635
636 if (sm_id >= g->gr.tpc_count)
637 break;
638
639 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
640 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
641
642 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
643 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
644 reg |= bits << (j * bit_stride);
645
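			/* also record this SM id in this GPC's entry of the CWD SM_ID table, written out below */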
646 tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride;
647 }
648 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
649 }
650
651 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
652 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
653
654 nvgpu_kfree(g, tpc_sm_id);
655
656 return 0;
657}
658
659int gr_gm20b_init_fs_state(struct gk20a *g)
660{
661 int err = 0;
662
663 gk20a_dbg_fn("");
664
665 err = gr_gk20a_init_fs_state(g);
666 if (err)
667 return err;
668
669 g->ops.gr.load_tpc_mask(g);
670
671 gk20a_writel(g, gr_bes_zrop_settings_r(),
672 gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count));
673 gk20a_writel(g, gr_bes_crop_settings_r(),
674 gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count));
675
676 gk20a_writel(g, gr_bes_crop_debug3_r(),
677 gk20a_readl(g, gr_be0_crop_debug3_r()) |
678 gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
679
680 g->ops.gr.load_smid_config(g);
681
682 return err;
683}
684
685int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
686 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
687{
688 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
689 gr_fecs_dmactl_require_ctx_f(0));
690
691 /* Copy falcon bootloader into dmem */
692 gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
693 gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);
694
695	/* start the falcon immediately if PRIV security is disabled */
696 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
697 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
698 gr_fecs_cpuctl_startcpu_f(0x01));
699 }
700
701 return 0;
702}
703
704static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
705{
706 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
707 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
708 return (addr >= tpc_in_gpc_shared_base) &&
709 (addr < (tpc_in_gpc_shared_base +
710 tpc_in_gpc_stride));
711}
712
713bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
714{
715 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
716 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
717 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
718 return ((addr >= tpc_in_gpc_base) &&
719 (addr < tpc_in_gpc_base +
720 (num_tpc_per_gpc * tpc_in_gpc_stride)))
721 || gr_gm20b_is_tpc_addr_shared(g, addr);
722}
723
724u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
725{
726 u32 i, start;
727 u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
728 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
729 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
730
731 for (i = 0; i < num_tpcs; i++) {
732 start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
733 if ((addr >= start) &&
734 (addr < (start + tpc_in_gpc_stride)))
735 return i;
736 }
737 return 0;
738}
739
740#ifdef CONFIG_TEGRA_ACR
741static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
742{
743 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
744 u64 addr_base = ucode_info->surface_desc.gpu_va;
745
746 gr_gk20a_load_falcon_bind_instblk(g);
747
748 g->ops.gr.falcon_load_ucode(g, addr_base,
749 &g->ctxsw_ucode_info.gpccs,
750 gr_gpcs_gpccs_falcon_hwcfg_r() -
751 gr_fecs_falcon_hwcfg_r());
752}
753
754int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
755{
756 u32 err, flags;
757 u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
758 gr_fecs_falcon_hwcfg_r();
759 u8 falcon_id_mask = 0;
760
761 gk20a_dbg_fn("");
762
763 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
764 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
765 gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
766 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
767 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
768 }
769
770 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
771 g->pmu_lsf_loaded_falcon_id = 0;
772 if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
773		/* this must be a recovery path, so bootstrap both FECS and GPCCS */
774 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
775 gr_gm20b_load_gpccs_with_bootloader(g);
776 err = g->ops.pmu.load_lsfalcon_ucode(g,
777 (1 << LSF_FALCON_ID_FECS));
778 } else {
779 /* bind WPR VA inst block */
780 gr_gk20a_load_falcon_bind_instblk(g);
781 err = g->ops.pmu.load_lsfalcon_ucode(g,
782 (1 << LSF_FALCON_ID_FECS) |
783 (1 << LSF_FALCON_ID_GPCCS));
784 }
785 if (err) {
786 nvgpu_err(g, "Unable to recover GR falcon");
787 return err;
788 }
789
790 } else {
791		/* cold boot or railgate exit */
792 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
793 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
794 gr_gm20b_load_gpccs_with_bootloader(g);
795 } else {
796 /* bind WPR VA inst block */
797 gr_gk20a_load_falcon_bind_instblk(g);
798 if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_FECS))
799 falcon_id_mask |= (1 << LSF_FALCON_ID_FECS);
800 if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_GPCCS))
801 falcon_id_mask |= (1 << LSF_FALCON_ID_GPCCS);
802
803 err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask);
804
805 if (err) {
806 nvgpu_err(g, "Unable to boot GPCCS");
807 return err;
808 }
809 }
810 }
811
812	/* start gpccs */
813 if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
814 gk20a_writel(g, reg_offset +
815 gr_fecs_cpuctl_alias_r(),
816 gr_gpccs_cpuctl_startcpu_f(1));
817 } else {
818 gk20a_writel(g, gr_gpccs_dmactl_r(),
819 gr_gpccs_dmactl_require_ctx_f(0));
820 gk20a_writel(g, gr_gpccs_cpuctl_r(),
821 gr_gpccs_cpuctl_startcpu_f(1));
822 }
823 /* start fecs */
824 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
825 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
826 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
827 gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
828 gr_fecs_cpuctl_startcpu_f(1));
829 gk20a_dbg_fn("done");
830
831 return 0;
832}
833#else
834
835int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
836{
837 return -EPERM;
838}
839
840#endif
841
842void gr_gm20b_detect_sm_arch(struct gk20a *g)
843{
844 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
845
846 g->params.sm_arch_spa_version =
847 gr_gpc0_tpc0_sm_arch_spa_version_v(v);
848 g->params.sm_arch_sm_version =
849 gr_gpc0_tpc0_sm_arch_sm_version_v(v);
850 g->params.sm_arch_warp_count =
851 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
852}
853
854u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
855{
856 return gr_scc_pagepool_total_pages_hwmax_value_v();
857}
858
859int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
860 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
861 u32 class,
862 u32 flags)
863{
864 int err;
865
866 gk20a_dbg_fn("");
867
868 err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
869 if (err)
870 return err;
871
872 if (class == MAXWELL_COMPUTE_B)
873 (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
874
875 gk20a_dbg_fn("done");
876
877 return 0;
878}
879
880void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
881 struct channel_ctx_gk20a *ch_ctx,
882 struct nvgpu_mem *mem)
883{
884 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
885 u32 cta_preempt_option =
886 ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
887
888 gk20a_dbg_fn("");
889
890 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
891 gk20a_dbg_info("CTA: %x", cta_preempt_option);
892 nvgpu_mem_wr(g, mem,
893 ctxsw_prog_main_image_preemption_options_o(),
894 cta_preempt_option);
895 }
896
897 gk20a_dbg_fn("done");
898}
899
900int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
901 struct gk20a_debug_output *o)
902{
903 struct gr_gk20a *gr = &g->gr;
904 u32 gr_engine_id;
905
906 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
907
908 gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
909 gk20a_readl(g, gr_status_r()));
910 gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
911 gk20a_readl(g, gr_status_1_r()));
912 gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
913 gk20a_readl(g, gr_status_2_r()));
914 gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
915 gk20a_readl(g, gr_engine_status_r()));
916 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
917 gk20a_readl(g, gr_gpfifo_status_r()));
918 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
919 gk20a_readl(g, gr_gpfifo_ctl_r()));
920 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
921 gk20a_readl(g, gr_fecs_host_int_status_r()));
922 gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
923 gk20a_readl(g, gr_exception_r()));
924 gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
925 gk20a_readl(g, gr_fecs_intr_r()));
926 gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
927 gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
928 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
929 gk20a_readl(g, gr_activity_0_r()));
930 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
931 gk20a_readl(g, gr_activity_1_r()));
932 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
933 gk20a_readl(g, gr_activity_2_r()));
934 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
935 gk20a_readl(g, gr_activity_4_r()));
936 gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
937 gk20a_readl(g, gr_pri_sked_activity_r()));
938 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
939 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
940 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
941 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
942 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
943 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
944 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
945 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
946 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
947 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
948 if (gr->gpc_tpc_count[0] == 2)
949 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
950 gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
951 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
952 gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
953 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
954 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
955 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
956 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
957 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
958 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
959 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
960 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
961 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
962 gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
963 if (gr->gpc_tpc_count[0] == 2)
964 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
965 gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
966 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
967 gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
968 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
969 gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
970 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
971 gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
972 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
973 gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
974 gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
975 gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
976 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n",
977 gk20a_readl(g, gr_pri_fe_go_idle_on_status_r()));
978 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
979 gk20a_readl(g, gr_fe_go_idle_timeout_r()));
980 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n",
981 gk20a_readl(g, gr_pri_fe_go_idle_check_r()));
982 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
983 gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
984 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
985 gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
986 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
987 gk20a_readl(g, gr_cwd_fs_r()));
988 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
989 gk20a_readl(g, gr_fe_tpc_fs_r()));
990 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
991 gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
992 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
993 gk20a_readl(g, gr_cwd_sm_id_r(0)));
994 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
995 gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
996 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
997 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
998 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
999 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
1000 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
1001 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
1002 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
1003 gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
1004 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
1005 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
1006 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
1007 gk20a_readl(g, gr_fecs_current_ctx_r()));
1008 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
1009 gk20a_readl(g, gr_fecs_new_ctx_r()));
1010 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
1011 gk20a_readl(g, gr_pri_be0_crop_status1_r()));
1012 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
1013 gk20a_readl(g, gr_pri_bes_crop_status1_r()));
1014 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
1015 gk20a_readl(g, gr_pri_be0_zrop_status_r()));
1016 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
1017 gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
1018 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
1019 gk20a_readl(g, gr_pri_bes_zrop_status_r()));
1020 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
1021 gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
1022 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
1023 gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
1024 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
1025 gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
1026 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
1027 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
1028 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
1029 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
1030 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
1031 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
1032 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
1033 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
1034
1035 return 0;
1036}
1037
1038int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1039 bool enable)
1040{
1041 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1042 struct nvgpu_mem *mem;
1043 u32 v;
1044
1045 gk20a_dbg_fn("");
1046
1047 if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
1048 return -EINVAL;
1049
1050 mem = &ch_ctx->gr_ctx->mem;
1051
1052 if (nvgpu_mem_begin(c->g, mem))
1053 return -ENOMEM;
1054
1055 v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
1056 v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
1057 v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
1058 nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
1059
1060 nvgpu_mem_end(c->g, mem);
1061
1062 gk20a_dbg_fn("done");
1063
1064 return 0;
1065}
1066
1067u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
1068{
1069 u32 fbp_en_mask, opt_fbio;
1070 u32 tmp, max_fbps_count;
1071
1072 tmp = gk20a_readl(g, top_num_fbps_r());
1073 max_fbps_count = top_num_fbps_value_v(tmp);
1074
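	/* the fuse marks disabled FBP I/O; XOR against the all-FBPs mask to get the enabled set */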
1075 opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
1076 fbp_en_mask =
1077 ((1 << max_fbps_count) - 1) ^
1078 fuse_status_opt_fbio_data_v(opt_fbio);
1079 return fbp_en_mask;
1080}
1081
1082u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
1083{
1084 u32 ltc_per_fbp, reg;
1085 reg = gk20a_readl(g, top_ltc_per_fbp_r());
1086 ltc_per_fbp = top_ltc_per_fbp_value_v(reg);
1087 return ltc_per_fbp;
1088}
1089
1090u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
1091{
1092 u32 lts_per_ltc, reg;
1093 reg = gk20a_readl(g, top_slices_per_ltc_r());
1094 lts_per_ltc = top_slices_per_ltc_value_v(reg);
1095 return lts_per_ltc;
1096}
1097
1098u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1099{
1100 struct gr_gk20a *gr = &g->gr;
1101 u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
1102 u32 rop_l2_all_en;
1103
1104 tmp = gk20a_readl(g, top_num_fbps_r());
1105 max_fbps_count = top_num_fbps_value_v(tmp);
1106 max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
1107 rop_l2_all_en = (1 << max_ltc_per_fbp) - 1;
1108
1109 /* mask of Rop_L2 for each FBP */
1110 for (i = 0; i < max_fbps_count; i++) {
1111 tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i));
1112 gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
1113 }
1114
1115 return gr->fbp_rop_l2_en_mask;
1116}
1117
1118u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
1119{
1120 u32 tmp, max_fbps_count;
1121 tmp = gk20a_readl(g, top_num_fbps_r());
1122 max_fbps_count = top_num_fbps_value_v(tmp);
1123 return max_fbps_count;
1124}
1125
1126void gr_gm20b_init_cyclestats(struct gk20a *g)
1127{
1128#if defined(CONFIG_GK20A_CYCLE_STATS)
1129 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, true);
1130 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, true);
1131 g->gr.max_css_buffer_size = 0xffffffff;
1132#else
1133 (void)g;
1134#endif
1135}
1136
1137void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
1138{
1139 u32 cde_v;
1140
1141 cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
1142 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
1143 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
1144}
1145
1146void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
1147{
1148	/* Read the valid, paused and trapped warp masks for each SM */
1149	/* (Maxwell register layout) */
1150 struct gr_gk20a *gr = &g->gr;
1151 u32 gpc, tpc, sm_id;
1152 u32 tpc_offset, gpc_offset, reg_offset;
1153 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
1154 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1155 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1156
1157 /* for maxwell & kepler */
1158 u32 numSmPerTpc = 1;
1159 u32 numWarpPerTpc = g->params.sm_arch_warp_count * numSmPerTpc;
1160
1161 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1162 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1163 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1164
1165 tpc_offset = tpc_in_gpc_stride * tpc;
1166 gpc_offset = gpc_stride * gpc;
1167 reg_offset = tpc_offset + gpc_offset;
1168
1169 /* 64 bit read */
1170 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
1171 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
1172
1173 /* 64 bit read */
1174 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
1175 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
1176
1177 /* 64 bit read */
1178 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
1179 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
1180
1181 w_state[sm_id].valid_warps[0] = warps_valid;
1182 w_state[sm_id].trapped_warps[0] = warps_trapped;
1183 w_state[sm_id].paused_warps[0] = warps_paused;
1184
1185
1186 if (numWarpPerTpc > 64) {
1187 /* 64 bit read */
1188 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32;
1189 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset);
1190
1191 /* 64 bit read */
1192 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32;
1193 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset);
1194
1195 /* 64 bit read */
1196 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32;
1197 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset);
1198
1199 w_state[sm_id].valid_warps[1] = warps_valid;
1200 w_state[sm_id].trapped_warps[1] = warps_trapped;
1201 w_state[sm_id].paused_warps[1] = warps_paused;
1202 }
1203 }
1204
1205
1206	/* For debug purposes only */
1207 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1208 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
1209 sm_id, w_state[sm_id].valid_warps[0]);
1210 gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
1211 sm_id, w_state[sm_id].valid_warps[1]);
1212
1213 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
1214 sm_id, w_state[sm_id].trapped_warps[0]);
1215 gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
1216 sm_id, w_state[sm_id].trapped_warps[1]);
1217
1218 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
1219 sm_id, w_state[sm_id].paused_warps[0]);
1220 gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
1221 sm_id, w_state[sm_id].paused_warps[1]);
1222 }
1223}
1224
1225void gr_gm20b_get_access_map(struct gk20a *g,
1226 u32 **whitelist, int *num_entries)
1227{
1228 static u32 wl_addr_gm20b[] = {
1229 /* this list must be sorted (low to high) */
1230 0x404468, /* gr_pri_mme_max_instructions */
1231 0x418300, /* gr_pri_gpcs_rasterarb_line_class */
1232 0x418800, /* gr_pri_gpcs_setup_debug */
1233 0x418e00, /* gr_pri_gpcs_swdx_config */
1234 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1235 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1236 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1237 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1238 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1239 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1240 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1241 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1242 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1243 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1244 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1245 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1246 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1247 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1248 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1249 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1250 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1251 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1252 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1253 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1254 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1255 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
1256 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
1257 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
1258 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
1259 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
1260 };
1261
1262 *whitelist = wl_addr_gm20b;
1263 *num_entries = ARRAY_SIZE(wl_addr_gm20b);
1264}
1265
1266int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1267{
1268 int sm_id;
1269 struct gr_gk20a *gr = &g->gr;
1270 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1271 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1272 GPU_LIT_TPC_IN_GPC_STRIDE);
1273 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1274
1275 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1276
1277 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
1278 gr_gpc0_tpc0_sm_cfg_r() + offset));
1279
1280 gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
1281 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1282 gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
1283 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
1284 gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
1285 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
1286 gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
1287 gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
1288 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
1289 gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
1290
1291 nvgpu_mutex_release(&g->dbg_sessions_lock);
1292
1293 return 0;
1294}
1295
1296int gm20b_gr_update_sm_error_state(struct gk20a *g,
1297 struct channel_gk20a *ch, u32 sm_id,
1298 struct nvgpu_gr_sm_error_state *sm_error_state)
1299{
1300 u32 gpc, tpc, offset;
1301 struct gr_gk20a *gr = &g->gr;
1302 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
1303 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1304 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1305 GPU_LIT_TPC_IN_GPC_STRIDE);
1306 int err = 0;
1307
1308 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1309
1310 gr->sm_error_states[sm_id].hww_global_esr =
1311 sm_error_state->hww_global_esr;
1312 gr->sm_error_states[sm_id].hww_warp_esr =
1313 sm_error_state->hww_warp_esr;
1314 gr->sm_error_states[sm_id].hww_warp_esr_pc =
1315 sm_error_state->hww_warp_esr_pc;
1316 gr->sm_error_states[sm_id].hww_global_esr_report_mask =
1317 sm_error_state->hww_global_esr_report_mask;
1318 gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
1319 sm_error_state->hww_warp_esr_report_mask;
1320
1321 err = gr_gk20a_disable_ctxsw(g);
1322 if (err) {
1323 nvgpu_err(g, "unable to stop gr ctxsw");
1324 goto fail;
1325 }
1326
1327 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1328 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1329
1330 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1331
1332 if (gk20a_is_channel_ctx_resident(ch)) {
1333 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1334 gr->sm_error_states[sm_id].hww_global_esr);
1335 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1336 gr->sm_error_states[sm_id].hww_warp_esr);
1337 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
1338 gr->sm_error_states[sm_id].hww_warp_esr_pc);
1339 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1340 gr->sm_error_states[sm_id].hww_global_esr_report_mask);
1341 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1342 gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
1343 } else {
1344 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
1345 if (err)
1346 goto enable_ctxsw;
1347
1348 gr_gk20a_ctx_patch_write(g, ch_ctx,
1349 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1350 gr->sm_error_states[sm_id].hww_global_esr_report_mask,
1351 true);
1352 gr_gk20a_ctx_patch_write(g, ch_ctx,
1353 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1354 gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
1355 true);
1356
1357 gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
1358 }
1359
1360enable_ctxsw:
1361 err = gr_gk20a_enable_ctxsw(g);
1362
1363fail:
1364 nvgpu_mutex_release(&g->dbg_sessions_lock);
1365 return err;
1366}
1367
1368int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1369 struct channel_gk20a *ch, u32 sm_id)
1370{
1371 u32 gpc, tpc, offset;
1372 u32 val;
1373 struct gr_gk20a *gr = &g->gr;
1374 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1375 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1376 GPU_LIT_TPC_IN_GPC_STRIDE);
1377 int err = 0;
1378
1379 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1380
1381 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
1382
1383 err = gr_gk20a_disable_ctxsw(g);
1384 if (err) {
1385 nvgpu_err(g, "unable to stop gr ctxsw");
1386 goto fail;
1387 }
1388
1389 if (gk20a_is_channel_ctx_resident(ch)) {
1390 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1391 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1392
1393 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1394
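		/* write the latched global ESR value back to clear it, then zero the warp ESR */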
1395 val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1396 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1397 val);
1398 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1399 0);
1400 }
1401
1402 err = gr_gk20a_enable_ctxsw(g);
1403
1404fail:
1405 nvgpu_mutex_release(&g->dbg_sessions_lock);
1406 return err;
1407}
1408
1409int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
1410 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
1411{
1412 preemption_modes_rec->graphics_preemption_mode_flags =
1413 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
1414 preemption_modes_rec->compute_preemption_mode_flags = (
1415 NVGPU_PREEMPTION_MODE_COMPUTE_WFI |
1416 NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
1417
1418 preemption_modes_rec->default_graphics_preempt_mode =
1419 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
1420 preemption_modes_rec->default_compute_preempt_mode =
1421 NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
1422
1423 return 0;
1424}
1425
1426bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1427{
1428 u32 ltc_shared_base = ltc_ltcs_ltss_v();
1429 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1430
1431 return (addr >= ltc_shared_base) &&
1432 (addr < (ltc_shared_base + lts_stride));
1433}
1434
1435bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
1436{
1437 u32 lts_shared_base = ltc_ltc0_ltss_v();
1438 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1439 u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
1440 u32 base_offset = lts_shared_base & addr_mask;
1441 u32 end_offset = base_offset + lts_stride;
1442
1443 return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) &&
1444 ((addr & addr_mask) >= base_offset) &&
1445 ((addr & addr_mask) < end_offset);
1446}
1447
1448static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
1449 u32 *priv_addr_table,
1450 u32 *priv_addr_table_index)
1451{
1452 u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
1453 u32 index = *priv_addr_table_index;
1454 u32 lts_num;
1455 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1456 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1457
1458 for (lts_num = 0; lts_num < num_ltc_slices; lts_num++)
1459 priv_addr_table[index++] = ltc_ltc0_lts0_v() +
1460 ltc_num * ltc_stride +
1461 lts_num * lts_stride +
1462 (addr & (lts_stride - 1));
1463
1464 *priv_addr_table_index = index;
1465}
1466
1467void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1468 u32 *priv_addr_table,
1469 u32 *priv_addr_table_index)
1470{
1471 u32 num_ltc = g->ltc_count;
1472 u32 i, start, ltc_num = 0;
1473 u32 pltcg_base = ltc_pltcg_base_v();
1474 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1475
1476 for (i = 0; i < num_ltc; i++) {
1477 start = pltcg_base + i * ltc_stride;
1478 if ((addr >= start) && (addr < (start + ltc_stride))) {
1479 ltc_num = i;
1480 break;
1481 }
1482 }
1483 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
1484 priv_addr_table_index);
1485}
1486
1487void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1488 u32 *priv_addr_table,
1489 u32 *priv_addr_table_index)
1490{
1491 u32 num_ltc = g->ltc_count;
1492 u32 ltc_num;
1493
1494 for (ltc_num = 0; ltc_num < num_ltc; ltc_num++)
1495 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num,
1496 priv_addr_table, priv_addr_table_index);
1497}
1498
1499void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
1500 u32 global_esr)
1501{
1502 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
1503
1504 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1505 global_esr);
1506
1507 /* clear the warp hww */
1508 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0);
1509}
1510
1511/*
1512 * Disable both surface and LG coalesce.
1513 */
1514void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
1515{
1516 u32 dbg2_reg;
1517
1518 dbg2_reg = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r());
1519 dbg2_reg = set_field(dbg2_reg,
1520 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(),
1521 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0));
1522 dbg2_reg = set_field(dbg2_reg,
1523 gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(),
1524 gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0));
1525
1526 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg);
1527}
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
new file mode 100644
index 00000000..18e6b032
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -0,0 +1,137 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_GR_MMU_H
26#define _NVHOST_GM20B_GR_MMU_H
27
28struct gk20a;
29struct nvgpu_warpstate;
30
31enum {
32 MAXWELL_B = 0xB197,
33 MAXWELL_COMPUTE_B = 0xB1C0,
34	KEPLER_INLINE_TO_MEMORY_B = 0xA140,
35	MAXWELL_DMA_COPY_A = 0xB0B5,
36	MAXWELL_CHANNEL_GPFIFO_A = 0xB06F,
37};
38
39#define NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
40#define NVB197_SET_CIRCULAR_BUFFER_SIZE 0x1280
41#define NVB197_SET_SHADER_EXCEPTIONS 0x1528
42#define NVB197_SET_RD_COALESCE 0x102c
43#define NVB1C0_SET_SHADER_EXCEPTIONS 0x1528
44#define NVB1C0_SET_RD_COALESCE 0x0228
45
46#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
47
48void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
49 struct channel_ctx_gk20a *ch_ctx,
50 u64 addr, bool patch);
51int gr_gm20b_init_fs_state(struct gk20a *g);
52int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
53void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
54void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
55void gr_gm20b_init_gpc_mmu(struct gk20a *g);
56void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
57void gr_gm20b_cb_size_default(struct gk20a *g);
58int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
59void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
60 struct channel_ctx_gk20a *ch_ctx,
61 u64 addr, u64 size, bool patch);
62int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
63 struct channel_gk20a *c, bool patch);
64void gr_gm20b_commit_global_pagepool(struct gk20a *g,
65 struct channel_ctx_gk20a *ch_ctx,
66 u64 addr, u32 size, bool patch);
67int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
68 u32 class_num, u32 offset, u32 data);
69void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
70void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data);
71void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g);
72bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num);
73bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
74bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num);
75void gr_gm20b_init_sm_dsm_reg_info(void);
76void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
77 u32 *num_sm_dsm_perf_regs,
78 u32 **sm_dsm_perf_regs,
79 u32 *perf_register_stride);
80void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
81 u32 *num_sm_dsm_perf_ctrl_regs,
82 u32 **sm_dsm_perf_ctrl_regs,
83 u32 *ctrl_register_stride);
84u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
85void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
86void gr_gm20b_load_tpc_mask(struct gk20a *g);
87void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
88 u32 gpc, u32 tpc, u32 smid);
89int gr_gm20b_load_smid_config(struct gk20a *g);
90int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
91 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
92bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
93u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
94int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
95int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
96void gr_gm20b_detect_sm_arch(struct gk20a *g);
97u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
98int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
99 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
100 u32 class,
101 u32 flags);
102void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
103 struct channel_ctx_gk20a *ch_ctx,
104 struct nvgpu_mem *mem);
105int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
106 struct gk20a_debug_output *o);
107int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
108 bool enable);
109u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g);
110u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g);
111u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g);
112u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
113u32 gr_gm20b_get_max_fbps_count(struct gk20a *g);
114void gr_gm20b_init_cyclestats(struct gk20a *g);
115void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem);
116void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state);
117void gr_gm20b_get_access_map(struct gk20a *g,
118 u32 **whitelist, int *num_entries);
119int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc);
120int gm20b_gr_update_sm_error_state(struct gk20a *g,
121 struct channel_gk20a *ch, u32 sm_id,
122 struct nvgpu_gr_sm_error_state *sm_error_state);
123int gm20b_gr_clear_sm_error_state(struct gk20a *g,
124 struct channel_gk20a *ch, u32 sm_id);
125int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
126 struct nvgpu_preemption_modes_rec *preemption_modes_rec);
127bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr);
128bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr);
129void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
130 u32 *priv_addr_table,
131 u32 *priv_addr_table_index);
132void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
133 u32 *priv_addr_table,
134 u32 *priv_addr_table_index);
135void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
136 u32 global_esr);
137#endif
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
new file mode 100644
index 00000000..227b6b6c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -0,0 +1,708 @@
1/*
2 * GM20B Graphics
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/ce2_gk20a.h"
27#include "gk20a/dbg_gpu_gk20a.h"
28#include "gk20a/fb_gk20a.h"
29#include "gk20a/fifo_gk20a.h"
30#include "gk20a/therm_gk20a.h"
31#include "gk20a/mm_gk20a.h"
32#include "gk20a/css_gr_gk20a.h"
33#include "gk20a/mc_gk20a.h"
34#include "gk20a/bus_gk20a.h"
35#include "gk20a/flcn_gk20a.h"
36#include "gk20a/priv_ring_gk20a.h"
37#include "gk20a/regops_gk20a.h"
38#include "gk20a/pmu_gk20a.h"
39#include "gk20a/gr_gk20a.h"
40#include "gk20a/tsg_gk20a.h"
41
42#include "ltc_gm20b.h"
43#include "gr_gm20b.h"
45#include "fb_gm20b.h"
46#include "gm20b_gating_reglist.h"
47#include "fifo_gm20b.h"
48#include "gr_ctx_gm20b.h"
49#include "mm_gm20b.h"
50#include "pmu_gm20b.h"
51#include "clk_gm20b.h"
52#include "regops_gm20b.h"
53#include "therm_gm20b.h"
54#include "bus_gm20b.h"
55#include "hal_gm20b.h"
56#include "acr_gm20b.h"
57
58#include <nvgpu/debug.h>
59#include <nvgpu/bug.h>
60#include <nvgpu/enabled.h>
61#include <nvgpu/bus.h>
62
63#include <nvgpu/hw/gm20b/hw_proj_gm20b.h>
64#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
65#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
66#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
67#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
68#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
69#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
70
71#define PRIV_SECURITY_DISABLE 0x01
72
73int gm20b_get_litter_value(struct gk20a *g, int value)
74{
75 int ret = EINVAL;
76 switch (value) {
77 case GPU_LIT_NUM_GPCS:
78 ret = proj_scal_litter_num_gpcs_v();
79 break;
80 case GPU_LIT_NUM_PES_PER_GPC:
81 ret = proj_scal_litter_num_pes_per_gpc_v();
82 break;
83 case GPU_LIT_NUM_ZCULL_BANKS:
84 ret = proj_scal_litter_num_zcull_banks_v();
85 break;
86 case GPU_LIT_NUM_TPC_PER_GPC:
87 ret = proj_scal_litter_num_tpc_per_gpc_v();
88 break;
89 case GPU_LIT_NUM_SM_PER_TPC:
90 ret = proj_scal_litter_num_sm_per_tpc_v();
91 break;
92 case GPU_LIT_NUM_FBPS:
93 ret = proj_scal_litter_num_fbps_v();
94 break;
95 case GPU_LIT_GPC_BASE:
96 ret = proj_gpc_base_v();
97 break;
98 case GPU_LIT_GPC_STRIDE:
99 ret = proj_gpc_stride_v();
100 break;
101 case GPU_LIT_GPC_SHARED_BASE:
102 ret = proj_gpc_shared_base_v();
103 break;
104 case GPU_LIT_TPC_IN_GPC_BASE:
105 ret = proj_tpc_in_gpc_base_v();
106 break;
107 case GPU_LIT_TPC_IN_GPC_STRIDE:
108 ret = proj_tpc_in_gpc_stride_v();
109 break;
110 case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
111 ret = proj_tpc_in_gpc_shared_base_v();
112 break;
113 case GPU_LIT_PPC_IN_GPC_BASE:
114 ret = proj_ppc_in_gpc_base_v();
115 break;
116 case GPU_LIT_PPC_IN_GPC_STRIDE:
117 ret = proj_ppc_in_gpc_stride_v();
118 break;
119 case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
120 ret = proj_ppc_in_gpc_shared_base_v();
121 break;
122 case GPU_LIT_ROP_BASE:
123 ret = proj_rop_base_v();
124 break;
125 case GPU_LIT_ROP_STRIDE:
126 ret = proj_rop_stride_v();
127 break;
128 case GPU_LIT_ROP_SHARED_BASE:
129 ret = proj_rop_shared_base_v();
130 break;
131 case GPU_LIT_HOST_NUM_ENGINES:
132 ret = proj_host_num_engines_v();
133 break;
134 case GPU_LIT_HOST_NUM_PBDMA:
135 ret = proj_host_num_pbdma_v();
136 break;
137 case GPU_LIT_LTC_STRIDE:
138 ret = proj_ltc_stride_v();
139 break;
140 case GPU_LIT_LTS_STRIDE:
141 ret = proj_lts_stride_v();
142 break;
143 /* Even though GM20B doesn't have an FBPA unit, the HW reports one,
144 * and the microcode as a result leaves space in the context buffer
145 * for one, so make sure SW accounts for this also.
146 */
147 case GPU_LIT_NUM_FBPAS:
148 ret = proj_scal_litter_num_fbpas_v();
149 break;
150 /* Hardcode FBPA values other than NUM_FBPAS to 0. */
151 case GPU_LIT_FBPA_STRIDE:
152 case GPU_LIT_FBPA_BASE:
153 case GPU_LIT_FBPA_SHARED_BASE:
154 ret = 0;
155 break;
156 case GPU_LIT_TWOD_CLASS:
157 ret = FERMI_TWOD_A;
158 break;
159 case GPU_LIT_THREED_CLASS:
160 ret = MAXWELL_B;
161 break;
162 case GPU_LIT_COMPUTE_CLASS:
163 ret = MAXWELL_COMPUTE_B;
164 break;
165 case GPU_LIT_GPFIFO_CLASS:
166 ret = MAXWELL_CHANNEL_GPFIFO_A;
167 break;
168 case GPU_LIT_I2M_CLASS:
169 ret = KEPLER_INLINE_TO_MEMORY_B;
170 break;
171 case GPU_LIT_DMA_COPY_CLASS:
172 ret = MAXWELL_DMA_COPY_A;
173 break;
174 default:
175 nvgpu_err(g, "Missing definition %d", value);
176 BUG();
177 break;
178 }
179
180 return ret;
181}
182
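The litter values above are normally reached through the common nvgpu_get_litter_value() wrapper (installed as gops->get_litter_value below) rather than by calling the chip function directly. A minimal caller sketch; the helper name is illustrative, while the wrapper and GPU_LIT_* constants are the ones used elsewhere in this driver:

/* Sketch: query per-chip topology constants through the HAL wrapper.
 * nvgpu_get_litter_value() dispatches to gm20b_get_litter_value() once
 * gm20b_init_hal() has installed g->ops.get_litter_value.
 */
static void example_dump_topology(struct gk20a *g)
{
	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
	u32 tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);

	gk20a_dbg_info("gpcs=%u tpc/gpc=%u gpc_stride=0x%x",
		       num_gpcs, tpc_per_gpc, gpc_stride);
}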
183static const struct gpu_ops gm20b_ops = {
184 .ltc = {
185 .determine_L2_size_bytes = gm20b_determine_L2_size_bytes,
186 .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry,
187 .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry,
188 .init_cbc = gm20b_ltc_init_cbc,
189 .init_fs_state = gm20b_ltc_init_fs_state,
190 .init_comptags = gm20b_ltc_init_comptags,
191 .cbc_ctrl = gm20b_ltc_cbc_ctrl,
192 .isr = gm20b_ltc_isr,
193 .cbc_fix_config = gm20b_ltc_cbc_fix_config,
194 .flush = gm20b_flush_ltc,
195 .set_enabled = gm20b_ltc_set_enabled,
196 },
197 .ce2 = {
198 .isr_stall = gk20a_ce2_isr,
199 .isr_nonstall = gk20a_ce2_nonstall_isr,
200 },
201 .gr = {
202 .get_patch_slots = gr_gk20a_get_patch_slots,
203 .init_gpc_mmu = gr_gm20b_init_gpc_mmu,
204 .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
205 .cb_size_default = gr_gm20b_cb_size_default,
206 .calc_global_ctx_buffer_size =
207 gr_gm20b_calc_global_ctx_buffer_size,
208 .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
209 .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
210 .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
211 .commit_global_pagepool = gr_gm20b_commit_global_pagepool,
212 .handle_sw_method = gr_gm20b_handle_sw_method,
213 .set_alpha_circular_buffer_size =
214 gr_gm20b_set_alpha_circular_buffer_size,
215 .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size,
216 .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
217 .is_valid_class = gr_gm20b_is_valid_class,
218 .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class,
219 .is_valid_compute_class = gr_gm20b_is_valid_compute_class,
220 .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
221 .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
222 .init_fs_state = gr_gm20b_init_fs_state,
223 .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
224 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
225 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
226 .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
227 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
228 .free_channel_ctx = gk20a_free_channel_ctx,
229 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
230 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
231 .get_zcull_info = gr_gk20a_get_zcull_info,
232 .is_tpc_addr = gr_gm20b_is_tpc_addr,
233 .get_tpc_num = gr_gm20b_get_tpc_num,
234 .detect_sm_arch = gr_gm20b_detect_sm_arch,
235 .add_zbc_color = gr_gk20a_add_zbc_color,
236 .add_zbc_depth = gr_gk20a_add_zbc_depth,
237 .zbc_set_table = gk20a_gr_zbc_set_table,
238 .zbc_query_table = gr_gk20a_query_zbc,
239 .pmu_save_zbc = gk20a_pmu_save_zbc,
240 .add_zbc = gr_gk20a_add_zbc,
241 .pagepool_default_size = gr_gm20b_pagepool_default_size,
242 .init_ctx_state = gr_gk20a_init_ctx_state,
243 .alloc_gr_ctx = gr_gm20b_alloc_gr_ctx,
244 .free_gr_ctx = gr_gk20a_free_gr_ctx,
245 .update_ctxsw_preemption_mode =
246 gr_gm20b_update_ctxsw_preemption_mode,
247 .dump_gr_regs = gr_gm20b_dump_gr_status_regs,
248 .update_pc_sampling = gr_gm20b_update_pc_sampling,
249 .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
250 .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
251 .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
252 .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
253 .get_max_fbps_count = gr_gm20b_get_max_fbps_count,
254 .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
255 .wait_empty = gr_gk20a_wait_idle,
256 .init_cyclestats = gr_gm20b_init_cyclestats,
257 .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
258 .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
259 .bpt_reg_info = gr_gm20b_bpt_reg_info,
260 .get_access_map = gr_gm20b_get_access_map,
261 .handle_fecs_error = gk20a_gr_handle_fecs_error,
262 .handle_sm_exception = gr_gk20a_handle_sm_exception,
263 .handle_tex_exception = gr_gk20a_handle_tex_exception,
264 .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
265 .enable_exceptions = gk20a_gr_enable_exceptions,
266 .get_lrf_tex_ltc_dram_override = NULL,
267 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
268 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
269 .record_sm_error_state = gm20b_gr_record_sm_error_state,
270 .update_sm_error_state = gm20b_gr_update_sm_error_state,
271 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
272 .suspend_contexts = gr_gk20a_suspend_contexts,
273 .resume_contexts = gr_gk20a_resume_contexts,
274 .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
275 .init_sm_id_table = gr_gk20a_init_sm_id_table,
276 .load_smid_config = gr_gm20b_load_smid_config,
277 .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
278 .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
279 .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
280 .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
281 .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
282 .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
283 .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
284 .commit_global_timeslice = gr_gk20a_commit_global_timeslice,
285 .commit_inst = gr_gk20a_commit_inst,
286 .write_zcull_ptr = gr_gk20a_write_zcull_ptr,
287 .write_pm_ptr = gr_gk20a_write_pm_ptr,
288 .init_elcg_mode = gr_gk20a_init_elcg_mode,
289 .load_tpc_mask = gr_gm20b_load_tpc_mask,
290 .inval_icache = gr_gk20a_inval_icache,
291 .trigger_suspend = gr_gk20a_trigger_suspend,
292 .wait_for_pause = gr_gk20a_wait_for_pause,
293 .resume_from_pause = gr_gk20a_resume_from_pause,
294 .clear_sm_errors = gr_gk20a_clear_sm_errors,
295 .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
296 .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
297 .sm_debugger_attached = gk20a_gr_sm_debugger_attached,
298 .suspend_single_sm = gk20a_gr_suspend_single_sm,
299 .suspend_all_sms = gk20a_gr_suspend_all_sms,
300 .resume_single_sm = gk20a_gr_resume_single_sm,
301 .resume_all_sms = gk20a_gr_resume_all_sms,
302 .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr,
303 .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
304 .get_sm_no_lock_down_hww_global_esr_mask =
305 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
306 .lock_down_sm = gk20a_gr_lock_down_sm,
307 .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
308 .clear_sm_hww = gm20b_gr_clear_sm_hww,
309 .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
310 .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
311 .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
312 .init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data,
313 },
314 .fb = {
315 .reset = fb_gk20a_reset,
316 .init_hw = gk20a_fb_init_hw,
317 .init_fs_state = fb_gm20b_init_fs_state,
318 .set_mmu_page_size = gm20b_fb_set_mmu_page_size,
319 .set_use_full_comp_tag_line =
320 gm20b_fb_set_use_full_comp_tag_line,
321 .compression_page_size = gm20b_fb_compression_page_size,
322 .compressible_page_size = gm20b_fb_compressible_page_size,
323 .vpr_info_fetch = gm20b_fb_vpr_info_fetch,
324 .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info,
325 .read_wpr_info = gm20b_fb_read_wpr_info,
326 .is_debug_mode_enabled = gm20b_fb_debug_mode_enabled,
327 .set_debug_mode = gm20b_fb_set_debug_mode,
328 .tlb_invalidate = gk20a_fb_tlb_invalidate,
329 .mem_unlock = NULL,
330 },
331 .clock_gating = {
332 .slcg_bus_load_gating_prod =
333 gm20b_slcg_bus_load_gating_prod,
334 .slcg_ce2_load_gating_prod =
335 gm20b_slcg_ce2_load_gating_prod,
336 .slcg_chiplet_load_gating_prod =
337 gm20b_slcg_chiplet_load_gating_prod,
338 .slcg_ctxsw_firmware_load_gating_prod =
339 gm20b_slcg_ctxsw_firmware_load_gating_prod,
340 .slcg_fb_load_gating_prod =
341 gm20b_slcg_fb_load_gating_prod,
342 .slcg_fifo_load_gating_prod =
343 gm20b_slcg_fifo_load_gating_prod,
344 .slcg_gr_load_gating_prod =
345 gr_gm20b_slcg_gr_load_gating_prod,
346 .slcg_ltc_load_gating_prod =
347 ltc_gm20b_slcg_ltc_load_gating_prod,
348 .slcg_perf_load_gating_prod =
349 gm20b_slcg_perf_load_gating_prod,
350 .slcg_priring_load_gating_prod =
351 gm20b_slcg_priring_load_gating_prod,
352 .slcg_pmu_load_gating_prod =
353 gm20b_slcg_pmu_load_gating_prod,
354 .slcg_therm_load_gating_prod =
355 gm20b_slcg_therm_load_gating_prod,
356 .slcg_xbar_load_gating_prod =
357 gm20b_slcg_xbar_load_gating_prod,
358 .blcg_bus_load_gating_prod =
359 gm20b_blcg_bus_load_gating_prod,
360 .blcg_ctxsw_firmware_load_gating_prod =
361 gm20b_blcg_ctxsw_firmware_load_gating_prod,
362 .blcg_fb_load_gating_prod =
363 gm20b_blcg_fb_load_gating_prod,
364 .blcg_fifo_load_gating_prod =
365 gm20b_blcg_fifo_load_gating_prod,
366 .blcg_gr_load_gating_prod =
367 gm20b_blcg_gr_load_gating_prod,
368 .blcg_ltc_load_gating_prod =
369 gm20b_blcg_ltc_load_gating_prod,
370 .blcg_pwr_csb_load_gating_prod =
371 gm20b_blcg_pwr_csb_load_gating_prod,
372 .blcg_xbar_load_gating_prod =
373 gm20b_blcg_xbar_load_gating_prod,
374 .blcg_pmu_load_gating_prod =
375 gm20b_blcg_pmu_load_gating_prod,
376 .pg_gr_load_gating_prod =
377 gr_gm20b_pg_gr_load_gating_prod,
378 },
379 .fifo = {
380 .init_fifo_setup_hw = gk20a_init_fifo_setup_hw,
381 .bind_channel = channel_gm20b_bind,
382 .unbind_channel = gk20a_fifo_channel_unbind,
383 .disable_channel = gk20a_fifo_disable_channel,
384 .enable_channel = gk20a_fifo_enable_channel,
385 .alloc_inst = gk20a_fifo_alloc_inst,
386 .free_inst = gk20a_fifo_free_inst,
387 .setup_ramfc = gk20a_fifo_setup_ramfc,
388 .channel_set_timeslice = gk20a_fifo_set_timeslice,
389 .default_timeslice_us = gk20a_fifo_default_timeslice_us,
390 .setup_userd = gk20a_fifo_setup_userd,
391 .userd_gp_get = gk20a_fifo_userd_gp_get,
392 .userd_gp_put = gk20a_fifo_userd_gp_put,
393 .userd_pb_get = gk20a_fifo_userd_pb_get,
394 .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
395 .preempt_channel = gk20a_fifo_preempt_channel,
396 .preempt_tsg = gk20a_fifo_preempt_tsg,
397 .enable_tsg = gk20a_enable_tsg,
398 .disable_tsg = gk20a_disable_tsg,
399 .tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
400 .tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
401 .update_runlist = gk20a_fifo_update_runlist,
402 .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault,
403 .get_mmu_fault_info = gk20a_fifo_get_mmu_fault_info,
404 .wait_engine_idle = gk20a_fifo_wait_engine_idle,
405 .get_num_fifos = gm20b_fifo_get_num_fifos,
406 .get_pbdma_signature = gk20a_fifo_get_pbdma_signature,
407 .set_runlist_interleave = gk20a_fifo_set_runlist_interleave,
408 .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
409 .force_reset_ch = gk20a_fifo_force_reset_ch,
410 .engine_enum_from_type = gk20a_fifo_engine_enum_from_type,
411 .device_info_data_parse = gm20b_device_info_data_parse,
412 .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v,
413 .init_engine_info = gk20a_fifo_init_engine_info,
414 .runlist_entry_size = ram_rl_entry_size_v,
415 .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry,
416 .get_ch_runlist_entry = gk20a_get_ch_runlist_entry,
417 .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc,
418 .dump_pbdma_status = gk20a_dump_pbdma_status,
419 .dump_eng_status = gk20a_dump_eng_status,
420 .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc,
421 .intr_0_error_mask = gk20a_fifo_intr_0_error_mask,
422 .is_preempt_pending = gk20a_fifo_is_preempt_pending,
423 .init_pbdma_intr_descs = gm20b_fifo_init_pbdma_intr_descs,
424 .reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
425 .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg,
426 .handle_sched_error = gk20a_fifo_handle_sched_error,
427 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0,
428 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1,
429 .tsg_bind_channel = gk20a_tsg_bind_channel,
430 .tsg_unbind_channel = gk20a_tsg_unbind_channel,
431#ifdef CONFIG_TEGRA_GK20A_NVHOST
432 .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
433 .free_syncpt_buf = gk20a_fifo_free_syncpt_buf,
434 .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd,
435 .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size,
436 .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd,
437 .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size,
438#endif
439 },
440 .gr_ctx = {
441 .get_netlist_name = gr_gm20b_get_netlist_name,
442 .is_fw_defined = gr_gm20b_is_firmware_defined,
443 },
444 .mm = {
445 .support_sparse = gm20b_mm_support_sparse,
446 .gmmu_map = gk20a_locked_gmmu_map,
447 .gmmu_unmap = gk20a_locked_gmmu_unmap,
448 .vm_bind_channel = gk20a_vm_bind_channel,
449 .fb_flush = gk20a_mm_fb_flush,
450 .l2_invalidate = gk20a_mm_l2_invalidate,
451 .l2_flush = gk20a_mm_l2_flush,
452 .cbc_clean = gk20a_mm_cbc_clean,
453 .set_big_page_size = gm20b_mm_set_big_page_size,
454 .get_big_page_sizes = gm20b_mm_get_big_page_sizes,
455 .get_default_big_page_size = gm20b_mm_get_default_big_page_size,
456 .gpu_phys_addr = gm20b_gpu_phys_addr,
457 .get_iommu_bit = gk20a_mm_get_iommu_bit,
458 .get_mmu_levels = gk20a_mm_get_mmu_levels,
459 .init_pdb = gk20a_mm_init_pdb,
460 .init_mm_setup_hw = gk20a_init_mm_setup_hw,
461 .is_bar1_supported = gm20b_mm_is_bar1_supported,
462 .alloc_inst_block = gk20a_alloc_inst_block,
463 .init_inst_block = gk20a_init_inst_block,
464 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
465 .get_kind_invalid = gm20b_get_kind_invalid,
466 .get_kind_pitch = gm20b_get_kind_pitch,
467 },
468 .therm = {
469 .init_therm_setup_hw = gm20b_init_therm_setup_hw,
470 .elcg_init_idle_filters = gk20a_elcg_init_idle_filters,
471 },
472 .pmu = {
473 .pmu_setup_elpg = gm20b_pmu_setup_elpg,
474 .pmu_get_queue_head = pwr_pmu_queue_head_r,
475 .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v,
476 .pmu_get_queue_tail = pwr_pmu_queue_tail_r,
477 .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v,
478 .pmu_queue_head = gk20a_pmu_queue_head,
479 .pmu_queue_tail = gk20a_pmu_queue_tail,
480 .pmu_msgq_tail = gk20a_pmu_msgq_tail,
481 .pmu_mutex_size = pwr_pmu_mutex__size_1_v,
482 .pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
483 .pmu_mutex_release = gk20a_pmu_mutex_release,
484 .write_dmatrfbase = gm20b_write_dmatrfbase,
485 .pmu_elpg_statistics = gk20a_pmu_elpg_statistics,
486 .pmu_pg_init_param = NULL,
487 .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
488 .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
489 .pmu_is_lpwr_feature_supported = NULL,
490 .pmu_lpwr_enable_pg = NULL,
491 .pmu_lpwr_disable_pg = NULL,
492 .pmu_pg_param_post_init = NULL,
493 .dump_secure_fuses = pmu_dump_security_fuses_gm20b,
494 .reset_engine = gk20a_pmu_engine_reset,
495 .is_engine_in_reset = gk20a_pmu_is_engine_in_reset,
496 },
497 .clk = {
498 .init_clk_support = gm20b_init_clk_support,
499 .suspend_clk_support = gm20b_suspend_clk_support,
500#ifdef CONFIG_DEBUG_FS
501 .init_debugfs = gm20b_clk_init_debugfs,
502#endif
503 .get_voltage = gm20b_clk_get_voltage,
504 .get_gpcclk_clock_counter = gm20b_clk_get_gpcclk_clock_counter,
505 .pll_reg_write = gm20b_clk_pll_reg_write,
506 .get_pll_debug_data = gm20b_clk_get_pll_debug_data,
507 },
508 .regops = {
509 .get_global_whitelist_ranges =
510 gm20b_get_global_whitelist_ranges,
511 .get_global_whitelist_ranges_count =
512 gm20b_get_global_whitelist_ranges_count,
513 .get_context_whitelist_ranges =
514 gm20b_get_context_whitelist_ranges,
515 .get_context_whitelist_ranges_count =
516 gm20b_get_context_whitelist_ranges_count,
517 .get_runcontrol_whitelist = gm20b_get_runcontrol_whitelist,
518 .get_runcontrol_whitelist_count =
519 gm20b_get_runcontrol_whitelist_count,
520 .get_runcontrol_whitelist_ranges =
521 gm20b_get_runcontrol_whitelist_ranges,
522 .get_runcontrol_whitelist_ranges_count =
523 gm20b_get_runcontrol_whitelist_ranges_count,
524 .get_qctl_whitelist = gm20b_get_qctl_whitelist,
525 .get_qctl_whitelist_count = gm20b_get_qctl_whitelist_count,
526 .get_qctl_whitelist_ranges = gm20b_get_qctl_whitelist_ranges,
527 .get_qctl_whitelist_ranges_count =
528 gm20b_get_qctl_whitelist_ranges_count,
529 .apply_smpc_war = gm20b_apply_smpc_war,
530 },
531 .mc = {
532 .intr_enable = mc_gk20a_intr_enable,
533 .intr_unit_config = mc_gk20a_intr_unit_config,
534 .isr_stall = mc_gk20a_isr_stall,
535 .intr_stall = mc_gk20a_intr_stall,
536 .intr_stall_pause = mc_gk20a_intr_stall_pause,
537 .intr_stall_resume = mc_gk20a_intr_stall_resume,
538 .intr_nonstall = mc_gk20a_intr_nonstall,
539 .intr_nonstall_pause = mc_gk20a_intr_nonstall_pause,
540 .intr_nonstall_resume = mc_gk20a_intr_nonstall_resume,
541 .enable = gk20a_mc_enable,
542 .disable = gk20a_mc_disable,
543 .reset = gk20a_mc_reset,
544 .boot_0 = gk20a_mc_boot_0,
545 .is_intr1_pending = mc_gk20a_is_intr1_pending,
546 },
547 .debug = {
548 .show_dump = gk20a_debug_show_dump,
549 },
550 .dbg_session_ops = {
551 .exec_reg_ops = exec_regops_gk20a,
552 .dbg_set_powergate = dbg_set_powergate,
553 .check_and_set_global_reservation =
554 nvgpu_check_and_set_global_reservation,
555 .check_and_set_context_reservation =
556 nvgpu_check_and_set_context_reservation,
557 .release_profiler_reservation =
558 nvgpu_release_profiler_reservation,
559 .perfbuffer_enable = gk20a_perfbuf_enable_locked,
560 .perfbuffer_disable = gk20a_perfbuf_disable_locked,
561 },
562 .bus = {
563 .init_hw = gk20a_bus_init_hw,
564 .isr = gk20a_bus_isr,
565 .read_ptimer = gk20a_read_ptimer,
566 .get_timestamps_zipper = nvgpu_get_timestamps_zipper,
567 .bar1_bind = gm20b_bus_bar1_bind,
568 },
569#if defined(CONFIG_GK20A_CYCLE_STATS)
570 .css = {
571 .enable_snapshot = css_hw_enable_snapshot,
572 .disable_snapshot = css_hw_disable_snapshot,
573 .check_data_available = css_hw_check_data_available,
574 .set_handled_snapshots = css_hw_set_handled_snapshots,
575 .allocate_perfmon_ids = css_gr_allocate_perfmon_ids,
576 .release_perfmon_ids = css_gr_release_perfmon_ids,
577 },
578#endif
579 .falcon = {
580 .falcon_hal_sw_init = gk20a_falcon_hal_sw_init,
581 },
582 .priv_ring = {
583 .isr = gk20a_priv_ring_isr,
584 },
585 .chip_init_gpu_characteristics = gk20a_init_gpu_characteristics,
586 .get_litter_value = gm20b_get_litter_value,
587};
588
589int gm20b_init_hal(struct gk20a *g)
590{
591 struct gpu_ops *gops = &g->ops;
592 u32 val;
593
594 gops->ltc = gm20b_ops.ltc;
595 gops->ce2 = gm20b_ops.ce2;
596 gops->gr = gm20b_ops.gr;
597 gops->fb = gm20b_ops.fb;
598 gops->clock_gating = gm20b_ops.clock_gating;
599 gops->fifo = gm20b_ops.fifo;
600 gops->gr_ctx = gm20b_ops.gr_ctx;
601 gops->mm = gm20b_ops.mm;
602 gops->therm = gm20b_ops.therm;
603 gops->pmu = gm20b_ops.pmu;
604 /*
605 * clk must be assigned member by member
606 * since some clk ops are assigned during probe prior to HAL init
607 */
608 gops->clk.init_clk_support = gm20b_ops.clk.init_clk_support;
609 gops->clk.suspend_clk_support = gm20b_ops.clk.suspend_clk_support;
610 gops->clk.get_voltage = gm20b_ops.clk.get_voltage;
611 gops->clk.get_gpcclk_clock_counter =
612 gm20b_ops.clk.get_gpcclk_clock_counter;
613 gops->clk.pll_reg_write = gm20b_ops.clk.pll_reg_write;
614 gops->clk.get_pll_debug_data = gm20b_ops.clk.get_pll_debug_data;
615
616 gops->regops = gm20b_ops.regops;
617 gops->mc = gm20b_ops.mc;
618 gops->dbg_session_ops = gm20b_ops.dbg_session_ops;
619 gops->debug = gm20b_ops.debug;
620 gops->bus = gm20b_ops.bus;
621#if defined(CONFIG_GK20A_CYCLE_STATS)
622 gops->css = gm20b_ops.css;
623#endif
624 gops->falcon = gm20b_ops.falcon;
625
626 gops->priv_ring = gm20b_ops.priv_ring;
627
628 /* Lone functions */
629 gops->chip_init_gpu_characteristics =
630 gm20b_ops.chip_init_gpu_characteristics;
631 gops->get_litter_value = gm20b_ops.get_litter_value;
632
633 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
634 __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false);
635 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
636
637#ifdef CONFIG_TEGRA_ACR
638 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
639 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
640 } else {
641 val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
642 if (!val) {
643 gk20a_dbg_info("priv security is disabled in HW");
644 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
645 } else {
646 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
647 }
648 }
649#else
650 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
651 gk20a_dbg_info("running ASIM with PRIV security disabled");
652 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
653 } else {
654 val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
655 if (!val) {
656 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
657 } else {
658 gk20a_dbg_info("priv security is not supported but enabled");
659 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
660 return -EPERM;
661 }
662 }
663#endif
664
665 /* priv security dependent ops */
666 if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
667 /* Add in ops from gm20b acr */
668 gops->pmu.is_pmu_supported = gm20b_is_pmu_supported;
669 gops->pmu.prepare_ucode = prepare_ucode_blob;
670 gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
671 gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap;
672 gops->pmu.is_priv_load = gm20b_is_priv_load;
673 gops->pmu.get_wpr = gm20b_wpr_info;
674 gops->pmu.alloc_blob_space = gm20b_alloc_blob_space;
675 gops->pmu.pmu_populate_loader_cfg =
676 gm20b_pmu_populate_loader_cfg;
677 gops->pmu.flcn_populate_bl_dmem_desc =
678 gm20b_flcn_populate_bl_dmem_desc;
679 gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt;
680 gops->pmu.falcon_clear_halt_interrupt_status =
681 clear_halt_interrupt_status;
682 gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1;
683
684 gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
685 gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
686
687 gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
688 } else {
689 /* Inherit from gk20a */
690 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported;
691 gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob;
692 gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
693 gops->pmu.pmu_nsbootstrap = pmu_bootstrap;
694
695 gops->pmu.load_lsfalcon_ucode = NULL;
696 gops->pmu.init_wpr_region = NULL;
697
698 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
699 }
700
701 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
702 g->pmu_lsf_pmu_wpr_init_done = 0;
703 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
704
705 g->name = "gm20b";
706
707 return 0;
708}
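gm20b_init_hal() is the single entry point that fills in g->ops for this chip; the common probe code selects it based on the detected GPU id. A minimal dispatch sketch, assuming a GM20B chip-id constant along the lines of GK20A_GPUID_GM20B (the helper itself is illustrative):

/* Illustrative dispatch: only gm20b_init_hal() is taken from this file;
 * the surrounding switch mirrors how a probe path would pick the HAL.
 */
static int example_pick_hal(struct gk20a *g, u32 gpu_id)
{
	switch (gpu_id) {
	case GK20A_GPUID_GM20B:		/* assumed chip-id constant */
		return gm20b_init_hal(g);
	default:
		return -ENODEV;
	}
}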
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.h b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h
new file mode 100644
index 00000000..22eae182
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h
@@ -0,0 +1,31 @@
1/*
2 * GM20B Graphics
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_HAL_GM20B_H
26#define _NVHOST_HAL_GM20B_H
27struct gk20a;
28
29int gm20b_init_hal(struct gk20a *g);
30int gm20b_get_litter_value(struct gk20a *g, int value);
31#endif
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
new file mode 100644
index 00000000..6ec9aec5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -0,0 +1,487 @@
1/*
2 * GM20B L2
3 *
4 * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/timers.h>
28#include <nvgpu/enabled.h>
29#include <nvgpu/bug.h>
30#include <nvgpu/ltc.h>
31
32#include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
33#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
34#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
35#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
36
37#include "gk20a/gk20a.h"
38
39#include "ltc_gm20b.h"
40
41int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
42{
43 /* max memory size (MB) to cover */
44 u32 max_size = gr->max_comptag_mem;
45 /* one tag line covers 128KB */
46 u32 max_comptag_lines = max_size << 3;
47
48 u32 hw_max_comptag_lines =
49 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
50
51 u32 cbc_param =
52 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
53 u32 comptags_per_cacheline =
54 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
55 u32 cacheline_size =
56 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
57 u32 slices_per_ltc =
58 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
59
60 u32 compbit_backing_size;
61
62 int err;
63
64 gk20a_dbg_fn("");
65
66 if (max_comptag_lines == 0)
67 return 0;
68
69 if (max_comptag_lines > hw_max_comptag_lines)
70 max_comptag_lines = hw_max_comptag_lines;
71
72 compbit_backing_size =
73 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
74 cacheline_size * slices_per_ltc * g->ltc_count;
75
76 /* aligned to 2KB * ltc_count */
77 compbit_backing_size +=
78 g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
79
80 /* must be a multiple of 64KB */
81 compbit_backing_size = roundup(compbit_backing_size, 64*1024);
82
83 max_comptag_lines =
84 (compbit_backing_size * comptags_per_cacheline) /
85 (cacheline_size * slices_per_ltc * g->ltc_count);
86
87 if (max_comptag_lines > hw_max_comptag_lines)
88 max_comptag_lines = hw_max_comptag_lines;
89
90 gk20a_dbg_info("compbit backing store size : %d",
91 compbit_backing_size);
92 gk20a_dbg_info("max comptag lines : %d",
93 max_comptag_lines);
94
95 err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size);
96 if (err)
97 return err;
98
99 err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines);
100 if (err)
101 return err;
102
103 gr->comptags_per_cacheline = comptags_per_cacheline;
104 gr->slices_per_ltc = slices_per_ltc;
105 gr->cacheline_size = cacheline_size;
106
107 return 0;
108}
109
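To make the sizing above concrete, a worked example with illustrative (not chip-verified) parameters:

/* Worked example, assumed values only:
 *   max_comptag_mem = 4096 MB  ->  max_comptag_lines = 4096 << 3 = 32768
 *   comptags_per_cacheline = 4, cacheline_size = 2048 B,
 *   slices_per_ltc = 2, ltc_count = 2
 *
 *   backing  = DIV_ROUND_UP(32768, 4) * 2048 * 2 * 2  = 64 MiB
 *   backing += ltc_count << 11 (2 KiB per-LTC align)  = 64 MiB + 4 KiB
 *   backing  = roundup(backing, 64 KiB)               = 64 MiB + 64 KiB
 */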
110int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
111 u32 min, u32 max)
112{
113 struct gr_gk20a *gr = &g->gr;
114 struct nvgpu_timeout timeout;
115 int err = 0;
116 u32 ltc, slice, ctrl1, val, hw_op = 0;
117 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
118 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
119 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
120 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
121 const u32 max_lines = 16384;
122
123 gk20a_dbg_fn("");
124
125 trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);
126
127 if (gr->compbit_store.mem.size == 0)
128 return 0;
129
130 while (1) {
131 const u32 iter_max = min(min + max_lines - 1, max);
132 bool full_cache_op = true;
133
134 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
135
136 gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max);
137
138 if (op == gk20a_cbc_op_clear) {
139 gk20a_writel(
140 g, ltc_ltcs_ltss_cbc_ctrl2_r(),
141 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
142 min));
143 gk20a_writel(
144 g, ltc_ltcs_ltss_cbc_ctrl3_r(),
145 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
146 iter_max));
147 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
148 full_cache_op = false;
149 } else if (op == gk20a_cbc_op_clean) {
150 /* this is full-cache op */
151 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
152 } else if (op == gk20a_cbc_op_invalidate) {
153 /* this is full-cache op */
154 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
155 } else {
156 nvgpu_err(g, "Unknown op: %u", (unsigned)op);
157 err = -EINVAL;
158 goto out;
159 }
160 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
161 gk20a_readl(g,
162 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
163
164 for (ltc = 0; ltc < g->ltc_count; ltc++) {
165 for (slice = 0; slice < slices_per_ltc; slice++) {
166
167 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
168 ltc * ltc_stride + slice * lts_stride;
169
170 nvgpu_timeout_init(g, &timeout, 2000,
171 NVGPU_TIMER_RETRY_TIMER);
172 do {
173 val = gk20a_readl(g, ctrl1);
174 if (!(val & hw_op))
175 break;
176 nvgpu_udelay(5);
177 } while (!nvgpu_timeout_expired(&timeout));
178
179 if (nvgpu_timeout_peek_expired(&timeout)) {
180 nvgpu_err(g, "comp tag clear timeout");
181 err = -EBUSY;
182 goto out;
183 }
184 }
185 }
186
187 /* are we done? */
188 if (full_cache_op || iter_max == max)
189 break;
190
191 /* note: iter_max is inclusive upper bound */
192 min = iter_max + 1;
193
194 /* give a chance for higher-priority threads to progress */
195 nvgpu_mutex_release(&g->mm.l2_op_lock);
196 }
197out:
198 trace_gk20a_ltc_cbc_ctrl_done(g->name);
199 nvgpu_mutex_release(&g->mm.l2_op_lock);
200 return err;
201}
202
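The clear path above walks the requested range in 16384-line chunks and drops the L2 op lock between chunks so other L2 operations can interleave. A caller sketch that clears the comptag lines attached to one allocation; the offset/line parameters are illustrative and normally come from the comptag allocator:

/* Sketch: clear the CBC lines backing a single buffer. */
static int example_clear_buffer_comptags(struct gk20a *g,
					 u32 comptag_offset, u32 comptag_lines)
{
	if (comptag_lines == 0)
		return 0;

	return g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
				   comptag_offset,
				   comptag_offset + comptag_lines - 1);
}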
203void gm20b_ltc_init_fs_state(struct gk20a *g)
204{
205 u32 reg;
206
207 gk20a_dbg_info("initialize gm20b l2");
208
209 g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
210 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
211 gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);
212
213 gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
214 g->ltc_count);
215 gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
216 g->ltc_count);
217
218 gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
219 gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
220 ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());
221
222 /* Disable LTC interrupts */
223 reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
224 reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
225 reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m();
226 reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m();
227 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
228}
229
230void gm20b_ltc_isr(struct gk20a *g)
231{
232 u32 mc_intr, ltc_intr;
233 unsigned int ltc, slice;
234 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
235 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
236
237 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
238 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
239 for (ltc = 0; ltc < g->ltc_count; ltc++) {
240 if ((mc_intr & 1 << ltc) == 0)
241 continue;
242 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
243 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
244 ltc_stride * ltc +
245 lts_stride * slice);
246 nvgpu_err(g, "ltc%d, slice %d: %08x",
247 ltc, slice, ltc_intr);
248 gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
249 ltc_stride * ltc +
250 lts_stride * slice,
251 ltc_intr);
252 }
253 }
254}
255
256u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
257{
258 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
259 if (val == 2) {
260 return base * 2;
261 } else if (val != 1) {
262 nvgpu_err(g, "Invalid number of active ltcs: %08x", val);
263 }
264
265 return base;
266}
267
268/*
269 * Performs a full flush of the L2 cache.
270 */
271void gm20b_flush_ltc(struct gk20a *g)
272{
273 struct nvgpu_timeout timeout;
274 unsigned int ltc;
275 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
276
277 /* Clean... */
278 gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
279 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
280 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
281 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
282 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
283 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
284 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());
285
286 /* Wait on each LTC individually. */
287 for (ltc = 0; ltc < g->ltc_count; ltc++) {
288 u32 op_pending;
289
290 /*
291 * Use 5ms - this should be sufficient time to flush the cache.
292 * On tegra, rough EMC BW available for old tegra chips (newer
293 * chips are strictly faster) can be estimated as follows:
294 *
295 * Lowest reasonable EMC clock speed will be around 102MHz on
296 * t124 for display enabled boards and generally fixed to max
297 * for non-display boards (since they are generally plugged in).
298 *
299 * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
300 * BW the GPU will likely get about half (display and overhead/
301 * utilization inefficiency eating the rest) so 650MB/s at
302 * worst. Assuming at most 1MB of GPU L2 cache (less for most
303 * chips) worst case is we take 1MB/650MB/s = 1.5ms.
304 *
305 * So 5ms timeout here should be more than sufficient.
306 */
307 nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
308
309 do {
310 int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
311 ltc * ltc_stride;
312 op_pending = gk20a_readl(g, cmgmt1);
313 } while ((op_pending &
314 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
315 !nvgpu_timeout_expired_msg(&timeout,
316 "L2 flush timeout!"));
317 }
318
319 /* And invalidate. */
320 gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
321 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
322 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
323 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
324 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
325 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());
326
327 /* Wait on each LTC individually. */
328 for (ltc = 0; ltc < g->ltc_count; ltc++) {
329 u32 op_pending;
330
331 /* Again, 5ms. */
332 nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
333
334 do {
335 int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
336 ltc * ltc_stride;
337 op_pending = gk20a_readl(g, cmgmt0);
338 } while ((op_pending &
339 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
340 !nvgpu_timeout_expired_msg(&timeout,
341 "L2 flush timeout!"));
342 }
343}
344
345int gm20b_determine_L2_size_bytes(struct gk20a *g)
346{
347 u32 lts_per_ltc;
348 u32 ways;
349 u32 sets;
350 u32 bytes_per_line;
351 u32 active_ltcs;
352 u32 cache_size;
353
354 u32 tmp;
355 u32 active_sets_value;
356
357 tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
358 ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));
359
360 active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
361 if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
362 sets = 64;
363 } else if (active_sets_value ==
364 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
365 sets = 32;
366 } else if (active_sets_value ==
367 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
368 sets = 16;
369 } else {
370 nvgpu_err(g, "Unknown constant %u for active sets",
371 (unsigned)active_sets_value);
372 sets = 0;
373 }
374
375 active_ltcs = g->gr.num_fbps;
376
377 /* chip-specific values */
378 lts_per_ltc = 2;
379 bytes_per_line = 128;
380 cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
381
382 return cache_size;
383}
384
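A worked instance of the formula above, using illustrative register readings rather than values measured on hardware:

/* Assumed example: active_ways = 0xffff (hweight32 -> 16 ways),
 * active_sets = "all" (64 sets), num_fbps = 1.
 *
 *   cache_size = 1 ltc * 2 lts * 16 ways * 64 sets * 128 B = 256 KiB
 */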
385/*
386 * Sets the ZBC color for the passed index.
387 */
388void gm20b_ltc_set_zbc_color_entry(struct gk20a *g,
389 struct zbc_entry *color_val,
390 u32 index)
391{
392 u32 i;
393 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
394
395 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
396 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
397
398 for (i = 0;
399 i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
400 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
401 color_val->color_l2[i]);
402 }
403 gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
404}
405
406/*
407 * Sets the ZBC depth for the passed index.
408 */
409void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
410 struct zbc_entry *depth_val,
411 u32 index)
412{
413 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
414
415 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
416 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
417
418 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
419 depth_val->depth);
420
421 gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
422}
423
424void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
425{
426 u32 max_size = gr->max_comptag_mem;
427 u32 max_comptag_lines = max_size << 3;
428
429 u32 compbit_base_post_divide;
430 u64 compbit_base_post_multiply64;
431 u64 compbit_store_iova;
432 u64 compbit_base_post_divide64;
433
434 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
435 compbit_store_iova = nvgpu_mem_get_phys_addr(g,
436 &gr->compbit_store.mem);
437 else
438 compbit_store_iova = nvgpu_mem_get_addr(g,
439 &gr->compbit_store.mem);
440
441 compbit_base_post_divide64 = compbit_store_iova >>
442 ltc_ltcs_ltss_cbc_base_alignment_shift_v();
443
444 do_div(compbit_base_post_divide64, g->ltc_count);
445 compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
446
447 compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
448 g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
449
450 if (compbit_base_post_multiply64 < compbit_store_iova)
451 compbit_base_post_divide++;
452
453 /* Bug 1477079 indicates sw adjustment on the posted divided base. */
454 if (g->ops.ltc.cbc_fix_config)
455 compbit_base_post_divide =
456 g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide);
457
458 gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
459 compbit_base_post_divide);
460
461 gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
462 "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
463 (u32)(compbit_store_iova >> 32),
464 (u32)(compbit_store_iova & 0xffffffff),
465 compbit_base_post_divide);
466
467 gr->compbit_store.base_hw = compbit_base_post_divide;
468
469 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate,
470 0, max_comptag_lines - 1);
471
472}
473
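The rounding check above guards against truncation in do_div(): if dividing the shifted IOVA by the LTC count loses low bits, the programmed base is bumped up so it never points below the real backing store. A small worked case with assumed numbers:

/* Assumed example: compbit_store_iova = 0x10002800, alignment shift = 11,
 * ltc_count = 2.
 *
 *   post_divide   = (0x10002800 >> 11) / 2 = 0x20005 / 2 = 0x10002 (truncated)
 *   post_multiply = (0x10002 * 2) << 11    = 0x10002000  < 0x10002800
 *   -> compbit_base_post_divide is incremented to 0x10003
 */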
474void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled)
475{
476 u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
477 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
478
479 if (enabled)
480 /* bypass disabled (normal caching ops)*/
481 reg &= ~reg_f;
482 else
483 /* bypass enabled (no caching) */
484 reg |= reg_f;
485
486 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
487}
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
new file mode 100644
index 00000000..0f9145be
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
@@ -0,0 +1,49 @@
1/*
2 * GM20B L2
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_LTC
26#define _NVHOST_GM20B_LTC
27struct gpu_ops;
28
29int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
30int gm20b_determine_L2_size_bytes(struct gk20a *g);
31void gm20b_ltc_set_zbc_color_entry(struct gk20a *g,
32 struct zbc_entry *color_val,
33 u32 index);
34void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
35 struct zbc_entry *depth_val,
36 u32 index);
37void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr);
38void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled);
39void gm20b_ltc_init_fs_state(struct gk20a *g);
40int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
41 u32 min, u32 max);
42void gm20b_ltc_isr(struct gk20a *g);
43u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base);
44void gm20b_flush_ltc(struct gk20a *g);
45int gm20b_ltc_alloc_phys_cbc(struct gk20a *g,
46 size_t compbit_backing_size);
47int gm20b_ltc_alloc_virt_cbc(struct gk20a *g,
48 size_t compbit_backing_size);
49#endif
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
new file mode 100644
index 00000000..5cd7706d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -0,0 +1,86 @@
1/*
2 * GM20B MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26
27#include "mm_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
30#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
31
32void gm20b_mm_set_big_page_size(struct gk20a *g,
33 struct nvgpu_mem *mem, int size)
34{
35 u32 val;
36
37 gk20a_dbg_fn("");
38
39 gk20a_dbg_info("big page size %d\n", size);
40 val = nvgpu_mem_rd32(g, mem, ram_in_big_page_size_w());
41 val &= ~ram_in_big_page_size_m();
42
43 if (size == SZ_64K)
44 val |= ram_in_big_page_size_64kb_f();
45 else
46 val |= ram_in_big_page_size_128kb_f();
47
48 nvgpu_mem_wr32(g, mem, ram_in_big_page_size_w(), val);
49 gk20a_dbg_fn("done");
50}
51
52u32 gm20b_mm_get_big_page_sizes(void)
53{
54 return SZ_64K | SZ_128K;
55}
56
57u32 gm20b_mm_get_default_big_page_size(void)
58{
59 return SZ_128K;
60}
61
62bool gm20b_mm_support_sparse(struct gk20a *g)
63{
64 return true;
65}
66
67bool gm20b_mm_is_bar1_supported(struct gk20a *g)
68{
69 return true;
70}
71
72u64 gm20b_gpu_phys_addr(struct gk20a *g,
73 struct nvgpu_gmmu_attrs *attrs, u64 phys)
74{
75 return phys;
76}
77
78u32 gm20b_get_kind_invalid(void)
79{
80 return gmmu_pte_kind_invalid_v();
81}
82
83u32 gm20b_get_kind_pitch(void)
84{
85 return gmmu_pte_kind_pitch_v();
86}
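A minimal caller sketch for the big-page helpers above, assuming an instance block already set up by the common MM code (the helper name is illustrative):

/* Sketch: select 64 KiB big pages in an instance block; SZ_64K is one of
 * the sizes advertised by gm20b_mm_get_big_page_sizes().
 */
static void example_use_64k_big_pages(struct gk20a *g,
				      struct nvgpu_mem *inst_block)
{
	if (gm20b_mm_get_big_page_sizes() & SZ_64K)
		gm20b_mm_set_big_page_size(g, inst_block, SZ_64K);
}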
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
new file mode 100644
index 00000000..af67845a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
@@ -0,0 +1,43 @@
1/*
2 * GM20B GMMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_MM
26#define _NVHOST_GM20B_MM
27struct gk20a;
28
29#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
30#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
31
32void gm20b_mm_set_big_page_size(struct gk20a *g,
33 struct nvgpu_mem *mem, int size);
34u32 gm20b_mm_get_big_page_sizes(void);
35u32 gm20b_mm_get_default_big_page_size(void);
36bool gm20b_mm_support_sparse(struct gk20a *g);
37bool gm20b_mm_is_bar1_supported(struct gk20a *g);
38int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
39u64 gm20b_gpu_phys_addr(struct gk20a *g,
40 struct nvgpu_gmmu_attrs *attrs, u64 phys);
41u32 gm20b_get_kind_invalid(void);
42u32 gm20b_get_kind_pitch(void);
43#endif
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
new file mode 100644
index 00000000..664134f9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -0,0 +1,283 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/timers.h>
26#include <nvgpu/pmu.h>
27#include <nvgpu/fuse.h>
28#include <nvgpu/enabled.h>
29
30#include "gk20a/gk20a.h"
31#include "gk20a/pmu_gk20a.h"
32
33#include "acr_gm20b.h"
34#include "pmu_gm20b.h"
35
36#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
38#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
39
40/*!
41 * Structure/object which single register write need to be done during PG init
42 * sequence to set PROD values.
43 */
44struct pg_init_sequence_list {
45 u32 regaddr;
46 u32 writeval;
47};
48
49#define gm20b_dbg_pmu(fmt, arg...) \
50 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
51
52
53/* PROD settings for ELPG sequencing registers*/
54static struct pg_init_sequence_list _pginitseq_gm20b[] = {
55 { 0x0010ab10, 0x8180},
56 { 0x0010e118, 0x83828180},
57 { 0x0010e068, 0},
58 { 0x0010e06c, 0x00000080},
59 { 0x0010e06c, 0x00000081},
60 { 0x0010e06c, 0x00000082},
61 { 0x0010e06c, 0x00000083},
62 { 0x0010e06c, 0x00000084},
63 { 0x0010e06c, 0x00000085},
64 { 0x0010e06c, 0x00000086},
65 { 0x0010e06c, 0x00000087},
66 { 0x0010e06c, 0x00000088},
67 { 0x0010e06c, 0x00000089},
68 { 0x0010e06c, 0x0000008a},
69 { 0x0010e06c, 0x0000008b},
70 { 0x0010e06c, 0x0000008c},
71 { 0x0010e06c, 0x0000008d},
72 { 0x0010e06c, 0x0000008e},
73 { 0x0010e06c, 0x0000008f},
74 { 0x0010e06c, 0x00000090},
75 { 0x0010e06c, 0x00000091},
76 { 0x0010e06c, 0x00000092},
77 { 0x0010e06c, 0x00000093},
78 { 0x0010e06c, 0x00000094},
79 { 0x0010e06c, 0x00000095},
80 { 0x0010e06c, 0x00000096},
81 { 0x0010e06c, 0x00000097},
82 { 0x0010e06c, 0x00000098},
83 { 0x0010e06c, 0x00000099},
84 { 0x0010e06c, 0x0000009a},
85 { 0x0010e06c, 0x0000009b},
86 { 0x0010ab14, 0x00000000},
87 { 0x0010ab18, 0x00000000},
88 { 0x0010e024, 0x00000000},
89 { 0x0010e028, 0x00000000},
90 { 0x0010e11c, 0x00000000},
91 { 0x0010e120, 0x00000000},
92 { 0x0010ab1c, 0x02010155},
93 { 0x0010e020, 0x001b1b55},
94 { 0x0010e124, 0x01030355},
95 { 0x0010ab20, 0x89abcdef},
96 { 0x0010ab24, 0x00000000},
97 { 0x0010e02c, 0x89abcdef},
98 { 0x0010e030, 0x00000000},
99 { 0x0010e128, 0x89abcdef},
100 { 0x0010e12c, 0x00000000},
101 { 0x0010ab28, 0x74444444},
102 { 0x0010ab2c, 0x70000000},
103 { 0x0010e034, 0x74444444},
104 { 0x0010e038, 0x70000000},
105 { 0x0010e130, 0x74444444},
106 { 0x0010e134, 0x70000000},
107 { 0x0010ab30, 0x00000000},
108 { 0x0010ab34, 0x00000001},
109 { 0x00020004, 0x00000000},
110 { 0x0010e138, 0x00000000},
111 { 0x0010e040, 0x00000000},
112};
113
114int gm20b_pmu_setup_elpg(struct gk20a *g)
115{
116 int ret = 0;
117 u32 reg_writes;
118 u32 index;
119
120 gk20a_dbg_fn("");
121
122 if (g->elpg_enabled) {
123 reg_writes = ((sizeof(_pginitseq_gm20b) /
124 sizeof((_pginitseq_gm20b)[0])));
125 /* Initialize registers with production values*/
126 for (index = 0; index < reg_writes; index++) {
127 gk20a_writel(g, _pginitseq_gm20b[index].regaddr,
128 _pginitseq_gm20b[index].writeval);
129 }
130 }
131
132 gk20a_dbg_fn("done");
133 return ret;
134}
135
136static void pmu_handle_acr_init_wpr_msg(struct gk20a *g, struct pmu_msg *msg,
137 void *param, u32 handle, u32 status)
138{
139 gk20a_dbg_fn("");
140
141 gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_INIT_WPR_REGION");
142
143 if (msg->msg.acr.acrmsg.errorcode == PMU_ACR_SUCCESS)
144 g->pmu_lsf_pmu_wpr_init_done = 1;
145 gk20a_dbg_fn("done");
146}
147
148
149int gm20b_pmu_init_acr(struct gk20a *g)
150{
151 struct nvgpu_pmu *pmu = &g->pmu;
152 struct pmu_cmd cmd;
153 u32 seq;
154
155 gk20a_dbg_fn("");
156
157 /* init ACR */
158 memset(&cmd, 0, sizeof(struct pmu_cmd));
159 cmd.hdr.unit_id = PMU_UNIT_ACR;
160 cmd.hdr.size = PMU_CMD_HDR_SIZE +
161 sizeof(struct pmu_acr_cmd_init_wpr_details);
162 cmd.cmd.acr.init_wpr.cmd_type = PMU_ACR_CMD_ID_INIT_WPR_REGION;
163 cmd.cmd.acr.init_wpr.regionid = 0x01;
164 cmd.cmd.acr.init_wpr.wproffset = 0x00;
165 gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_INIT_WPR_REGION");
166 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
167 pmu_handle_acr_init_wpr_msg, pmu, &seq, ~0);
168
169 gk20a_dbg_fn("done");
170 return 0;
171}
172
173void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
174 void *param, u32 handle, u32 status)
175{
176
177 gk20a_dbg_fn("");
178
179
180 gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON");
181
182	gm20b_dbg_pmu("falcon id = %x\n", msg->msg.acr.acrmsg.falconid);
183 g->pmu_lsf_loaded_falcon_id = msg->msg.acr.acrmsg.falconid;
184 gk20a_dbg_fn("done");
185}
186
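/*
 * Poll FECS ctxsw mailbox 0 until it holds the expected value or the
 * timeout expires; returns 0 on a match, -ETIMEDOUT otherwise.
 */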
187static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout_ms,
188 u32 val)
189{
190 unsigned long delay = GR_FECS_POLL_INTERVAL;
191 u32 reg;
192 struct nvgpu_timeout timeout;
193
194 gk20a_dbg_fn("");
195 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
196
197 nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
198
199 do {
200 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
201 if (reg == val)
202 return 0;
203 nvgpu_udelay(delay);
204 } while (!nvgpu_timeout_expired(&timeout));
205
206 return -ETIMEDOUT;
207}
208
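/*
 * Ask the PMU ACR task to bootstrap the given LS falcon. The command is
 * posted only after the WPR init handshake has completed.
 */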
209void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags)
210{
211 struct nvgpu_pmu *pmu = &g->pmu;
212 struct pmu_cmd cmd;
213 u32 seq;
214
215 gk20a_dbg_fn("");
216
217 gm20b_dbg_pmu("wprinit status = %x\n", g->pmu_lsf_pmu_wpr_init_done);
218 if (g->pmu_lsf_pmu_wpr_init_done) {
219 /* send message to load FECS falcon */
220 memset(&cmd, 0, sizeof(struct pmu_cmd));
221 cmd.hdr.unit_id = PMU_UNIT_ACR;
222 cmd.hdr.size = PMU_CMD_HDR_SIZE +
223 sizeof(struct pmu_acr_cmd_bootstrap_falcon);
224 cmd.cmd.acr.bootstrap_falcon.cmd_type =
225 PMU_ACR_CMD_ID_BOOTSTRAP_FALCON;
226 cmd.cmd.acr.bootstrap_falcon.flags = flags;
227 cmd.cmd.acr.bootstrap_falcon.falconid = falcon_id;
228 gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_BOOTSTRAP_FALCON: %x\n",
229 falcon_id);
230 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
231 pmu_handle_fecs_boot_acr_msg, pmu, &seq, ~0);
232 }
233
234 gk20a_dbg_fn("done");
235 return;
236}
237
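/*
 * Bootstrap FECS via the PMU: wait for WPR init if needed, clear FECS
 * ctxsw mailbox 0, request the bootstrap and poll for the 0x55AA55AA
 * token signalling that FECS is ready.
 */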
238int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
239{
240	int err = 0;
241 u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
242 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
243
244 /* GM20B PMU supports loading FECS only */
245	if (falconidmask != (1 << LSF_FALCON_ID_FECS))
246 return -EINVAL;
247	/* check whether the PMU is ready to bootstrap the LSF; if not, wait for it */
248 if (!g->pmu_lsf_pmu_wpr_init_done) {
249 pmu_wait_message_cond(&g->pmu,
250 gk20a_get_gr_idle_timeout(g),
251 &g->pmu_lsf_pmu_wpr_init_done, 1);
252		/* check again; if it is still not ready, report an error */
253 if (!g->pmu_lsf_pmu_wpr_init_done) {
254 nvgpu_err(g, "PMU not ready to load LSF");
255 return -ETIMEDOUT;
256 }
257 }
258 /* load FECS */
259 gk20a_writel(g,
260 gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
261 gm20b_pmu_load_lsf(g, LSF_FALCON_ID_FECS, flags);
262 err = pmu_gm20b_ctx_wait_lsf_ready(g, timeout,
263 0x55AA55AA);
264 return err;
265}
266
267void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr)
268{
269 gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr);
270}
271
272/* Dump security-related fuses */
273void pmu_dump_security_fuses_gm20b(struct gk20a *g)
274{
275 u32 val;
276
277 nvgpu_err(g, "FUSE_OPT_SEC_DEBUG_EN_0: 0x%x",
278 gk20a_readl(g, fuse_opt_sec_debug_en_r()));
279 nvgpu_err(g, "FUSE_OPT_PRIV_SEC_EN_0: 0x%x",
280 gk20a_readl(g, fuse_opt_priv_sec_en_r()));
281 nvgpu_tegra_fuse_read_gcplex_config_fuse(g, &val);
282 nvgpu_err(g, "FUSE_GCPLEX_CONFIG_FUSE_0: 0x%x", val);
283}
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
new file mode 100644
index 00000000..ec50fb06
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
@@ -0,0 +1,37 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef __PMU_GM20B_H_
26#define __PMU_GM20B_H_
27
28struct gk20a;
29
30int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
31int gm20b_pmu_setup_elpg(struct gk20a *g);
32void pmu_dump_security_fuses_gm20b(struct gk20a *g);
33void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags);
34int gm20b_pmu_init_acr(struct gk20a *g);
35void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr);
36
37#endif /*__PMU_GM20B_H_*/
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.c b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c
new file mode 100644
index 00000000..79c980f4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c
@@ -0,0 +1,450 @@
1/*
2 * Tegra GM20B GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/dbg_gpu_gk20a.h"
27#include "gk20a/regops_gk20a.h"
28#include "regops_gm20b.h"
29
30#include <nvgpu/bsearch.h>
31
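/* global */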
32static const struct regop_offset_range gm20b_global_whitelist_ranges[] = {
33 { 0x00001a00, 3 },
34 { 0x0000259c, 1 },
35 { 0x0000280c, 1 },
36 { 0x00009400, 1 },
37 { 0x00009410, 1 },
38 { 0x00021970, 1 },
39 { 0x00021c00, 4 },
40 { 0x00021c14, 3 },
41 { 0x00021c24, 1 },
42 { 0x00021c2c, 5 },
43 { 0x00021cb8, 2 },
44 { 0x00021d38, 2 },
45 { 0x00021d44, 1 },
46 { 0x00021d4c, 1 },
47 { 0x00021d54, 1 },
48 { 0x00021d5c, 1 },
49 { 0x00021d64, 2 },
50 { 0x00021d70, 1 },
51 { 0x00022430, 7 },
52 { 0x00100c18, 3 },
53 { 0x00100c84, 1 },
54 { 0x00100cc4, 1 },
55 { 0x00106640, 1 },
56 { 0x0010a0a8, 1 },
57 { 0x0010a4f0, 1 },
58 { 0x0010e064, 1 },
59 { 0x0010e164, 1 },
60 { 0x0010e490, 1 },
61 { 0x00140028, 1 },
62 { 0x00140350, 1 },
63 { 0x00140550, 1 },
64 { 0x00142028, 1 },
65 { 0x00142350, 1 },
66 { 0x00142550, 1 },
67 { 0x0017e028, 1 },
68 { 0x0017e350, 1 },
69 { 0x0017e550, 1 },
70 { 0x00180040, 52 },
71 { 0x00180240, 52 },
72 { 0x00180440, 52 },
73 { 0x001a0040, 52 },
74 { 0x001b0040, 52 },
75 { 0x001b0240, 52 },
76 { 0x001b0440, 52 },
77 { 0x001b0640, 52 },
78 { 0x001b4000, 3 },
79 { 0x001b4010, 3 },
80 { 0x001b4020, 3 },
81 { 0x001b4030, 3 },
82 { 0x001b4040, 3 },
83 { 0x001b4050, 3 },
84 { 0x001b4060, 4 },
85 { 0x001b4074, 11 },
86 { 0x001b40a4, 1 },
87 { 0x001b4100, 6 },
88 { 0x001b4124, 2 },
89 { 0x001b8000, 7 },
90 { 0x001bc000, 7 },
91 { 0x001be000, 7 },
92 { 0x00400500, 1 },
93 { 0x00400700, 1 },
94 { 0x0040415c, 1 },
95 { 0x00405850, 1 },
96 { 0x00405908, 1 },
97 { 0x00405b40, 1 },
98 { 0x00405b50, 1 },
99 { 0x00406024, 1 },
100 { 0x00407010, 1 },
101 { 0x00407808, 1 },
102 { 0x0040803c, 1 },
103 { 0x0040880c, 1 },
104 { 0x00408910, 1 },
105 { 0x00408984, 1 },
106 { 0x004090a8, 1 },
107 { 0x004098a0, 1 },
108 { 0x00409b00, 1 },
109 { 0x0041000c, 1 },
110 { 0x00410110, 1 },
111 { 0x00410184, 1 },
112 { 0x0041040c, 1 },
113 { 0x00410510, 1 },
114 { 0x00410584, 1 },
115 { 0x00418384, 1 },
116 { 0x004184a0, 1 },
117 { 0x00418604, 1 },
118 { 0x00418680, 1 },
119 { 0x00418714, 1 },
120 { 0x0041881c, 1 },
121 { 0x00418884, 1 },
122 { 0x004188b0, 1 },
123 { 0x004188c8, 2 },
124 { 0x00418b04, 1 },
125 { 0x00418c04, 1 },
126 { 0x00418c1c, 1 },
127 { 0x00418c88, 1 },
128 { 0x00418d00, 1 },
129 { 0x00418e08, 1 },
130 { 0x00418f08, 1 },
131 { 0x00419000, 1 },
132 { 0x0041900c, 1 },
133 { 0x00419018, 1 },
134 { 0x00419854, 1 },
135 { 0x00419ab0, 1 },
136 { 0x00419ab8, 3 },
137 { 0x00419c0c, 1 },
138 { 0x00419c90, 1 },
139 { 0x00419d08, 2 },
140 { 0x00419e00, 4 },
141 { 0x00419e24, 2 },
142 { 0x00419e44, 11 },
143 { 0x00419e74, 9 },
144 { 0x00419ea4, 1 },
145 { 0x00419eb0, 1 },
146 { 0x00419ef0, 26 },
147 { 0x0041a0a0, 1 },
148 { 0x0041a0a8, 1 },
149 { 0x0041a17c, 1 },
150 { 0x0041a890, 2 },
151 { 0x0041a8a0, 3 },
152 { 0x0041a8b0, 2 },
153 { 0x0041b014, 1 },
154 { 0x0041b0a0, 1 },
155 { 0x0041b0cc, 1 },
156 { 0x0041b0e8, 2 },
157 { 0x0041b1dc, 1 },
158 { 0x0041be14, 1 },
159 { 0x0041bea0, 1 },
160 { 0x0041becc, 1 },
161 { 0x0041bee8, 2 },
162 { 0x0041bfdc, 1 },
163 { 0x0041c054, 1 },
164 { 0x0041c2b0, 1 },
165 { 0x0041c2b8, 3 },
166 { 0x0041c40c, 1 },
167 { 0x0041c490, 1 },
168 { 0x0041c508, 2 },
169 { 0x0041c600, 4 },
170 { 0x0041c624, 2 },
171 { 0x0041c644, 11 },
172 { 0x0041c674, 9 },
173 { 0x0041c6a4, 1 },
174 { 0x0041c6b0, 1 },
175 { 0x0041c6f0, 26 },
176 { 0x0041c854, 1 },
177 { 0x0041cab0, 1 },
178 { 0x0041cab8, 3 },
179 { 0x0041cc0c, 1 },
180 { 0x0041cc90, 1 },
181 { 0x0041cd08, 2 },
182 { 0x0041ce00, 4 },
183 { 0x0041ce24, 2 },
184 { 0x0041ce44, 11 },
185 { 0x0041ce74, 9 },
186 { 0x0041cea4, 1 },
187 { 0x0041ceb0, 1 },
188 { 0x0041cef0, 26 },
189 { 0x00500384, 1 },
190 { 0x005004a0, 1 },
191 { 0x00500604, 1 },
192 { 0x00500680, 1 },
193 { 0x00500714, 1 },
194 { 0x0050081c, 1 },
195 { 0x00500884, 1 },
196 { 0x005008c8, 2 },
197 { 0x00500b04, 1 },
198 { 0x00500c04, 1 },
199 { 0x00500c88, 1 },
200 { 0x00500d00, 1 },
201 { 0x00500e08, 1 },
202 { 0x00500f08, 1 },
203 { 0x00501000, 1 },
204 { 0x0050100c, 1 },
205 { 0x00501018, 1 },
206 { 0x00501854, 1 },
207 { 0x00501ab0, 1 },
208 { 0x00501ab8, 3 },
209 { 0x00501c0c, 1 },
210 { 0x00501c90, 1 },
211 { 0x00501d08, 2 },
212 { 0x00501e00, 4 },
213 { 0x00501e24, 2 },
214 { 0x00501e44, 11 },
215 { 0x00501e74, 9 },
216 { 0x00501ea4, 1 },
217 { 0x00501eb0, 1 },
218 { 0x00501ef0, 26 },
219 { 0x005020a0, 1 },
220 { 0x005020a8, 1 },
221 { 0x0050217c, 1 },
222 { 0x00502890, 2 },
223 { 0x005028a0, 3 },
224 { 0x005028b0, 2 },
225 { 0x00503014, 1 },
226 { 0x005030a0, 1 },
227 { 0x005030cc, 1 },
228 { 0x005030e8, 2 },
229 { 0x005031dc, 1 },
230 { 0x00503e14, 1 },
231 { 0x00503ea0, 1 },
232 { 0x00503ecc, 1 },
233 { 0x00503ee8, 2 },
234 { 0x00503fdc, 1 },
235 { 0x00504054, 1 },
236 { 0x005042b0, 1 },
237 { 0x005042b8, 3 },
238 { 0x0050440c, 1 },
239 { 0x00504490, 1 },
240 { 0x00504508, 2 },
241 { 0x00504600, 4 },
242 { 0x00504614, 6 },
243 { 0x00504634, 2 },
244 { 0x00504644, 11 },
245 { 0x00504674, 9 },
246 { 0x005046a4, 1 },
247 { 0x005046b0, 1 },
248 { 0x005046f0, 28 },
249 { 0x00504854, 1 },
250 { 0x00504ab0, 1 },
251 { 0x00504ab8, 3 },
252 { 0x00504c0c, 1 },
253 { 0x00504c90, 1 },
254 { 0x00504d08, 2 },
255 { 0x00504e00, 4 },
256 { 0x00504e14, 6 },
257 { 0x00504e34, 2 },
258 { 0x00504e44, 11 },
259 { 0x00504e74, 9 },
260 { 0x00504ea4, 1 },
261 { 0x00504eb0, 1 },
262 { 0x00504ef0, 28 },
263};
264static const u32 gm20b_global_whitelist_ranges_count =
265 ARRAY_SIZE(gm20b_global_whitelist_ranges);
266
267/* context */
268
269static const struct regop_offset_range gm20b_context_whitelist_ranges[] = {
270 { 0x0000259c, 1 },
271 { 0x0000280c, 1 },
272 { 0x00400500, 1 },
273 { 0x00405b40, 1 },
274 { 0x00418e00, 1 },
275 { 0x00418e34, 1 },
276 { 0x00418e40, 2 },
277 { 0x00418e58, 2 },
278 { 0x00419000, 1 },
279 { 0x00419864, 1 },
280 { 0x00419c90, 1 },
281 { 0x00419d08, 2 },
282 { 0x00419e04, 3 },
283 { 0x00419e24, 2 },
284 { 0x00419e44, 11 },
285 { 0x00419e74, 10 },
286 { 0x00419ea4, 1 },
287 { 0x00419eac, 2 },
288 { 0x00419ee8, 1 },
289 { 0x00419ef0, 26 },
290 { 0x0041b0e8, 2 },
291 { 0x0041bee8, 2 },
292 { 0x0041c490, 1 },
293 { 0x0041c508, 2 },
294 { 0x0041c604, 3 },
295 { 0x0041c624, 2 },
296 { 0x0041c644, 11 },
297 { 0x0041c674, 10 },
298 { 0x0041c6a4, 1 },
299 { 0x0041c6ac, 2 },
300 { 0x0041c6e8, 1 },
301 { 0x0041c6f0, 26 },
302 { 0x0041cc90, 1 },
303 { 0x0041cd08, 2 },
304 { 0x0041ce04, 3 },
305 { 0x0041ce24, 2 },
306 { 0x0041ce44, 11 },
307 { 0x0041ce74, 10 },
308 { 0x0041cea4, 1 },
309 { 0x0041ceac, 2 },
310 { 0x0041cee8, 1 },
311 { 0x0041cef0, 26 },
312 { 0x00501000, 1 },
313 { 0x00501c90, 1 },
314 { 0x00501d08, 2 },
315 { 0x00501e04, 3 },
316 { 0x00501e24, 2 },
317 { 0x00501e44, 11 },
318 { 0x00501e74, 10 },
319 { 0x00501ea4, 1 },
320 { 0x00501eac, 2 },
321 { 0x00501ee8, 1 },
322 { 0x00501ef0, 26 },
323 { 0x005030e8, 2 },
324 { 0x00503ee8, 2 },
325 { 0x00504490, 1 },
326 { 0x00504508, 2 },
327 { 0x00504604, 3 },
328 { 0x00504614, 6 },
329 { 0x00504634, 2 },
330 { 0x00504644, 11 },
331 { 0x00504674, 10 },
332 { 0x005046a4, 1 },
333 { 0x005046ac, 2 },
334 { 0x005046e8, 1 },
335 { 0x005046f0, 28 },
336 { 0x00504c90, 1 },
337 { 0x00504d08, 2 },
338 { 0x00504e04, 3 },
339 { 0x00504e14, 6 },
340 { 0x00504e34, 2 },
341 { 0x00504e44, 11 },
342 { 0x00504e74, 10 },
343 { 0x00504ea4, 1 },
344 { 0x00504eac, 2 },
345 { 0x00504ee8, 1 },
346 { 0x00504ef0, 28 },
347};
348static const u32 gm20b_context_whitelist_ranges_count =
349 ARRAY_SIZE(gm20b_context_whitelist_ranges);
350
351/* runcontrol */
352static const u32 gm20b_runcontrol_whitelist[] = {
353 0x00419e10,
354 0x0041c610,
355 0x0041ce10,
356 0x00501e10,
357 0x00504610,
358 0x00504e10,
359};
360static const u32 gm20b_runcontrol_whitelist_count =
361 ARRAY_SIZE(gm20b_runcontrol_whitelist);
362
363static const struct regop_offset_range gm20b_runcontrol_whitelist_ranges[] = {
364 { 0x00419e10, 1 },
365 { 0x0041c610, 1 },
366 { 0x0041ce10, 1 },
367 { 0x00501e10, 1 },
368 { 0x00504610, 1 },
369 { 0x00504e10, 1 },
370};
371static const u32 gm20b_runcontrol_whitelist_ranges_count =
372 ARRAY_SIZE(gm20b_runcontrol_whitelist_ranges);
373
374
375/* quad ctl */
376static const u32 gm20b_qctl_whitelist[] = {
377};
378static const u32 gm20b_qctl_whitelist_count =
379 ARRAY_SIZE(gm20b_qctl_whitelist);
380
381static const struct regop_offset_range gm20b_qctl_whitelist_ranges[] = {
382};
383static const u32 gm20b_qctl_whitelist_ranges_count =
384 ARRAY_SIZE(gm20b_qctl_whitelist_ranges);
385
386const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void)
387{
388 return gm20b_global_whitelist_ranges;
389}
390
391int gm20b_get_global_whitelist_ranges_count(void)
392{
393 return gm20b_global_whitelist_ranges_count;
394}
395
396const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void)
397{
398 return gm20b_context_whitelist_ranges;
399}
400
401int gm20b_get_context_whitelist_ranges_count(void)
402{
403 return gm20b_context_whitelist_ranges_count;
404}
405
406const u32 *gm20b_get_runcontrol_whitelist(void)
407{
408 return gm20b_runcontrol_whitelist;
409}
410
411int gm20b_get_runcontrol_whitelist_count(void)
412{
413 return gm20b_runcontrol_whitelist_count;
414}
415
416const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void)
417{
418 return gm20b_runcontrol_whitelist_ranges;
419}
420
421int gm20b_get_runcontrol_whitelist_ranges_count(void)
422{
423 return gm20b_runcontrol_whitelist_ranges_count;
424}
425
426const u32 *gm20b_get_qctl_whitelist(void)
427{
428 return gm20b_qctl_whitelist;
429}
430
431int gm20b_get_qctl_whitelist_count(void)
432{
433 return gm20b_qctl_whitelist_count;
434}
435
436const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void)
437{
438 return gm20b_qctl_whitelist_ranges;
439}
440
441int gm20b_get_qctl_whitelist_ranges_count(void)
442{
443 return gm20b_qctl_whitelist_ranges_count;
444}
445
446int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s)
447{
448 /* Not needed on gm20b */
449 return 0;
450}
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.h b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h
new file mode 100644
index 00000000..f0246e0e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h
@@ -0,0 +1,44 @@
1/*
2 *
3 * Tegra GM20B GPU Debugger Driver Register Ops
4 *
5 * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25#ifndef __REGOPS_GM20B_H_
26#define __REGOPS_GM20B_H_
27
28struct dbg_session_gk20a;
29
30const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void);
31int gm20b_get_global_whitelist_ranges_count(void);
32const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void);
33int gm20b_get_context_whitelist_ranges_count(void);
34const u32 *gm20b_get_runcontrol_whitelist(void);
35int gm20b_get_runcontrol_whitelist_count(void);
36const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void);
37int gm20b_get_runcontrol_whitelist_ranges_count(void);
38const u32 *gm20b_get_qctl_whitelist(void);
39int gm20b_get_qctl_whitelist_count(void);
40const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void);
41int gm20b_get_qctl_whitelist_ranges_count(void);
42int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s);
43
44#endif /* __REGOPS_GM20B_H_ */
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.c b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c
new file mode 100644
index 00000000..ce4d4fab
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c
@@ -0,0 +1,78 @@
1/*
2 * GM20B THERMAL
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26
27#include "therm_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_therm_gm20b.h>
30
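/*
 * Program the NV_THERM external-therm event slowdown factors and the
 * gradual slowdown stepping tables, then enable gradual slowdown.
 */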
31int gm20b_init_therm_setup_hw(struct gk20a *g)
32{
33 u32 v;
34
35 gk20a_dbg_fn("");
36
37 /* program NV_THERM registers */
38 gk20a_writel(g, therm_use_a_r(), therm_use_a_ext_therm_0_enable_f() |
39 therm_use_a_ext_therm_1_enable_f() |
40 therm_use_a_ext_therm_2_enable_f());
41 gk20a_writel(g, therm_evt_ext_therm_0_r(),
42 therm_evt_ext_therm_0_slow_factor_f(0x2));
43 gk20a_writel(g, therm_evt_ext_therm_1_r(),
44 therm_evt_ext_therm_1_slow_factor_f(0x6));
45 gk20a_writel(g, therm_evt_ext_therm_2_r(),
46 therm_evt_ext_therm_2_slow_factor_f(0xe));
47
48 gk20a_writel(g, therm_grad_stepping_table_r(0),
49 therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) |
50 therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) |
51 therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) |
52 therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
53 therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
54 gk20a_writel(g, therm_grad_stepping_table_r(1),
55 therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
56 therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
57 therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
58 therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
59 therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
60
61 v = gk20a_readl(g, therm_clk_timing_r(0));
62 v |= therm_clk_timing_grad_slowdown_enabled_f();
63 gk20a_writel(g, therm_clk_timing_r(0), v);
64
65 v = gk20a_readl(g, therm_config2_r());
66 v |= therm_config2_grad_enable_f(1);
67 v |= therm_config2_slowdown_factor_extended_f(1);
68 gk20a_writel(g, therm_config2_r(), v);
69
70 gk20a_writel(g, therm_grad_stepping1_r(),
71 therm_grad_stepping1_pdiv_duration_f(32));
72
73 v = gk20a_readl(g, therm_grad_stepping0_r());
74 v |= therm_grad_stepping0_feature_enable_f();
75 gk20a_writel(g, therm_grad_stepping0_r(), v);
76
77 return 0;
78}
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.h b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h
new file mode 100644
index 00000000..df0b4219
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h
@@ -0,0 +1,30 @@
1/*
2 * GM20B THERMAL
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef THERM_GM20B_H
25#define THERM_GM20B_H
26
27struct gk20a;
28int gm20b_init_therm_setup_hw(struct gk20a *g);
29
30#endif /* THERM_GM20B_H */