diff options
author | Bo Yan <byan@nvidia.com> | 2014-04-14 15:03:27 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:38 -0400 |
commit | 9eb1f57ba2b7072c6c53ca9dc59b10a87d88db23 (patch) | |
tree | de095fcbae617e938064e45a96b4e176015fe773 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |
parent | 2531751f53fc21bdeb0ece4af550ea1e8efb9653 (diff) |
gpu: nvgpu: Add GPU driver for GM20B
this moves GM20B driver to the new location
Change-Id: I5fde14e114a8db79738a4c61849912b1ae225fb5
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 593 |
1 files changed, 593 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c new file mode 100644 index 00000000..54184766 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -0,0 +1,593 @@ | |||
1 | /* | ||
2 | * GM20B GR (graphics engine) support | ||
3 | * | ||
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | |||
18 | #include "gk20a/gk20a.h" | ||
19 | #include "gk20a/gr_gk20a.h" | ||
20 | |||
21 | #include "gr_gm20b.h" | ||
22 | #include "hw_gr_gm20b.h" | ||
23 | #include "hw_fb_gm20b.h" | ||
24 | #include "hw_proj_gm20b.h" | ||
25 | #include "hw_ctxsw_prog_gm20b.h" | ||
26 | |||
27 | static void gr_gm20b_init_gpc_mmu(struct gk20a *g) | ||
28 | { | ||
29 | u32 temp; | ||
30 | |||
31 | gk20a_dbg_info("initialize gpc mmu"); | ||
32 | |||
33 | temp = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
34 | temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | | ||
35 | gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | | ||
36 | gr_gpcs_pri_mmu_ctrl_vol_fault_m() | | ||
37 | gr_gpcs_pri_mmu_ctrl_comp_fault_m() | | ||
38 | gr_gpcs_pri_mmu_ctrl_miss_gran_m() | | ||
39 | gr_gpcs_pri_mmu_ctrl_cache_mode_m() | | ||
40 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | ||
41 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | ||
42 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); | ||
43 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | ||
44 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | ||
45 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | ||
46 | |||
47 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), | ||
48 | gk20a_readl(g, fb_mmu_debug_ctrl_r())); | ||
49 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), | ||
50 | gk20a_readl(g, fb_mmu_debug_wr_r())); | ||
51 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), | ||
52 | gk20a_readl(g, fb_mmu_debug_rd_r())); | ||
53 | |||
54 | gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), | ||
55 | gk20a_readl(g, fb_fbhub_num_active_ltcs_r())); | ||
56 | } | ||
57 | |||
58 | static void gr_gm20b_bundle_cb_defaults(struct gk20a *g) | ||
59 | { | ||
60 | struct gr_gk20a *gr = &g->gr; | ||
61 | |||
62 | gr->bundle_cb_default_size = | ||
63 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
64 | gr->min_gpm_fifo_depth = | ||
65 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
66 | gr->bundle_cb_token_limit = | ||
67 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
68 | } | ||
69 | |||
70 | static void gr_gm20b_cb_size_default(struct gk20a *g) | ||
71 | { | ||
72 | struct gr_gk20a *gr = &g->gr; | ||
73 | |||
74 | gr->attrib_cb_default_size = | ||
75 | gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v(); | ||
76 | gr->alpha_cb_default_size = | ||
77 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v(); | ||
78 | } | ||
79 | |||
80 | static int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) | ||
81 | { | ||
82 | struct gr_gk20a *gr = &g->gr; | ||
83 | int size; | ||
84 | |||
85 | gr->attrib_cb_size = gr->attrib_cb_default_size | ||
86 | + (gr->attrib_cb_default_size >> 1); | ||
87 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
88 | + (gr->alpha_cb_default_size >> 1); | ||
89 | |||
90 | size = gr->attrib_cb_size * | ||
91 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() * | ||
92 | gr->max_tpc_count; | ||
93 | |||
94 | size += gr->alpha_cb_size * | ||
95 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() * | ||
96 | gr->max_tpc_count; | ||
97 | |||
98 | return size; | ||
99 | } | ||
100 | |||
101 | static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, | ||
102 | struct channel_ctx_gk20a *ch_ctx, | ||
103 | u64 addr, bool patch) | ||
104 | { | ||
105 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | ||
106 | gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | | ||
107 | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); | ||
108 | |||
109 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), | ||
110 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | | ||
111 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); | ||
112 | |||
113 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), | ||
114 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | | ||
115 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); | ||
116 | } | ||
117 | |||
118 | static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | ||
119 | struct channel_ctx_gk20a *ch_ctx, | ||
120 | u64 addr, u64 size, bool patch) | ||
121 | { | ||
122 | u32 data; | ||
123 | |||
124 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
125 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
126 | |||
127 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
128 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
129 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
130 | |||
131 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), | ||
132 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); | ||
133 | |||
134 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), | ||
135 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | | ||
136 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); | ||
137 | |||
138 | /* data for state_limit */ | ||
139 | data = (g->gr.bundle_cb_default_size * | ||
140 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
141 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
142 | |||
143 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
144 | |||
145 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
146 | g->gr.bundle_cb_token_limit, data); | ||
147 | |||
148 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
149 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
150 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
151 | |||
152 | } | ||
153 | |||
154 | static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | ||
155 | struct channel_gk20a *c, bool patch) | ||
156 | { | ||
157 | struct gr_gk20a *gr = &g->gr; | ||
158 | struct channel_ctx_gk20a *ch_ctx = NULL; | ||
159 | u32 attrib_offset_in_chunk = 0; | ||
160 | u32 alpha_offset_in_chunk = 0; | ||
161 | u32 pd_ab_max_output; | ||
162 | u32 gpc_index, ppc_index; | ||
163 | u32 temp; | ||
164 | u32 cbm_cfg_size1, cbm_cfg_size2; | ||
165 | |||
166 | gk20a_dbg_fn(""); | ||
167 | |||
168 | if (patch) { | ||
169 | int err; | ||
170 | ch_ctx = &c->ch_ctx; | ||
171 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
172 | if (err) | ||
173 | return err; | ||
174 | } | ||
175 | |||
176 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), | ||
177 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | | ||
178 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), | ||
179 | patch); | ||
180 | |||
181 | pd_ab_max_output = (gr->alpha_cb_default_size * | ||
182 | gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) / | ||
183 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
184 | |||
185 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | ||
186 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
187 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | ||
188 | |||
189 | alpha_offset_in_chunk = attrib_offset_in_chunk + | ||
190 | gr->tpc_count * gr->attrib_cb_size; | ||
191 | |||
192 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
193 | temp = proj_gpc_stride_v() * gpc_index; | ||
194 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
195 | ppc_index++) { | ||
196 | cbm_cfg_size1 = gr->attrib_cb_default_size * | ||
197 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
198 | cbm_cfg_size2 = gr->alpha_cb_default_size * | ||
199 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
200 | |||
201 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
202 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | ||
203 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
204 | cbm_cfg_size1, patch); | ||
205 | |||
206 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
207 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | ||
208 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
209 | attrib_offset_in_chunk, patch); | ||
210 | |||
211 | attrib_offset_in_chunk += gr->attrib_cb_size * | ||
212 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
213 | |||
214 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
215 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | ||
216 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
217 | cbm_cfg_size2, patch); | ||
218 | |||
219 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
220 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | ||
221 | proj_ppc_in_gpc_stride_v() * ppc_index, | ||
222 | alpha_offset_in_chunk, patch); | ||
223 | |||
224 | alpha_offset_in_chunk += gr->alpha_cb_size * | ||
225 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
226 | |||
227 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
228 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + gpc_index), | ||
229 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) | | ||
230 | gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3), | ||
231 | patch); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | if (patch) | ||
236 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
237 | |||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | static void gr_gm20b_commit_global_pagepool(struct gk20a *g, | ||
242 | struct channel_ctx_gk20a *ch_ctx, | ||
243 | u64 addr, u32 size, bool patch) | ||
244 | { | ||
245 | gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); | ||
246 | |||
247 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(), | ||
248 | gr_gpcs_swdx_rm_pagepool_total_pages_f(size) | | ||
249 | gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch); | ||
250 | |||
251 | } | ||
252 | |||
253 | static int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, | ||
254 | u32 class_num, u32 offset, u32 data) | ||
255 | { | ||
256 | gk20a_dbg_fn(""); | ||
257 | |||
258 | if (class_num == MAXWELL_COMPUTE_B) { | ||
259 | switch (offset << 2) { | ||
260 | case NVB1C0_SET_SHADER_EXCEPTIONS: | ||
261 | gk20a_gr_set_shader_exceptions(g, data); | ||
262 | break; | ||
263 | default: | ||
264 | goto fail; | ||
265 | } | ||
266 | } | ||
267 | |||
268 | if (class_num == MAXWELL_B) { | ||
269 | switch (offset << 2) { | ||
270 | case NVB197_SET_SHADER_EXCEPTIONS: | ||
271 | gk20a_gr_set_shader_exceptions(g, data); | ||
272 | break; | ||
273 | case NVB197_SET_CIRCULAR_BUFFER_SIZE: | ||
274 | g->ops.gr.set_circular_buffer_size(g, data); | ||
275 | break; | ||
276 | case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
277 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
278 | break; | ||
279 | default: | ||
280 | goto fail; | ||
281 | } | ||
282 | } | ||
283 | return 0; | ||
284 | |||
285 | fail: | ||
286 | return -EINVAL; | ||
287 | } | ||
288 | |||
289 | static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | ||
290 | { | ||
291 | struct gr_gk20a *gr = &g->gr; | ||
292 | u32 gpc_index, ppc_index, stride, val; | ||
293 | u32 pd_ab_max_output; | ||
294 | u32 alpha_cb_size = data * 4; | ||
295 | |||
296 | gk20a_dbg_fn(""); | ||
297 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | ||
298 | return; */ | ||
299 | |||
300 | if (alpha_cb_size > gr->alpha_cb_size) | ||
301 | alpha_cb_size = gr->alpha_cb_size; | ||
302 | |||
303 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
304 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
305 | ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | | ||
306 | gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); | ||
307 | |||
308 | pd_ab_max_output = alpha_cb_size * | ||
309 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() / | ||
310 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
311 | |||
312 | gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), | ||
313 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output)); | ||
314 | |||
315 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
316 | stride = proj_gpc_stride_v() * gpc_index; | ||
317 | |||
318 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
319 | ppc_index++) { | ||
320 | |||
321 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
322 | stride + | ||
323 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
324 | |||
325 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), | ||
326 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * | ||
327 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
328 | |||
329 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | ||
330 | stride + | ||
331 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
332 | } | ||
333 | } | ||
334 | } | ||
335 | |||
336 | void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | ||
337 | { | ||
338 | struct gr_gk20a *gr = &g->gr; | ||
339 | u32 gpc_index, ppc_index, stride, val; | ||
340 | u32 cb_size = data * 4; | ||
341 | |||
342 | gk20a_dbg_fn(""); | ||
343 | |||
344 | if (cb_size > gr->attrib_cb_size) | ||
345 | cb_size = gr->attrib_cb_size; | ||
346 | |||
347 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
348 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
349 | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | | ||
350 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | ||
351 | |||
352 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
353 | stride = proj_gpc_stride_v() * gpc_index; | ||
354 | |||
355 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
356 | ppc_index++) { | ||
357 | |||
358 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
359 | stride + | ||
360 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
361 | |||
362 | val = set_field(val, | ||
363 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), | ||
364 | gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size * | ||
365 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
366 | |||
367 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | ||
368 | stride + | ||
369 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
370 | |||
371 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
372 | ppc_index + gpc_index)); | ||
373 | |||
374 | val = set_field(val, | ||
375 | gr_gpcs_swdx_tc_beta_cb_size_v_m(), | ||
376 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size * | ||
377 | gr->gpc_ppc_count[gpc_index])); | ||
378 | val = set_field(val, | ||
379 | gr_gpcs_swdx_tc_beta_cb_size_div3_m(), | ||
380 | gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size * | ||
381 | gr->gpc_ppc_count[gpc_index])/3)); | ||
382 | |||
383 | gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r( | ||
384 | ppc_index + gpc_index), val); | ||
385 | } | ||
386 | } | ||
387 | } | ||
388 | |||
389 | static void gr_gm20b_enable_hww_exceptions(struct gk20a *g) | ||
390 | { | ||
391 | gr_gk20a_enable_hww_exceptions(g); | ||
392 | |||
393 | gk20a_writel(g, gr_ds_hww_esr_2_r(), | ||
394 | gr_ds_hww_esr_2_en_enabled_f() | | ||
395 | gr_ds_hww_esr_2_reset_task_f()); | ||
396 | gk20a_writel(g, gr_ds_hww_report_mask_2_r(), | ||
397 | gr_ds_hww_report_mask_2_sph24_err_report_f()); | ||
398 | } | ||
399 | |||
400 | static void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g) | ||
401 | { | ||
402 | /* setup sm warp esr report masks */ | ||
403 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), | ||
404 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | | ||
405 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | | ||
406 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | | ||
407 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | | ||
408 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | | ||
409 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | | ||
410 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | | ||
411 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | | ||
412 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | | ||
413 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | | ||
414 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | | ||
415 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | | ||
416 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | | ||
417 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | | ||
418 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | | ||
419 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | | ||
420 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | | ||
421 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | | ||
422 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() | | ||
423 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() | | ||
424 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | | ||
425 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); | ||
426 | |||
427 | /* setup sm global esr report mask */ | ||
428 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), | ||
429 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | | ||
430 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f()); | ||
431 | } | ||
432 | |||
433 | static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | ||
434 | { | ||
435 | bool valid = false; | ||
436 | |||
437 | switch (class_num) { | ||
438 | case MAXWELL_COMPUTE_B: | ||
439 | case MAXWELL_B: | ||
440 | case FERMI_TWOD_A: | ||
441 | case KEPLER_DMA_COPY_A: | ||
442 | case MAXWELL_DMA_COPY_A: | ||
443 | valid = true; | ||
444 | break; | ||
445 | |||
446 | default: | ||
447 | break; | ||
448 | } | ||
449 | |||
450 | return valid; | ||
451 | } | ||
452 | |||
453 | static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, | ||
454 | u32 *num_sm_dsm_perf_regs, | ||
455 | u32 **sm_dsm_perf_regs, | ||
456 | u32 *perf_register_stride) | ||
457 | { | ||
458 | gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, | ||
459 | sm_dsm_perf_regs, | ||
460 | perf_register_stride); | ||
461 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | ||
462 | } | ||
463 | |||
464 | static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
465 | u32 *num_sm_dsm_perf_regs, | ||
466 | u32 **sm_dsm_perf_regs, | ||
467 | u32 *ctrl_register_stride) | ||
468 | { | ||
469 | gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, | ||
470 | sm_dsm_perf_regs, | ||
471 | ctrl_register_stride); | ||
472 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
473 | } | ||
474 | |||
475 | static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | ||
476 | { | ||
477 | struct gr_gk20a *gr = &g->gr; | ||
478 | u32 tpc_index, gpc_index; | ||
479 | u32 tpc_offset, gpc_offset; | ||
480 | u32 sm_id = 0, gpc_id = 0; | ||
481 | u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; | ||
482 | u32 tpc_per_gpc; | ||
483 | |||
484 | gk20a_dbg_fn(""); | ||
485 | |||
486 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | ||
487 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
488 | gpc_offset = proj_gpc_stride_v() * gpc_index; | ||
489 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | ||
490 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | ||
491 | |||
492 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
493 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
494 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, | ||
495 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
496 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
497 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
498 | |||
499 | sm_id_to_gpc_id[sm_id] = gpc_index; | ||
500 | sm_id++; | ||
501 | } | ||
502 | } | ||
503 | } | ||
504 | |||
505 | for (tpc_index = 0, gpc_id = 0; | ||
506 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); | ||
507 | tpc_index++, gpc_id += 8) { | ||
508 | |||
509 | if (gpc_id >= gr->gpc_count) | ||
510 | gpc_id = 0; | ||
511 | |||
512 | tpc_per_gpc = | ||
513 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | | ||
514 | gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) | | ||
515 | gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) | | ||
516 | gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) | | ||
517 | gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) | | ||
518 | gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) | | ||
519 | gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) | | ||
520 | gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]); | ||
521 | |||
522 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
523 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc); | ||
524 | } | ||
525 | |||
526 | /* gr__setup_pd_mapping stubbed for gk20a */ | ||
527 | gr_gk20a_setup_rop_mapping(g, gr); | ||
528 | |||
529 | for (gpc_index = 0; | ||
530 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; | ||
531 | gpc_index += 4) { | ||
532 | |||
533 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), | ||
534 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) || | ||
535 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) || | ||
536 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) || | ||
537 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | ||
538 | } | ||
539 | |||
540 | gk20a_writel(g, gr_cwd_fs_r(), | ||
541 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
542 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
543 | |||
544 | gk20a_writel(g, gr_bes_zrop_settings_r(), | ||
545 | gr_bes_zrop_settings_num_active_ltcs_f(gr->num_fbps)); | ||
546 | gk20a_writel(g, gr_bes_crop_settings_r(), | ||
547 | gr_bes_crop_settings_num_active_ltcs_f(gr->num_fbps)); | ||
548 | |||
549 | gk20a_writel(g, gr_bes_crop_debug3_r(), | ||
550 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | ||
551 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | ||
552 | |||
553 | if (tegra_platform_is_silicon()) { | ||
554 | gk20a_writel(g, gr_fe_tpc_fs_r(), gr->pes_tpc_mask[0][0]); | ||
555 | |||
556 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0) | | ||
557 | gr_cwd_gpc_tpc_id_tpc1_f(1)); | ||
558 | |||
559 | gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0) | | ||
560 | gr_cwd_sm_id_tpc1_f(1)); | ||
561 | } else { | ||
562 | gk20a_writel(g, gr_fe_tpc_fs_r(), 1); | ||
563 | |||
564 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0)); | ||
565 | |||
566 | gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0)); | ||
567 | |||
568 | } | ||
569 | |||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | void gm20b_init_gr(struct gpu_ops *gops) | ||
574 | { | ||
575 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | ||
576 | gops->gr.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults; | ||
577 | gops->gr.cb_size_default = gr_gm20b_cb_size_default; | ||
578 | gops->gr.calc_global_ctx_buffer_size = | ||
579 | gr_gm20b_calc_global_ctx_buffer_size; | ||
580 | gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; | ||
581 | gops->gr.commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb; | ||
582 | gops->gr.commit_global_cb_manager = gr_gm20b_commit_global_cb_manager; | ||
583 | gops->gr.commit_global_pagepool = gr_gm20b_commit_global_pagepool; | ||
584 | gops->gr.handle_sw_method = gr_gm20b_handle_sw_method; | ||
585 | gops->gr.set_alpha_circular_buffer_size = gr_gm20b_set_alpha_circular_buffer_size; | ||
586 | gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; | ||
587 | gops->gr.enable_hww_exceptions = gr_gm20b_enable_hww_exceptions; | ||
588 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; | ||
589 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; | ||
590 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | ||
591 | gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; | ||
592 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; | ||
593 | } | ||