diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2018-08-13 15:58:18 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-16 13:14:40 -0400 |
commit | 974d541623929fa2622d27d5d338a5b63596794b (patch) | |
tree | f47a540bf07efd7f6cda68f49d3675c2462d731a /drivers/gpu/nvgpu/gm20b | |
parent | 1e7f229e5d92078f772d4f81893b23504cd847a8 (diff) |
gpu: nvgpu: Move ltc HAL to common
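Move implementation of ltc HAL to common/ltc. The gm20b LTC sources (ltc_gm20b.c, ltc_gm20b.h) are removed from drivers/gpu/nvgpu/gm20b, and hal_gm20b.c now includes common/ltc/ltc_gm20b.h instead. The LTC address-decode helpers (is_ltcs_ltss_addr, is_ltcn_ltss_addr, split_lts_broadcast_addr, split_ltc_broadcast_addr) are removed from gr_gm20b.c, and their HAL entries now sit with the other LTC ops, pointing at gm20b_ltc_* implementations, together with a new pri_is_ltc_addr entry.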
JIRA NVGPU-956
Change-Id: Id78d74e8612d7dacfb8d322d491abecd798e42b5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1798461
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 76 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 489 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.h | 49 |
5 files changed, 6 insertions, 628 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 49b81783..abc39362 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> | 39 | #include <nvgpu/hw/gm20b/hw_gr_gm20b.h> |
40 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> | 40 | #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h> |
41 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | 41 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> |
42 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
43 | #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> | 42 | #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h> |
44 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> | 43 | #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h> |
45 | #include <nvgpu/hw/gm20b/hw_perf_gm20b.h> | 44 | #include <nvgpu/hw/gm20b/hw_perf_gm20b.h> |
@@ -1438,81 +1437,6 @@ int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | |||
1438 | return 0; | 1437 | return 0; |
1439 | } | 1438 | } |
1440 | 1439 | ||
1441 | bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) | ||
1442 | { | ||
1443 | u32 ltc_shared_base = ltc_ltcs_ltss_v(); | ||
1444 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1445 | |||
1446 | return (addr >= ltc_shared_base) && | ||
1447 | (addr < (ltc_shared_base + lts_stride)); | ||
1448 | } | ||
1449 | |||
1450 | bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) | ||
1451 | { | ||
1452 | u32 lts_shared_base = ltc_ltc0_ltss_v(); | ||
1453 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1454 | u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1; | ||
1455 | u32 base_offset = lts_shared_base & addr_mask; | ||
1456 | u32 end_offset = base_offset + lts_stride; | ||
1457 | |||
1458 | return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) && | ||
1459 | ((addr & addr_mask) >= base_offset) && | ||
1460 | ((addr & addr_mask) < end_offset); | ||
1461 | } | ||
1462 | |||
1463 | static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, | ||
1464 | u32 *priv_addr_table, | ||
1465 | u32 *priv_addr_table_index) | ||
1466 | { | ||
1467 | u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g); | ||
1468 | u32 index = *priv_addr_table_index; | ||
1469 | u32 lts_num; | ||
1470 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1471 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1472 | |||
1473 | for (lts_num = 0; lts_num < num_ltc_slices; lts_num++) { | ||
1474 | priv_addr_table[index++] = ltc_ltc0_lts0_v() + | ||
1475 | ltc_num * ltc_stride + | ||
1476 | lts_num * lts_stride + | ||
1477 | (addr & (lts_stride - 1)); | ||
1478 | } | ||
1479 | |||
1480 | *priv_addr_table_index = index; | ||
1481 | } | ||
1482 | |||
1483 | void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
1484 | u32 *priv_addr_table, | ||
1485 | u32 *priv_addr_table_index) | ||
1486 | { | ||
1487 | u32 num_ltc = g->ltc_count; | ||
1488 | u32 i, start, ltc_num = 0; | ||
1489 | u32 pltcg_base = ltc_pltcg_base_v(); | ||
1490 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1491 | |||
1492 | for (i = 0; i < num_ltc; i++) { | ||
1493 | start = pltcg_base + i * ltc_stride; | ||
1494 | if ((addr >= start) && (addr < (start + ltc_stride))) { | ||
1495 | ltc_num = i; | ||
1496 | break; | ||
1497 | } | ||
1498 | } | ||
1499 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, | ||
1500 | priv_addr_table_index); | ||
1501 | } | ||
1502 | |||
1503 | void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
1504 | u32 *priv_addr_table, | ||
1505 | u32 *priv_addr_table_index) | ||
1506 | { | ||
1507 | u32 num_ltc = g->ltc_count; | ||
1508 | u32 ltc_num; | ||
1509 | |||
1510 | for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) { | ||
1511 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, | ||
1512 | priv_addr_table, priv_addr_table_index); | ||
1513 | } | ||
1514 | } | ||
1515 | |||
1516 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | 1440 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
1517 | u32 global_esr) | 1441 | u32 global_esr) |
1518 | { | 1442 | { |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 5c82fd65..9d8e5cdf 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h | |||
@@ -124,14 +124,6 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g, | |||
124 | struct channel_gk20a *ch, u32 sm_id); | 124 | struct channel_gk20a *ch, u32 sm_id); |
125 | int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, | 125 | int gr_gm20b_get_preemption_mode_flags(struct gk20a *g, |
126 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | 126 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); |
127 | bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr); | ||
128 | bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr); | ||
129 | void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
130 | u32 *priv_addr_table, | ||
131 | u32 *priv_addr_table_index); | ||
132 | void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
133 | u32 *priv_addr_table, | ||
134 | u32 *priv_addr_table_index); | ||
135 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | 127 | void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
136 | u32 global_esr); | 128 | u32 global_esr); |
137 | u32 gr_gm20b_get_pmm_per_chiplet_offset(void); | 129 | u32 gr_gm20b_get_pmm_per_chiplet_offset(void); |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 05acc0cf..3b164f9c 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "common/fb/fb_gm20b.h" | 31 | #include "common/fb/fb_gm20b.h" |
32 | #include "common/therm/therm_gm20b.h" | 32 | #include "common/therm/therm_gm20b.h" |
33 | #include "common/therm/therm_gm20b.h" | 33 | #include "common/therm/therm_gm20b.h" |
34 | #include "common/ltc/ltc_gm20b.h" | ||
34 | 35 | ||
35 | #include "gk20a/gk20a.h" | 36 | #include "gk20a/gk20a.h" |
36 | #include "gk20a/ce2_gk20a.h" | 37 | #include "gk20a/ce2_gk20a.h" |
@@ -45,9 +46,7 @@ | |||
45 | #include "gk20a/gr_gk20a.h" | 46 | #include "gk20a/gr_gk20a.h" |
46 | #include "gk20a/tsg_gk20a.h" | 47 | #include "gk20a/tsg_gk20a.h" |
47 | 48 | ||
48 | #include "ltc_gm20b.h" | ||
49 | #include "gr_gm20b.h" | 49 | #include "gr_gm20b.h" |
50 | #include "ltc_gm20b.h" | ||
51 | #include "fifo_gm20b.h" | 50 | #include "fifo_gm20b.h" |
52 | #include "gr_ctx_gm20b.h" | 51 | #include "gr_ctx_gm20b.h" |
53 | #include "mm_gm20b.h" | 52 | #include "mm_gm20b.h" |
@@ -200,6 +199,11 @@ static const struct gpu_ops gm20b_ops = { | |||
200 | .cbc_fix_config = gm20b_ltc_cbc_fix_config, | 199 | .cbc_fix_config = gm20b_ltc_cbc_fix_config, |
201 | .flush = gm20b_flush_ltc, | 200 | .flush = gm20b_flush_ltc, |
202 | .set_enabled = gm20b_ltc_set_enabled, | 201 | .set_enabled = gm20b_ltc_set_enabled, |
202 | .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, | ||
203 | .is_ltcs_ltss_addr = gm20b_ltc_is_ltcs_ltss_addr, | ||
204 | .is_ltcn_ltss_addr = gm20b_ltc_is_ltcn_ltss_addr, | ||
205 | .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, | ||
206 | .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, | ||
203 | }, | 207 | }, |
204 | .ce2 = { | 208 | .ce2 = { |
205 | .isr_stall = gk20a_ce2_isr, | 209 | .isr_stall = gk20a_ce2_isr, |
@@ -281,10 +285,6 @@ static const struct gpu_ops gm20b_ops = { | |||
281 | .init_sm_id_table = gr_gk20a_init_sm_id_table, | 285 | .init_sm_id_table = gr_gk20a_init_sm_id_table, |
282 | .load_smid_config = gr_gm20b_load_smid_config, | 286 | .load_smid_config = gr_gm20b_load_smid_config, |
283 | .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, | 287 | .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering, |
284 | .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr, | ||
285 | .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr, | ||
286 | .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr, | ||
287 | .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr, | ||
288 | .setup_rop_mapping = gr_gk20a_setup_rop_mapping, | 288 | .setup_rop_mapping = gr_gk20a_setup_rop_mapping, |
289 | .program_zcull_mapping = gr_gk20a_program_zcull_mapping, | 289 | .program_zcull_mapping = gr_gk20a_program_zcull_mapping, |
290 | .commit_global_timeslice = gr_gk20a_commit_global_timeslice, | 290 | .commit_global_timeslice = gr_gk20a_commit_global_timeslice, |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c deleted file mode 100644 index 65945fad..00000000 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ /dev/null | |||
@@ -1,489 +0,0 @@ | |||
1 | /* | ||
2 | * GM20B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2018 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | |||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/bug.h> | ||
30 | #include <nvgpu/ltc.h> | ||
31 | #include <nvgpu/io.h> | ||
32 | #include <nvgpu/utils.h> | ||
33 | |||
34 | #include <nvgpu/hw/gm20b/hw_mc_gm20b.h> | ||
35 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
36 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
37 | #include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h> | ||
38 | |||
39 | #include "gk20a/gk20a.h" | ||
40 | |||
41 | #include "ltc_gm20b.h" | ||
42 | |||
43 | int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
44 | { | ||
45 | /* max memory size (MB) to cover */ | ||
46 | u32 max_size = gr->max_comptag_mem; | ||
47 | /* one tag line covers 128KB */ | ||
48 | u32 max_comptag_lines = max_size << 3U; | ||
49 | |||
50 | u32 hw_max_comptag_lines = | ||
51 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); | ||
52 | |||
53 | u32 cbc_param = | ||
54 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
55 | u32 comptags_per_cacheline = | ||
56 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | ||
57 | |||
58 | u32 compbit_backing_size; | ||
59 | |||
60 | int err; | ||
61 | |||
62 | nvgpu_log_fn(g, " "); | ||
63 | |||
64 | if (max_comptag_lines == 0U) | ||
65 | return 0; | ||
66 | |||
67 | if (max_comptag_lines > hw_max_comptag_lines) | ||
68 | max_comptag_lines = hw_max_comptag_lines; | ||
69 | |||
70 | compbit_backing_size = | ||
71 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | ||
72 | gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; | ||
73 | |||
74 | /* aligned to 2KB * ltc_count */ | ||
75 | compbit_backing_size += | ||
76 | g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
77 | |||
78 | /* must be a multiple of 64KB */ | ||
79 | compbit_backing_size = roundup(compbit_backing_size, 64*1024); | ||
80 | |||
81 | max_comptag_lines = | ||
82 | (compbit_backing_size * comptags_per_cacheline) / | ||
83 | (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); | ||
84 | |||
85 | if (max_comptag_lines > hw_max_comptag_lines) | ||
86 | max_comptag_lines = hw_max_comptag_lines; | ||
87 | |||
88 | nvgpu_log_info(g, "compbit backing store size : %d", | ||
89 | compbit_backing_size); | ||
90 | nvgpu_log_info(g, "max comptag lines : %d", | ||
91 | max_comptag_lines); | ||
92 | |||
93 | err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size); | ||
94 | if (err) | ||
95 | return err; | ||
96 | |||
97 | err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); | ||
98 | if (err) | ||
99 | return err; | ||
100 | |||
101 | gr->max_comptag_lines = max_comptag_lines; | ||
102 | gr->comptags_per_cacheline = comptags_per_cacheline; | ||
103 | |||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
108 | u32 min, u32 max) | ||
109 | { | ||
110 | struct gr_gk20a *gr = &g->gr; | ||
111 | struct nvgpu_timeout timeout; | ||
112 | int err = 0; | ||
113 | u32 ltc, slice, ctrl1, val, hw_op = 0U; | ||
114 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | ||
115 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | ||
116 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
117 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
118 | const u32 max_lines = 16384U; | ||
119 | |||
120 | nvgpu_log_fn(g, " "); | ||
121 | |||
122 | trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); | ||
123 | |||
124 | if (gr->compbit_store.mem.size == 0) | ||
125 | return 0; | ||
126 | |||
127 | while (1) { | ||
128 | const u32 iter_max = min(min + max_lines - 1, max); | ||
129 | bool full_cache_op = true; | ||
130 | |||
131 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | ||
132 | |||
133 | nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); | ||
134 | |||
135 | if (op == gk20a_cbc_op_clear) { | ||
136 | gk20a_writel( | ||
137 | g, ltc_ltcs_ltss_cbc_ctrl2_r(), | ||
138 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( | ||
139 | min)); | ||
140 | gk20a_writel( | ||
141 | g, ltc_ltcs_ltss_cbc_ctrl3_r(), | ||
142 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( | ||
143 | iter_max)); | ||
144 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | ||
145 | full_cache_op = false; | ||
146 | } else if (op == gk20a_cbc_op_clean) { | ||
147 | /* this is full-cache op */ | ||
148 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | ||
149 | } else if (op == gk20a_cbc_op_invalidate) { | ||
150 | /* this is full-cache op */ | ||
151 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | ||
152 | } else { | ||
153 | nvgpu_err(g, "Unknown op: %u", (unsigned)op); | ||
154 | err = -EINVAL; | ||
155 | goto out; | ||
156 | } | ||
157 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | ||
158 | gk20a_readl(g, | ||
159 | ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | ||
160 | |||
161 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
162 | for (slice = 0; slice < slices_per_ltc; slice++) { | ||
163 | |||
164 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
165 | ltc * ltc_stride + slice * lts_stride; | ||
166 | |||
167 | nvgpu_timeout_init(g, &timeout, 2000, | ||
168 | NVGPU_TIMER_RETRY_TIMER); | ||
169 | do { | ||
170 | val = gk20a_readl(g, ctrl1); | ||
171 | if (!(val & hw_op)) | ||
172 | break; | ||
173 | nvgpu_udelay(5); | ||
174 | } while (!nvgpu_timeout_expired(&timeout)); | ||
175 | |||
176 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
177 | nvgpu_err(g, "comp tag clear timeout"); | ||
178 | err = -EBUSY; | ||
179 | goto out; | ||
180 | } | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /* are we done? */ | ||
185 | if (full_cache_op || iter_max == max) | ||
186 | break; | ||
187 | |||
188 | /* note: iter_max is inclusive upper bound */ | ||
189 | min = iter_max + 1; | ||
190 | |||
191 | /* give a chance for higher-priority threads to progress */ | ||
192 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
193 | } | ||
194 | out: | ||
195 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | ||
196 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
197 | return err; | ||
198 | } | ||
199 | |||
200 | void gm20b_ltc_init_fs_state(struct gk20a *g) | ||
201 | { | ||
202 | struct gr_gk20a *gr = &g->gr; | ||
203 | u32 reg; | ||
204 | |||
205 | nvgpu_log_info(g, "initialize gm20b l2"); | ||
206 | |||
207 | g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); | ||
208 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); | ||
209 | nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); | ||
210 | |||
211 | reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
212 | gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; | ||
213 | gr->cacheline_size = | ||
214 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); | ||
215 | |||
216 | gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), | ||
217 | g->ltc_count); | ||
218 | gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), | ||
219 | g->ltc_count); | ||
220 | |||
221 | gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(), | ||
222 | gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) | | ||
223 | ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m()); | ||
224 | |||
225 | /* Disable LTC interrupts */ | ||
226 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | ||
227 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | ||
228 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(); | ||
229 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m(); | ||
230 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); | ||
231 | } | ||
232 | |||
233 | void gm20b_ltc_isr(struct gk20a *g) | ||
234 | { | ||
235 | u32 mc_intr, ltc_intr; | ||
236 | unsigned int ltc, slice; | ||
237 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
238 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
239 | |||
240 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | ||
241 | nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); | ||
242 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
243 | if ((mc_intr & 1U << ltc) == 0) | ||
244 | continue; | ||
245 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | ||
246 | ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + | ||
247 | ltc_stride * ltc + | ||
248 | lts_stride * slice); | ||
249 | nvgpu_err(g, "ltc%d, slice %d: %08x", | ||
250 | ltc, slice, ltc_intr); | ||
251 | gk20a_writel(g, ltc_ltc0_lts0_intr_r() + | ||
252 | ltc_stride * ltc + | ||
253 | lts_stride * slice, | ||
254 | ltc_intr); | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | |||
259 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) | ||
260 | { | ||
261 | u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); | ||
262 | if (val == 2U) { | ||
263 | return base * 2; | ||
264 | } else if (val != 1) { | ||
265 | nvgpu_err(g, "Invalid number of active ltcs: %08x", val); | ||
266 | } | ||
267 | |||
268 | return base; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Performs a full flush of the L2 cache. | ||
273 | */ | ||
274 | void gm20b_flush_ltc(struct gk20a *g) | ||
275 | { | ||
276 | struct nvgpu_timeout timeout; | ||
277 | unsigned int ltc; | ||
278 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
279 | |||
280 | /* Clean... */ | ||
281 | nvgpu_writel_check(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), | ||
282 | ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | | ||
283 | ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() | | ||
284 | ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() | | ||
285 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() | | ||
286 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() | | ||
287 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f()); | ||
288 | |||
289 | /* Wait on each LTC individually. */ | ||
290 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
291 | u32 op_pending; | ||
292 | |||
293 | /* | ||
294 | * Use 5ms - this should be sufficient time to flush the cache. | ||
295 | * On tegra, rough EMC BW available for old tegra chips (newer | ||
296 | * chips are strictly faster) can be estimated as follows: | ||
297 | * | ||
298 | * Lowest reasonable EMC clock speed will be around 102MHz on | ||
299 | * t124 for display enabled boards and generally fixed to max | ||
300 | * for non-display boards (since they are generally plugged in). | ||
301 | * | ||
302 | * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that | ||
303 | * BW the GPU will likely get about half (display and overhead/ | ||
304 | * utilization inefficiency eating the rest) so 650MB/s at | ||
305 | * worst. Assuming at most 1MB of GPU L2 cache (less for most | ||
306 | * chips) worst case is we take 1MB/650MB/s = 1.5ms. | ||
307 | * | ||
308 | * So 5ms timeout here should be more than sufficient. | ||
309 | */ | ||
310 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
311 | |||
312 | do { | ||
313 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + | ||
314 | ltc * ltc_stride; | ||
315 | op_pending = gk20a_readl(g, cmgmt1); | ||
316 | } while ((op_pending & | ||
317 | ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) && | ||
318 | !nvgpu_timeout_expired_msg(&timeout, | ||
319 | "L2 flush timeout!")); | ||
320 | } | ||
321 | |||
322 | /* And invalidate. */ | ||
323 | nvgpu_writel_check(g, ltc_ltcs_ltss_tstg_cmgmt0_r(), | ||
324 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() | | ||
325 | ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() | | ||
326 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() | | ||
327 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() | | ||
328 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f()); | ||
329 | |||
330 | /* Wait on each LTC individually. */ | ||
331 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
332 | u32 op_pending; | ||
333 | |||
334 | /* Again, 5ms. */ | ||
335 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
336 | |||
337 | do { | ||
338 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + | ||
339 | ltc * ltc_stride; | ||
340 | op_pending = gk20a_readl(g, cmgmt0); | ||
341 | } while ((op_pending & | ||
342 | ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) && | ||
343 | !nvgpu_timeout_expired_msg(&timeout, | ||
344 | "L2 flush timeout!")); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | int gm20b_determine_L2_size_bytes(struct gk20a *g) | ||
349 | { | ||
350 | u32 lts_per_ltc; | ||
351 | u32 ways; | ||
352 | u32 sets; | ||
353 | u32 bytes_per_line; | ||
354 | u32 active_ltcs; | ||
355 | u32 cache_size; | ||
356 | |||
357 | u32 tmp; | ||
358 | u32 active_sets_value; | ||
359 | |||
360 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
361 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
362 | |||
363 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
364 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
365 | sets = 64U; | ||
366 | } else if (active_sets_value == | ||
367 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
368 | sets = 32U; | ||
369 | } else if (active_sets_value == | ||
370 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
371 | sets = 16U; | ||
372 | } else { | ||
373 | nvgpu_err(g, "Unknown constant %u for active sets", | ||
374 | (unsigned)active_sets_value); | ||
375 | sets = 0U; | ||
376 | } | ||
377 | |||
378 | active_ltcs = g->gr.num_fbps; | ||
379 | |||
380 | /* chip-specific values */ | ||
381 | lts_per_ltc = 2U; | ||
382 | bytes_per_line = 128U; | ||
383 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
384 | |||
385 | return cache_size; | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Sets the ZBC color for the passed index. | ||
390 | */ | ||
391 | void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, | ||
392 | struct zbc_entry *color_val, | ||
393 | u32 index) | ||
394 | { | ||
395 | u32 i; | ||
396 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
397 | |||
398 | nvgpu_writel_check(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
399 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
400 | |||
401 | for (i = 0; | ||
402 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { | ||
403 | nvgpu_writel_check(g, | ||
404 | ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
405 | color_val->color_l2[i]); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Sets the ZBC depth for the passed index. | ||
411 | */ | ||
412 | void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
413 | struct zbc_entry *depth_val, | ||
414 | u32 index) | ||
415 | { | ||
416 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
417 | |||
418 | nvgpu_writel_check(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
419 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
420 | |||
421 | nvgpu_writel_check(g, | ||
422 | ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
423 | depth_val->depth); | ||
424 | } | ||
425 | |||
426 | void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | ||
427 | { | ||
428 | u32 max_size = gr->max_comptag_mem; | ||
429 | u32 max_comptag_lines = max_size << 3U; | ||
430 | |||
431 | u32 compbit_base_post_divide; | ||
432 | u64 compbit_base_post_multiply64; | ||
433 | u64 compbit_store_iova; | ||
434 | u64 compbit_base_post_divide64; | ||
435 | |||
436 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
437 | compbit_store_iova = nvgpu_mem_get_phys_addr(g, | ||
438 | &gr->compbit_store.mem); | ||
439 | else | ||
440 | compbit_store_iova = nvgpu_mem_get_addr(g, | ||
441 | &gr->compbit_store.mem); | ||
442 | |||
443 | compbit_base_post_divide64 = compbit_store_iova >> | ||
444 | ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
445 | |||
446 | do_div(compbit_base_post_divide64, g->ltc_count); | ||
447 | compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); | ||
448 | |||
449 | compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * | ||
450 | g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
451 | |||
452 | if (compbit_base_post_multiply64 < compbit_store_iova) | ||
453 | compbit_base_post_divide++; | ||
454 | |||
455 | /* Bug 1477079 indicates sw adjustment on the posted divided base. */ | ||
456 | if (g->ops.ltc.cbc_fix_config) | ||
457 | compbit_base_post_divide = | ||
458 | g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); | ||
459 | |||
460 | gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), | ||
461 | compbit_base_post_divide); | ||
462 | |||
463 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, | ||
464 | "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", | ||
465 | (u32)(compbit_store_iova >> 32), | ||
466 | (u32)(compbit_store_iova & 0xffffffff), | ||
467 | compbit_base_post_divide); | ||
468 | |||
469 | gr->compbit_store.base_hw = compbit_base_post_divide; | ||
470 | |||
471 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, | ||
472 | 0, max_comptag_lines - 1); | ||
473 | |||
474 | } | ||
475 | |||
476 | void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled) | ||
477 | { | ||
478 | u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); | ||
479 | u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); | ||
480 | |||
481 | if (enabled) | ||
482 | /* bypass disabled (normal caching ops)*/ | ||
483 | reg &= ~reg_f; | ||
484 | else | ||
485 | /* bypass enabled (no caching) */ | ||
486 | reg |= reg_f; | ||
487 | |||
488 | gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); | ||
489 | } | ||
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h deleted file mode 100644 index 0f9145be..00000000 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | /* | ||
2 | * GM20B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_LTC | ||
26 | #define _NVHOST_GM20B_LTC | ||
27 | struct gpu_ops; | ||
28 | |||
29 | int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); | ||
30 | int gm20b_determine_L2_size_bytes(struct gk20a *g); | ||
31 | void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, | ||
32 | struct zbc_entry *color_val, | ||
33 | u32 index); | ||
34 | void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
35 | struct zbc_entry *depth_val, | ||
36 | u32 index); | ||
37 | void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); | ||
38 | void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled); | ||
39 | void gm20b_ltc_init_fs_state(struct gk20a *g); | ||
40 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
41 | u32 min, u32 max); | ||
42 | void gm20b_ltc_isr(struct gk20a *g); | ||
43 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); | ||
44 | void gm20b_flush_ltc(struct gk20a *g); | ||
45 | int gm20b_ltc_alloc_phys_cbc(struct gk20a *g, | ||
46 | size_t compbit_backing_size); | ||
47 | int gm20b_ltc_alloc_virt_cbc(struct gk20a *g, | ||
48 | size_t compbit_backing_size); | ||
49 | #endif | ||