diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_common.c | 59 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 41 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 43 |
3 files changed, 83 insertions, 60 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index 9d534b7b..e0ab3f9b 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c | |||
@@ -24,65 +24,6 @@ | |||
24 | #include "gk20a.h" | 24 | #include "gk20a.h" |
25 | #include "gr_gk20a.h" | 25 | #include "gr_gk20a.h" |
26 | 26 | ||
27 | static int gk20a_determine_L2_size_bytes(struct gk20a *g) | ||
28 | { | ||
29 | const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch, | ||
30 | g->gpu_characteristics.impl); | ||
31 | u32 lts_per_ltc; | ||
32 | u32 ways; | ||
33 | u32 sets; | ||
34 | u32 bytes_per_line; | ||
35 | u32 active_ltcs; | ||
36 | u32 cache_size; | ||
37 | |||
38 | u32 tmp; | ||
39 | u32 active_sets_value; | ||
40 | |||
41 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
42 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
43 | |||
44 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
45 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
46 | sets = 64; | ||
47 | } else if (active_sets_value == | ||
48 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
49 | sets = 32; | ||
50 | } else if (active_sets_value == | ||
51 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
52 | sets = 16; | ||
53 | } else { | ||
54 | dev_err(dev_from_gk20a(g), | ||
55 | "Unknown constant %u for active sets", | ||
56 | (unsigned)active_sets_value); | ||
57 | sets = 0; | ||
58 | } | ||
59 | |||
60 | active_ltcs = g->gr.num_fbps; | ||
61 | |||
62 | /* chip-specific values */ | ||
63 | switch (gpuid) { | ||
64 | case GK20A_GPUID_GK20A: | ||
65 | lts_per_ltc = 1; | ||
66 | bytes_per_line = 128; | ||
67 | break; | ||
68 | |||
69 | case GK20A_GPUID_GM20B: | ||
70 | lts_per_ltc = 2; | ||
71 | bytes_per_line = 128; | ||
72 | break; | ||
73 | |||
74 | default: | ||
75 | dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n", | ||
76 | (unsigned)gpuid); | ||
77 | lts_per_ltc = 0; | ||
78 | bytes_per_line = 0; | ||
79 | } | ||
80 | |||
81 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
82 | |||
83 | return cache_size; | ||
84 | } | ||
85 | |||
86 | /* | 27 | /* |
87 | * Set the maximum number of ways that can have the "EVIST_LAST" class. | 28 | * Set the maximum number of ways that can have the "EVIST_LAST" class. |
88 | */ | 29 | */ |
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 2794b3db..aa094dc7 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -224,6 +224,47 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | |||
224 | 224 | ||
225 | } | 225 | } |
226 | 226 | ||
227 | static int gk20a_determine_L2_size_bytes(struct gk20a *g) | ||
228 | { | ||
229 | u32 lts_per_ltc; | ||
230 | u32 ways; | ||
231 | u32 sets; | ||
232 | u32 bytes_per_line; | ||
233 | u32 active_ltcs; | ||
234 | u32 cache_size; | ||
235 | |||
236 | u32 tmp; | ||
237 | u32 active_sets_value; | ||
238 | |||
239 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
240 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
241 | |||
242 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
243 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
244 | sets = 64; | ||
245 | } else if (active_sets_value == | ||
246 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
247 | sets = 32; | ||
248 | } else if (active_sets_value == | ||
249 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
250 | sets = 16; | ||
251 | } else { | ||
252 | dev_err(dev_from_gk20a(g), | ||
253 | "Unknown constant %u for active sets", | ||
254 | (unsigned)active_sets_value); | ||
255 | sets = 0; | ||
256 | } | ||
257 | |||
258 | active_ltcs = g->gr.num_fbps; | ||
259 | |||
260 | /* chip-specific values */ | ||
261 | lts_per_ltc = 1; | ||
262 | bytes_per_line = 128; | ||
263 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
264 | |||
265 | return cache_size; | ||
266 | } | ||
267 | |||
227 | void gk20a_init_ltc(struct gpu_ops *gops) | 268 | void gk20a_init_ltc(struct gpu_ops *gops) |
228 | { | 269 | { |
229 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; | 270 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 96e5dbde..a089b59c 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -347,10 +347,51 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
347 | ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_f()); | 347 | ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_f()); |
348 | } | 348 | } |
349 | 349 | ||
350 | static int gm20b_determine_L2_size_bytes(struct gk20a *g) | ||
351 | { | ||
352 | u32 lts_per_ltc; | ||
353 | u32 ways; | ||
354 | u32 sets; | ||
355 | u32 bytes_per_line; | ||
356 | u32 active_ltcs; | ||
357 | u32 cache_size; | ||
358 | |||
359 | u32 tmp; | ||
360 | u32 active_sets_value; | ||
361 | |||
362 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
363 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
364 | |||
365 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
366 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
367 | sets = 64; | ||
368 | } else if (active_sets_value == | ||
369 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
370 | sets = 32; | ||
371 | } else if (active_sets_value == | ||
372 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
373 | sets = 16; | ||
374 | } else { | ||
375 | dev_err(dev_from_gk20a(g), | ||
376 | "Unknown constant %u for active sets", | ||
377 | (unsigned)active_sets_value); | ||
378 | sets = 0; | ||
379 | } | ||
380 | |||
381 | active_ltcs = g->gr.num_fbps; | ||
382 | |||
383 | /* chip-specific values */ | ||
384 | lts_per_ltc = 2; | ||
385 | bytes_per_line = 128; | ||
386 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
387 | |||
388 | return cache_size; | ||
389 | } | ||
390 | |||
350 | void gm20b_init_ltc(struct gpu_ops *gops) | 391 | void gm20b_init_ltc(struct gpu_ops *gops) |
351 | { | 392 | { |
352 | /* Gk20a reused ops. */ | 393 | /* Gk20a reused ops. */ |
353 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; | 394 | gops->ltc.determine_L2_size_bytes = gm20b_determine_L2_size_bytes; |
354 | gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; | 395 | gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last; |
355 | gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; | 396 | gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; |
356 | gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; | 397 | gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; |