diff options
author | Peter Daifuku <pdaifuku@nvidia.com> | 2016-04-15 21:12:34 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-19 18:58:24 -0400 |
commit | ce0fe5082ebb8a7e0ca5a8992e17ae4547d4db5e (patch) | |
tree | f7301c2993c78af2d69ad768e1aa6c35bede6cfc /drivers/gpu/nvgpu/gm20b | |
parent | 5ccaaa73af4683eabd4d135b5b08aec4a206b613 (diff) |
gpu: nvgpu: hwpm broadcast register support
Add support for HWPM broadcast registers (LTC and LTS).
In gr_gk20a_find_priv_offset_in_buffer, replace the "Unknown address type"
error with an informational message. gr_gk20a_exec_ctx_ops first calls
gk20a_get_ctx_buffer_offsets and, if that fails, falls back to
gr_gk20a_get_pm_ctx_buffer_offsets; HWPM registers will fail the first
call, so an error or warning is overkill.
Bug 1648200
Change-Id: I197b82579e9894652add4ff254418f818981415a
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1131365
(cherry picked from commit 9f30a92c5d87f6dadd34cc37396a6b10e3a72751)
Reviewed-on: http://git-master/r/1133628
(cherry picked from commit 7eb7cfd998852ba7f7c4c40d3db286f66e83ab3a)
Reviewed-on: http://git-master/r/1127749
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 78 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 |
2 files changed, 98 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 0659eefd..3b0a399d 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "hw_fifo_gm20b.h" | 29 | #include "hw_fifo_gm20b.h" |
30 | #include "hw_fb_gm20b.h" | 30 | #include "hw_fb_gm20b.h" |
31 | #include "hw_top_gm20b.h" | 31 | #include "hw_top_gm20b.h" |
32 | #include "hw_ltc_gm20b.h" | ||
32 | #include "hw_ctxsw_prog_gm20b.h" | 33 | #include "hw_ctxsw_prog_gm20b.h" |
33 | #include "hw_fuse_gm20b.h" | 34 | #include "hw_fuse_gm20b.h" |
34 | #include "pmu_gm20b.h" | 35 | #include "pmu_gm20b.h" |
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g) | |||
1402 | return 0; | 1403 | return 0; |
1403 | } | 1404 | } |
1404 | 1405 | ||
1406 | static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) | ||
1407 | { | ||
1408 | u32 ltc_shared_base = ltc_ltcs_ltss_v(); | ||
1409 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1410 | |||
1411 | return (addr >= ltc_shared_base) && | ||
1412 | (addr < (ltc_shared_base + lts_stride)); | ||
1413 | } | ||
1414 | |||
1415 | static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) | ||
1416 | { | ||
1417 | u32 lts_shared_base = ltc_ltc0_ltss_v(); | ||
1418 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1419 | u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1; | ||
1420 | u32 base_offset = lts_shared_base & addr_mask; | ||
1421 | u32 end_offset = base_offset + lts_stride; | ||
1422 | |||
1423 | return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) && | ||
1424 | ((addr & addr_mask) >= base_offset) && | ||
1425 | ((addr & addr_mask) < end_offset); | ||
1426 | } | ||
1427 | |||
1428 | static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, | ||
1429 | u32 *priv_addr_table, | ||
1430 | u32 *priv_addr_table_index) | ||
1431 | { | ||
1432 | u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g); | ||
1433 | u32 index = *priv_addr_table_index; | ||
1434 | u32 lts_num; | ||
1435 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1436 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1437 | |||
1438 | for (lts_num = 0; lts_num < num_ltc_slices; lts_num++) | ||
1439 | priv_addr_table[index++] = ltc_ltc0_lts0_v() + | ||
1440 | ltc_num * ltc_stride + | ||
1441 | lts_num * lts_stride + | ||
1442 | (addr & (lts_stride - 1)); | ||
1443 | |||
1444 | *priv_addr_table_index = index; | ||
1445 | } | ||
1446 | |||
1447 | static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
1448 | u32 *priv_addr_table, | ||
1449 | u32 *priv_addr_table_index) | ||
1450 | { | ||
1451 | u32 num_ltc = g->ltc_count; | ||
1452 | u32 i, start, ltc_num = 0; | ||
1453 | u32 pltcg_base = ltc_pltcg_base_v(); | ||
1454 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1455 | |||
1456 | for (i = 0; i < num_ltc; i++) { | ||
1457 | start = pltcg_base + i * ltc_stride; | ||
1458 | if ((addr >= start) && (addr < (start + ltc_stride))) { | ||
1459 | ltc_num = i; | ||
1460 | break; | ||
1461 | } | ||
1462 | } | ||
1463 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, | ||
1464 | priv_addr_table_index); | ||
1465 | } | ||
1466 | |||
1467 | static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
1468 | u32 *priv_addr_table, | ||
1469 | u32 *priv_addr_table_index) | ||
1470 | { | ||
1471 | u32 num_ltc = g->ltc_count; | ||
1472 | u32 ltc_num; | ||
1473 | |||
1474 | for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) | ||
1475 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, | ||
1476 | priv_addr_table, priv_addr_table_index); | ||
1477 | } | ||
1478 | |||
1405 | void gm20b_init_gr(struct gpu_ops *gops) | 1479 | void gm20b_init_gr(struct gpu_ops *gops) |
1406 | { | 1480 | { |
1407 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1481 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1478 | gops->gr.fuse_override = gm20b_gr_fuse_override; | 1552 | gops->gr.fuse_override = gm20b_gr_fuse_override; |
1479 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; | 1553 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; |
1480 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; | 1554 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; |
1555 | gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr; | ||
1556 | gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr; | ||
1557 | gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr; | ||
1558 | gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr; | ||
1481 | } | 1559 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h index aa01e945..8c00520c 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | |||
@@ -50,6 +50,26 @@ | |||
50 | #ifndef _hw_ltc_gm20b_h_ | 50 | #ifndef _hw_ltc_gm20b_h_ |
51 | #define _hw_ltc_gm20b_h_ | 51 | #define _hw_ltc_gm20b_h_ |
52 | 52 | ||
53 | static inline u32 ltc_pltcg_base_v(void) | ||
54 | { | ||
55 | return 0x00140000; | ||
56 | } | ||
57 | static inline u32 ltc_pltcg_extent_v(void) | ||
58 | { | ||
59 | return 0x0017ffff; | ||
60 | } | ||
61 | static inline u32 ltc_ltc0_ltss_v(void) | ||
62 | { | ||
63 | return 0x00140200; | ||
64 | } | ||
65 | static inline u32 ltc_ltc0_lts0_v(void) | ||
66 | { | ||
67 | return 0x00140400; | ||
68 | } | ||
69 | static inline u32 ltc_ltcs_ltss_v(void) | ||
70 | { | ||
71 | return 0x0017e200; | ||
72 | } | ||
53 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) | 73 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) |
54 | { | 74 | { |
55 | return 0x0014046c; | 75 | return 0x0014046c; |