summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
diff options
context:
space:
mode:
authorPeter Daifuku <pdaifuku@nvidia.com>2016-04-15 21:12:34 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-05-19 18:58:24 -0400
commitce0fe5082ebb8a7e0ca5a8992e17ae4547d4db5e (patch)
treef7301c2993c78af2d69ad768e1aa6c35bede6cfc /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent5ccaaa73af4683eabd4d135b5b08aec4a206b613 (diff)
gpu: nvgpu: hwpm broadcast register support
Add support for hwpm broadcast registers (ltc and lts). In gr_gk20a_find_priv_offset_in_buffer, replace the "Unknown address type" error with an informational message: gr_gk20a_exec_ctx_ops calls gk20a_get_ctx_buffer_offsets and, if that fails, calls gr_gk20a_get_pm_ctx_buffer_offsets. HWPM registers will always fail the first call, so an error or warning is overkill. Bug 1648200 Change-Id: I197b82579e9894652add4ff254418f818981415a Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com> Reviewed-on: http://git-master/r/1131365 (cherry picked from commit 9f30a92c5d87f6dadd34cc37396a6b10e3a72751) Reviewed-on: http://git-master/r/1133628 (cherry picked from commit 7eb7cfd998852ba7f7c4c40d3db286f66e83ab3a) Reviewed-on: http://git-master/r/1127749 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c78
1 files changed, 78 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 0659eefd..3b0a399d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -29,6 +29,7 @@
29#include "hw_fifo_gm20b.h" 29#include "hw_fifo_gm20b.h"
30#include "hw_fb_gm20b.h" 30#include "hw_fb_gm20b.h"
31#include "hw_top_gm20b.h" 31#include "hw_top_gm20b.h"
32#include "hw_ltc_gm20b.h"
32#include "hw_ctxsw_prog_gm20b.h" 33#include "hw_ctxsw_prog_gm20b.h"
33#include "hw_fuse_gm20b.h" 34#include "hw_fuse_gm20b.h"
34#include "pmu_gm20b.h" 35#include "pmu_gm20b.h"
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
1402 return 0; 1403 return 0;
1403} 1404}
1404 1405
1406static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1407{
1408 u32 ltc_shared_base = ltc_ltcs_ltss_v();
1409 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1410
1411 return (addr >= ltc_shared_base) &&
1412 (addr < (ltc_shared_base + lts_stride));
1413}
1414
1415static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
1416{
1417 u32 lts_shared_base = ltc_ltc0_ltss_v();
1418 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1419 u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
1420 u32 base_offset = lts_shared_base & addr_mask;
1421 u32 end_offset = base_offset + lts_stride;
1422
1423 return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) &&
1424 ((addr & addr_mask) >= base_offset) &&
1425 ((addr & addr_mask) < end_offset);
1426}
1427
1428static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
1429 u32 *priv_addr_table,
1430 u32 *priv_addr_table_index)
1431{
1432 u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
1433 u32 index = *priv_addr_table_index;
1434 u32 lts_num;
1435 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1436 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1437
1438 for (lts_num = 0; lts_num < num_ltc_slices; lts_num++)
1439 priv_addr_table[index++] = ltc_ltc0_lts0_v() +
1440 ltc_num * ltc_stride +
1441 lts_num * lts_stride +
1442 (addr & (lts_stride - 1));
1443
1444 *priv_addr_table_index = index;
1445}
1446
1447static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1448 u32 *priv_addr_table,
1449 u32 *priv_addr_table_index)
1450{
1451 u32 num_ltc = g->ltc_count;
1452 u32 i, start, ltc_num = 0;
1453 u32 pltcg_base = ltc_pltcg_base_v();
1454 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1455
1456 for (i = 0; i < num_ltc; i++) {
1457 start = pltcg_base + i * ltc_stride;
1458 if ((addr >= start) && (addr < (start + ltc_stride))) {
1459 ltc_num = i;
1460 break;
1461 }
1462 }
1463 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
1464 priv_addr_table_index);
1465}
1466
1467static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1468 u32 *priv_addr_table,
1469 u32 *priv_addr_table_index)
1470{
1471 u32 num_ltc = g->ltc_count;
1472 u32 ltc_num;
1473
1474 for (ltc_num = 0; ltc_num < num_ltc; ltc_num++)
1475 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num,
1476 priv_addr_table, priv_addr_table_index);
1477}
1478
1405void gm20b_init_gr(struct gpu_ops *gops) 1479void gm20b_init_gr(struct gpu_ops *gops)
1406{ 1480{
1407 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1481 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
1478 gops->gr.fuse_override = gm20b_gr_fuse_override; 1552 gops->gr.fuse_override = gm20b_gr_fuse_override;
1479 gops->gr.load_smid_config = gr_gm20b_load_smid_config; 1553 gops->gr.load_smid_config = gr_gm20b_load_smid_config;
1480 gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; 1554 gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
1555 gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
1556 gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
1557 gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
1558 gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
1481} 1559}