From 78151bb6f9cf9f355c57a28df0c7e4cd867c3322 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Fri, 6 Apr 2018 18:38:18 +0530 Subject: gpu: nvgpu: use HAL for chiplet offset We currently use hard-coded values of NV_PERF_PMMGPC_CHIPLET_OFFSET and NV_PMM_FBP_STRIDE which are incorrect for Volta. Add new GR HAL get_pmm_per_chiplet_offset() to get correct value per-chip. Set gr_gm20b_get_pmm_per_chiplet_offset() for older chips. Set gr_gv11b_get_pmm_per_chiplet_offset() for Volta. Use HAL instead of hard-coded values wherever required. Bug 200398811 Jira NVGPU-556 Change-Id: I947e7febd4f84fae740a1bc74f99d72e1df523aa Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1690028 Reviewed-by: svc-mobile-coverity Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 ++++---- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 8 +++++++- drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 3 ++- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 ++ drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++ drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 ++ drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 15 ++++++++++----- drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 1 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 ++ drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_perf_gm20b.h | 10 +++++++++- drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_perf_gp106.h | 10 +++++++++- drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_perf_gp10b.h | 10 +++++++++- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 2 ++ drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 2 ++ 16 files changed, 66 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index edc1c5ff..c2a8d6ce 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -470,6 +470,7 @@ struct gpu_ops { u32 addr, 
u32 *priv_addr_table, u32 *num_registers); + u32 (*get_pmm_per_chiplet_offset)(void); } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 04d00e55..e4344b9f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -63,9 +63,7 @@ #include #define BLK_SIZE (256) -#define NV_PMM_FBP_STRIDE 0x1000 #define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200 -#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000 #define NV_PERF_PMMGPCROUTER_STRIDE 0x0200 #define NV_PCFG_BASE 0x00088000 #define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020 @@ -7563,7 +7561,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, count, offset, max_cnt, base, ~0)) return -EINVAL; - base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); + base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num); if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_gpc, count, offset, max_cnt, base, ~0)) @@ -7703,7 +7701,9 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) &g->gr.ctx_vars.ctxsw_regs.fbp, &count, &offset, hwpm_ctxsw_reg_count_max, 0, - g->gr.num_fbps, NV_PMM_FBP_STRIDE, ~0)) + g->gr.num_fbps, + g->ops.gr.get_pmm_per_chiplet_offset(), + ~0)) goto cleanup; /* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */ diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 08ac20e4..262957c5 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1,7 +1,7 @@ /* * GM20B GPC MMU * - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +41,7 @@ #include #include #include +#include void gr_gm20b_init_gpc_mmu(struct gk20a *g) { @@ -1541,3 +1542,8 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g) gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); } + +u32 gr_gm20b_get_pmm_per_chiplet_offset(void) +{ + return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1); +} diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 3f604028..aabf6d95 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -1,7 +1,7 @@ /* * GM20B GPC MMU * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -135,4 +135,5 @@ void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table_index); void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 global_esr); +u32 gr_gm20b_get_pmm_per_chiplet_offset(void); #endif diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index ac1c7123..bbe2b100 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -322,6 +322,8 @@ static const struct gpu_ops gm20b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gm20b_get_pmm_per_chiplet_offset, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index f2ce4050..91bc614c 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ 
b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -385,6 +385,8 @@ static const struct gpu_ops gp106_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gm20b_get_pmm_per_chiplet_offset, }, .fb = { .reset = gp106_fb_reset, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index a31418f7..5c10d79d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -353,6 +353,8 @@ static const struct gpu_ops gp10b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gm20b_get_pmm_per_chiplet_offset, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index c7c648a7..fe235f89 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -432,6 +432,8 @@ static const struct gpu_ops gv100_ops = { .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gv11b_get_pmm_per_chiplet_offset, }, .fb = { .reset = gv100_fb_reset, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 67603739..f8461f9d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -4513,10 +4513,16 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr, return 0; } -static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset) +u32 gr_gv11b_get_pmm_per_chiplet_offset(void) +{ + return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1); +} + +static u32 gr_gv11b_pri_pmmgpc_addr(struct gk20a *g, u32 gpc_num, 
+ u32 domain_idx, u32 offset) { return perf_pmmgpc_base_v() + - (gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) + + (gpc_num * g->ops.gr.get_pmm_per_chiplet_offset()) + (domain_idx * perf_pmmgpc_perdomain_offset_v()) + offset; } @@ -4531,8 +4537,7 @@ static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g, for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) { base = perf_pmmfbp_base_v() + - (fbp_num * - (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)); + (fbp_num * g->ops.gr.get_pmm_per_chiplet_offset()); for (domain_idx = domain_start; domain_idx < (domain_start + num_domains); @@ -4653,7 +4658,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, domain_idx < (pmm_domain_start + num_domains); domain_idx++) { priv_addr_table[t++] = - gr_gv11b_pri_pmmgpc_addr(gpc_num, + gr_gv11b_pri_pmmgpc_addr(g, gpc_num, domain_idx, offset); } } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 3c581326..1a3a851e 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -234,6 +234,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, int gr_gv11b_handle_ssync_hww(struct gk20a *g); u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm); +u32 gr_gv11b_get_pmm_per_chiplet_offset(void); int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr, int *addr_type, u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index e39df1db..d0a564db 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -405,6 +405,8 @@ static const struct gpu_ops gv11b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gv11b_get_pmm_per_chiplet_offset, }, .fb = { .reset = gv11b_fb_reset, diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_perf_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_perf_gm20b.h index ae34cabd..a94ba307 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_perf_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_perf_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,6 +56,14 @@ #ifndef _hw_perf_gm20b_h_ #define _hw_perf_gm20b_h_ +static inline u32 perf_pmmsys_base_v(void) +{ + return 0x001b0000U; +} +static inline u32 perf_pmmsys_extent_v(void) +{ + return 0x001b0fffU; +} static inline u32 perf_pmasys_control_r(void) { return 0x001b4000U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_perf_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_perf_gp106.h index b0182789..334cd200 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_perf_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_perf_gp106.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,6 +56,14 @@ #ifndef _hw_perf_gp106_h_ #define _hw_perf_gp106_h_ +static inline u32 perf_pmmsys_base_v(void) +{ + return 0x001b0000U; +} +static inline u32 perf_pmmsys_extent_v(void) +{ + return 0x001b0fffU; +} static inline u32 perf_pmasys_control_r(void) { return 0x001b4000U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_perf_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_perf_gp10b.h index aa0fafe7..43424e13 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_perf_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_perf_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,6 +56,14 @@ #ifndef _hw_perf_gp10b_h_ #define _hw_perf_gp10b_h_ +static inline u32 perf_pmmsys_base_v(void) +{ + return 0x001b0000U; +} +static inline u32 perf_pmmsys_extent_v(void) +{ + return 0x001b0fffU; +} static inline u32 perf_pmasys_control_r(void) { return 0x001b4000U; diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index c56e900e..53ba1e14 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -227,6 +227,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gm20b_get_pmm_per_chiplet_offset, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c 
b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index a4496044..0b9049dd 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -263,6 +263,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, + .get_pmm_per_chiplet_offset = + gr_gv11b_get_pmm_per_chiplet_offset, }, .fb = { .reset = gv11b_fb_reset, -- cgit v1.2.2