From ce0fe5082ebb8a7e0ca5a8992e17ae4547d4db5e Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Fri, 15 Apr 2016 18:12:34 -0700
Subject: gpu: nvgpu: hwpm broadcast register support

Add support for hwpm broadcast registers (ltc and lts)

In gr_gk20a_find_priv_offset_in_buffer, replace "Unknown address type"
error with informational message: gr_gk20a_exec_ctx_ops calls
gk20a_get_ctx_buffer_offsets and if that fails, calls
gr_gk20a_get_pm_ctx_buffer_offsets; HWPM registers will fail the first
call, so an error or warning is overkill.

Bug 1648200

Change-Id: I197b82579e9894652add4ff254418f818981415a
Signed-off-by: Peter Daifuku
Reviewed-on: http://git-master/r/1131365
(cherry picked from commit 9f30a92c5d87f6dadd34cc37396a6b10e3a72751)
Reviewed-on: http://git-master/r/1133628
(cherry picked from commit 7eb7cfd998852ba7f7c4c40d3db286f66e83ab3a)
Reviewed-on: http://git-master/r/1127749
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a.h        |  9 ++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c     | 48 +++++++++++++++++++--
 drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | 23 +++++++---
 drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h |  8 ++++
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c     | 78 ++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 +++++++++
 6 files changed, 177 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d5310b02..275619c9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -178,6 +178,15 @@ struct gpu_ops {
 				struct gr_zcull_info *zcull_params);
 		bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
 		u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
+		bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
+		bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
+		bool (*get_lts_in_ltc_shared_base)(void);
+		void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr,
+							u32 *priv_addr_table,
+							u32 *priv_addr_table_index);
+		void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr,
+							u32 *priv_addr_table,
+							u32 *priv_addr_table_index);
 		void (*detect_sm_arch)(struct gk20a *g);
 		int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 901fea8c..943b4085 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6318,6 +6318,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
 		}
 		*be_num = pri_get_be_num(g, addr);
 		return 0;
+	} else if (pri_is_ltc_addr(addr)) {
+		*addr_type = CTXSW_ADDR_TYPE_LTCS;
+		if (g->ops.gr.is_ltcs_ltss_addr(g, addr))
+			*broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
+		else if (g->ops.gr.is_ltcn_ltss_addr(g, addr))
+			*broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
+		return 0;
 	} else {
 		*addr_type = CTXSW_ADDR_TYPE_SYS;
 		return 0;
@@ -6412,7 +6419,15 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 						pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
 							     gpc_num);
 		}
-	} else {
+	}
+
+	if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
+		g->ops.gr.split_lts_broadcast_addr(g, addr,
+							priv_addr_table, &t);
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
+		g->ops.gr.split_ltc_broadcast_addr(g, addr,
+							priv_addr_table, &t);
+	} else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
 		if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
 			for (tpc_num = 0;
 			     tpc_num < g->gr.gpc_tpc_count[gpc_num];
@@ -7296,8 +7311,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 					 num_tpcs) << 2);
 			}
 		} else {
-			gk20a_err(dev_from_gk20a(g),
-				" Unknown address type.\n");
+			gk20a_dbg_fn("Unknown address type.");
 			return -EINVAL;
 		}
 		err = gr_gk20a_process_context_buffer_priv_segment(g,
@@ -8653,6 +8667,28 @@ static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g,
 	return 0;
 }
 
+static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr)
+{
+	return false;
+}
+
+static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr)
+{
+	return false;
+}
+
+static void gr_gk20a_split_lts_broadcast_addr_stub(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+}
+
+static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -8723,4 +8759,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags;
 	gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts;
 	gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering;
+	gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub;
+	gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub;
+	gops->gr.split_lts_broadcast_addr =
+		gr_gk20a_split_lts_broadcast_addr_stub;
+	gops->gr.split_ltc_broadcast_addr =
+		gr_gk20a_split_ltc_broadcast_addr_stub;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
index 248fa291..62e276de 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics Context Pri Register Addressing
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -167,12 +167,21 @@ static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
 		ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
 }
 
+/*
+ * LTC pri addressing
+ */
+static inline bool pri_is_ltc_addr(u32 addr)
+{
+	return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v()));
+}
+
 enum ctxsw_addr_type {
-	CTXSW_ADDR_TYPE_SYS = 0,
-	CTXSW_ADDR_TYPE_GPC = 1,
-	CTXSW_ADDR_TYPE_TPC = 2,
-	CTXSW_ADDR_TYPE_BE  = 3,
-	CTXSW_ADDR_TYPE_PPC = 4
+	CTXSW_ADDR_TYPE_SYS  = 0,
+	CTXSW_ADDR_TYPE_GPC  = 1,
+	CTXSW_ADDR_TYPE_TPC  = 2,
+	CTXSW_ADDR_TYPE_BE   = 3,
+	CTXSW_ADDR_TYPE_PPC  = 4,
+	CTXSW_ADDR_TYPE_LTCS = 5
 };
 
 #define PRI_BROADCAST_FLAGS_NONE  0
@@ -180,5 +189,7 @@ enum ctxsw_addr_type {
 #define PRI_BROADCAST_FLAGS_TPC   BIT(1)
 #define PRI_BROADCAST_FLAGS_BE    BIT(2)
 #define PRI_BROADCAST_FLAGS_PPC   BIT(3)
+#define PRI_BROADCAST_FLAGS_LTCS  BIT(4)
+#define PRI_BROADCAST_FLAGS_LTSS  BIT(5)
 
 #endif /* GR_PRI_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
index 94770431..84b9c9a6 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
@@ -50,6 +50,14 @@
 #ifndef _hw_ltc_gk20a_h_
 #define _hw_ltc_gk20a_h_
 
+static inline u32 ltc_pltcg_base_v(void)
+{
+	return 0x00140000;
+}
+static inline u32 ltc_pltcg_extent_v(void)
+{
+	return 0x0017ffff;
+}
 static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
 {
 	return 0x001410c8;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 0659eefd..3b0a399d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -29,6 +29,7 @@
 #include "hw_fifo_gm20b.h"
 #include "hw_fb_gm20b.h"
 #include "hw_top_gm20b.h"
+#include "hw_ltc_gm20b.h"
 #include "hw_ctxsw_prog_gm20b.h"
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
 	return 0;
 }
 
+static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
+{
+	u32 ltc_shared_base = ltc_ltcs_ltss_v();
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+
+	return (addr >= ltc_shared_base) &&
+		(addr < (ltc_shared_base + lts_stride));
+}
+
+static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
+{
+	u32 lts_shared_base = ltc_ltc0_ltss_v();
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+	u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
+	u32 base_offset = lts_shared_base & addr_mask;
+	u32 end_offset = base_offset + lts_stride;
+
+	return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) &&
+		((addr & addr_mask) >= base_offset) &&
+		((addr & addr_mask) < end_offset);
+}
+
+static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+	u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
+	u32 index = *priv_addr_table_index;
+	u32 lts_num;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+
+	for (lts_num = 0; lts_num < num_ltc_slices; lts_num++)
+		priv_addr_table[index++] = ltc_ltc0_lts0_v() +
+						ltc_num * ltc_stride +
+						lts_num * lts_stride +
+						(addr & (lts_stride - 1));
+
+	*priv_addr_table_index = index;
+}
+
+static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+	u32 num_ltc = g->ltc_count;
+	u32 i, start, ltc_num = 0;
+	u32 pltcg_base = ltc_pltcg_base_v();
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+
+	for (i = 0; i < num_ltc; i++) {
+		start = pltcg_base + i * ltc_stride;
+		if ((addr >= start) && (addr < (start + ltc_stride))) {
+			ltc_num = i;
+			break;
+		}
+	}
+	gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
+				priv_addr_table_index);
+}
+
+static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+	u32 num_ltc = g->ltc_count;
+	u32 ltc_num;
+
+	for (ltc_num = 0; ltc_num < num_ltc; ltc_num++)
+		gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num,
+				priv_addr_table, priv_addr_table_index);
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.fuse_override = gm20b_gr_fuse_override;
 	gops->gr.load_smid_config = gr_gm20b_load_smid_config;
 	gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
+	gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
+	gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
+	gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
+	gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index aa01e945..8c00520c 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -50,6 +50,26 @@
 #ifndef _hw_ltc_gm20b_h_
 #define _hw_ltc_gm20b_h_
 
+static inline u32 ltc_pltcg_base_v(void)
+{
+	return 0x00140000;
+}
+static inline u32 ltc_pltcg_extent_v(void)
+{
+	return 0x0017ffff;
+}
+static inline u32 ltc_ltc0_ltss_v(void)
+{
+	return 0x00140200;
+}
+static inline u32 ltc_ltc0_lts0_v(void)
+{
+	return 0x00140400;
+}
+static inline u32 ltc_ltcs_ltss_v(void)
+{
+	return 0x0017e200;
+}
 static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
 {
 	return 0x0014046c;
-- 
cgit v1.2.2