From c1064c27dfa26847234153652cf2b88167b90adf Mon Sep 17 00:00:00 2001
From: tk
Date: Thu, 3 Nov 2016 15:36:24 +0530
Subject: gpu: nvgpu: FBPA broadcast support

Add FBPA broadcast support to hwpm regops

Bug 200249125

Change-Id: Iaf413a162a8985bcce94ff96ec6318e129609c4c
Signed-off-by: Tejaswi K
Reviewed-on: http://git-master/r/1247408
(cherry picked from commit 4e0a805f5a8762d1a90f3b5dd76902a04941d9ef)
Reviewed-on: http://git-master/r/1252160
Tested-by: Peter Daifuku
Reviewed-by: Seshendra Gadagottu
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  2 ++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 14 ++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h  | 30 ++++++++++++++++++++++++++++--
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c     |  9 +++++++++
 drivers/gpu/nvgpu/gm206/hw_proj_gm206.h | 14 +++++++++++++-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c     |  9 +++++++++
 6 files changed, 75 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a263743f..dd64e3b2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -119,6 +119,8 @@ enum gk20a_cbc_op {
 #define GPU_LIT_LTS_STRIDE 21
 #define GPU_LIT_NUM_FBPAS 22
 #define GPU_LIT_FBPA_STRIDE 23
+#define GPU_LIT_FBPA_BASE 24
+#define GPU_LIT_FBPA_SHARED_BASE 25
 
 #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v)
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ee8b3b63..f5205d75 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6567,6 +6567,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
 		else if (g->ops.gr.is_ltcn_ltss_addr(g, addr))
 			*broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
 		return 0;
+	} else if (pri_is_fbpa_addr(g, addr)) {
+		*addr_type = CTXSW_ADDR_TYPE_FBPA;
+		if (pri_is_fbpa_addr_shared(g, addr)) {
+			*broadcast_flags |= PRI_BROADCAST_FLAGS_FBPA;
+			return 0;
+		}
+		return 0;
 	} else {
 		*addr_type = CTXSW_ADDR_TYPE_SYS;
 		return 0;
@@ -6609,6 +6616,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 	u32 broadcast_flags;
 	u32 t;
 	int err;
+	u32 fbpa_num;
 
 	t = 0;
 	*num_registers = 0;
@@ -6669,6 +6677,12 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
 		g->ops.gr.split_ltc_broadcast_addr(g, addr,
 							priv_addr_table, &t);
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
+		for (fbpa_num = 0;
+			fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
+			fbpa_num++)
+			priv_addr_table[t++] = pri_fbpa_addr(g,
+					pri_fbpa_addr_mask(g, addr), fbpa_num);
 	} else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
 		if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
 			for (tpc_num = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
index 411430c7..c3ced432 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -109,6 +109,10 @@ static inline u32 pri_tpccs_addr_mask(u32 addr)
 {
 	return addr & ((1 << pri_tpccs_addr_width()) - 1);
 }
+static inline u32 pri_fbpa_addr_mask(struct gk20a *g, u32 addr)
+{
+	return addr & (nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE) - 1);
+}
 static inline u32 pri_tpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc)
 {
 	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
@@ -127,7 +131,27 @@ static inline bool pri_is_tpc_addr_shared(struct gk20a *g, u32 addr)
 		(addr < (tpc_in_gpc_shared_base + tpc_in_gpc_stride));
 }
 
-
+static inline u32 pri_fbpa_addr(struct gk20a *g, u32 addr, u32 fbpa)
+{
+	return (nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE) + addr +
+			(fbpa * nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE)));
+}
+static inline bool pri_is_fbpa_addr_shared(struct gk20a *g, u32 addr)
+{
+	u32 fbpa_shared_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_SHARED_BASE);
+	u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
+	return ((addr >= fbpa_shared_base) &&
+		(addr < (fbpa_shared_base + fbpa_stride)));
+}
+static inline bool pri_is_fbpa_addr(struct gk20a *g, u32 addr)
+{
+	u32 fbpa_base = nvgpu_get_litter_value(g, GPU_LIT_FBPA_BASE);
+	u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
+	u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
+	return (((addr >= fbpa_base) &&
+		(addr < (fbpa_base + num_fbpas * fbpa_stride)))
+		|| pri_is_fbpa_addr_shared(g, addr));
+}
 /*
  * BE pri addressing
  */
@@ -209,7 +233,8 @@ enum ctxsw_addr_type {
 	CTXSW_ADDR_TYPE_TPC = 2,
 	CTXSW_ADDR_TYPE_BE = 3,
 	CTXSW_ADDR_TYPE_PPC = 4,
-	CTXSW_ADDR_TYPE_LTCS = 5
+	CTXSW_ADDR_TYPE_LTCS = 5,
+	CTXSW_ADDR_TYPE_FBPA = 6,
 };
 
 #define PRI_BROADCAST_FLAGS_NONE 0
@@ -219,5 +244,6 @@ enum ctxsw_addr_type {
 #define PRI_BROADCAST_FLAGS_PPC BIT(3)
 #define PRI_BROADCAST_FLAGS_LTCS BIT(4)
 #define PRI_BROADCAST_FLAGS_LTSS BIT(5)
+#define PRI_BROADCAST_FLAGS_FBPA BIT(6)
 
 #endif /* GR_PRI_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 341c2e72..5b2b2552 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -34,6 +34,9 @@
 #include "dbg_gpu_gk20a.h"
 #include "css_gr_gk20a.h"
 
+#define GK20A_FBPA_BASE 0x00110000
+#define GK20A_FBPA_SHARED_BASE 0x0010F000
+
 static struct gpu_ops gk20a_ops = {
 	.clock_gating = {
 		.slcg_gr_load_gating_prod =
@@ -124,6 +127,12 @@ static int gk20a_get_litter_value(struct gk20a *g, int value)
 	case GPU_LIT_FBPA_STRIDE:
 		ret = proj_fbpa_stride_v();
 		break;
+	case GPU_LIT_FBPA_BASE:
+		ret = GK20A_FBPA_BASE;
+		break;
+	case GPU_LIT_FBPA_SHARED_BASE:
+		ret = GK20A_FBPA_SHARED_BASE;
+		break;
 	default:
 		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
 		BUG();
diff --git a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h
index 6c21b39a..bdca905f 100644
--- a/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h
+++ b/drivers/gpu/nvgpu/gm206/hw_proj_gm206.h
@@ -70,14 +70,26 @@ static inline u32 proj_lts_stride_v(void)
 {
 	return 0x00000200;
 }
+static inline u32 proj_fbpa_base_v(void)
+{
+	return 0x00110000;
+}
+static inline u32 proj_fbpa_shared_base_v(void)
+{
+	return 0x0010f000;
+}
 static inline u32 proj_fbpa_stride_v(void)
 {
-	return 0x00004000;
+	return 0x00001000;
 }
 static inline u32 proj_ppc_in_gpc_base_v(void)
 {
 	return 0x00003000;
 }
+static inline u32 proj_ppc_in_gpc_shared_base_v(void)
+{
+	return 0x00003e00;
+}
 static inline u32 proj_ppc_in_gpc_stride_v(void)
 {
 	return 0x00000200;
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 35747f27..71ffe288 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -45,6 +45,9 @@
 
 #define PRIV_SECURITY_DISABLE 0x01
 
+#define GM20B_FBPA_BASE 0x00110000
+#define GM20B_FBPA_SHARED_BASE 0x0010F000
+
 static struct gpu_ops gm20b_ops = {
 	.clock_gating = {
 		.slcg_bus_load_gating_prod =
@@ -169,6 +172,12 @@ static int gm20b_get_litter_value(struct gk20a *g, int value)
 	case GPU_LIT_FBPA_STRIDE:
 		ret = proj_fbpa_stride_v();
 		break;
+	case GPU_LIT_FBPA_BASE:
+		ret = GM20B_FBPA_BASE;
+		break;
+	case GPU_LIT_FBPA_SHARED_BASE:
+		ret = GM20B_FBPA_SHARED_BASE;
+		break;
 	default:
 		gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
 		BUG();
-- 
cgit v1.2.2