summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Seema Khowala <seemaj@nvidia.com> 2017-06-06 18:47:17 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2017-07-19 02:42:07 -0400
commit f36e2a234b39cf7622c57ad51359629f5c425340 (patch)
tree 47136739b98de4024b72335b29b21d20f97d5903
parent 92d476bf279f80e896fd7247a267ae2202b91550 (diff)
gpu: nvgpu: support context regoptype for egpc/etpc
- add is_egpc_addr, is_etpc_addr and get_egpc_etpc_num gr ops
- add gr ops for decode and create egpc/etpc priv addr
- add etpc as part of ctxsw_regs

JIRA GPUT19X-49
Bug 200311674
Bug 1960226

Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Change-Id: I9a8be1804a9354238de2441093b3b136321b7e53
Reviewed-on: https://git-master.nvidia.com/r/1522442
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 11
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 105
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h 4
4 files changed, 106 insertions, 16 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 6fe29abe..ff8eb988 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -216,8 +216,19 @@ struct gpu_ops {
216 u32 mode); 216 u32 mode);
217 int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, 217 int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
218 struct gr_zcull_info *zcull_params); 218 struct gr_zcull_info *zcull_params);
219 int (*decode_egpc_addr)(struct gk20a *g,
220 u32 addr, int *addr_type,
221 u32 *gpc_num, u32 *tpc_num, u32 *broadcast_flags);
222 void (*egpc_etpc_priv_addr_table)(struct gk20a *g, u32 addr,
223 u32 gpc, u32 broadcast_flags, u32 *priv_addr_table,
224 u32 *priv_addr_table_index);
219 bool (*is_tpc_addr)(struct gk20a *g, u32 addr); 225 bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
226 bool (*is_egpc_addr)(struct gk20a *g, u32 addr);
227 bool (*is_etpc_addr)(struct gk20a *g, u32 addr);
228 void (*get_egpc_etpc_num)(struct gk20a *g, u32 addr,
229 u32 *gpc_num, u32 *tpc_num);
220 u32 (*get_tpc_num)(struct gk20a *g, u32 addr); 230 u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
231 u32 (*get_egpc_base)(struct gk20a *g);
221 bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr); 232 bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
222 bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr); 233 bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
223 bool (*get_lts_in_ltc_shared_base)(void); 234 bool (*get_lts_in_ltc_shared_base)(void);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
index 58e62d32..1676fb9a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a_sim.c
@@ -76,6 +76,8 @@ int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
76 &g->gr.ctx_vars.ctxsw_regs.ppc.count); 76 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
77 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0, 77 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ETPC_COUNT", 0,
78 &g->gr.ctx_vars.ctxsw_regs.etpc.count); 78 &g->gr.ctx_vars.ctxsw_regs.etpc.count);
79 gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
80 &g->gr.ctx_vars.ctxsw_regs.ppc.count);
79 81
80 err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst); 82 err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.inst);
81 err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data); 83 err |= !alloc_u32_list_gk20a(g, &g->gr.ctx_vars.ucode.fecs.data);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2aec662c..ff81e380 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6236,6 +6236,10 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6236 return 0; 6236 return 0;
6237 } 6237 }
6238 return 0; 6238 return 0;
6239 } else if (g->ops.gr.is_egpc_addr && g->ops.gr.is_egpc_addr(g, addr)) {
6240 return g->ops.gr.decode_egpc_addr(g,
6241 addr, addr_type, gpc_num,
6242 tpc_num, broadcast_flags);
6239 } else { 6243 } else {
6240 *addr_type = CTXSW_ADDR_TYPE_SYS; 6244 *addr_type = CTXSW_ADDR_TYPE_SYS;
6241 return 0; 6245 return 0;
@@ -6331,9 +6335,13 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6331 pri_gpc_addr(g, pri_gpccs_addr_mask(addr), 6335 pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
6332 gpc_num); 6336 gpc_num);
6333 } 6337 }
6334 } 6338 } else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
6335 6339 (addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
6336 if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) { 6340 g->ops.gr.egpc_etpc_priv_addr_table) {
6341 gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
6342 g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
6343 broadcast_flags, priv_addr_table, &t);
6344 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
6337 g->ops.gr.split_lts_broadcast_addr(g, addr, 6345 g->ops.gr.split_lts_broadcast_addr(g, addr,
6338 priv_addr_table, &t); 6346 priv_addr_table, &t);
6339 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) { 6347 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
@@ -6354,8 +6362,8 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6354 pri_tpc_addr(g, pri_tpccs_addr_mask(addr), 6362 pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
6355 gpc_num, tpc_num); 6363 gpc_num, tpc_num);
6356 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) 6364 else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
6357 err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, 6365 err = gr_gk20a_split_ppc_broadcast_addr(g,
6358 priv_addr_table, &t); 6366 addr, gpc_num, priv_addr_table, &t);
6359 else 6367 else
6360 priv_addr_table[t++] = addr; 6368 priv_addr_table[t++] = addr;
6361 } 6369 }
@@ -6382,8 +6390,10 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6382 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 6390 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
6383 6391
6384 /* implementation is crossed-up if either of these happen */ 6392 /* implementation is crossed-up if either of these happen */
6385 if (max_offsets > potential_offsets) 6393 if (max_offsets > potential_offsets) {
6394 gk20a_dbg_fn("max_offsets > potential_offsets");
6386 return -EINVAL; 6395 return -EINVAL;
6396 }
6387 6397
6388 if (!g->gr.ctx_vars.golden_image_initialized) 6398 if (!g->gr.ctx_vars.golden_image_initialized)
6389 return -ENODEV; 6399 return -ENODEV;
@@ -6401,6 +6411,8 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
6401 gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers); 6411 gr_gk20a_create_priv_addr_table(g, addr, &priv_registers[0], &num_registers);
6402 6412
6403 if ((max_offsets > 1) && (num_registers > max_offsets)) { 6413 if ((max_offsets > 1) && (num_registers > max_offsets)) {
6414 gk20a_dbg_fn("max_offsets = %d, num_registers = %d",
6415 max_offsets, num_registers);
6404 err = -EINVAL; 6416 err = -EINVAL;
6405 goto cleanup; 6417 goto cleanup;
6406 } 6418 }
@@ -6691,8 +6703,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6691 6703
6692 gk20a_dbg_info(" gpc = %d tpc = %d", 6704 gk20a_dbg_info(" gpc = %d tpc = %d",
6693 gpc_num, tpc_num); 6705 gpc_num, tpc_num);
6694 } else 6706 } else {
6707 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
6708 "extended region has tpc reg only");
6695 return -EINVAL; 6709 return -EINVAL;
6710 }
6696 6711
6697 buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); 6712 buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
6698 /* note below is in words/num_registers */ 6713 /* note below is in words/num_registers */
@@ -6937,8 +6952,42 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
6937 } 6952 }
6938 } 6953 }
6939 } 6954 }
6955 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
6956 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
6957 if (!(g->ops.gr.get_egpc_base))
6958 return -EINVAL;
6959
6960 for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
6961 for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.etpc.count; i++) {
6962 reg = &g->gr.ctx_vars.ctxsw_regs.etpc.l[i];
6963 address = reg->addr;
6964 tpc_addr = pri_tpccs_addr_mask(address);
6965 base_address = g->ops.gr.get_egpc_base(g) +
6966 (gpc_num * gpc_stride) +
6967 tpc_in_gpc_base +
6968 (tpc_num * tpc_in_gpc_stride);
6969 address = base_address + tpc_addr;
6970 /*
6971 * The data for the TPCs is interleaved in the context buffer.
6972 * Example with num_tpcs = 2
6973 * 0 1 2 3 4 5 6 7 8 9 10 11 ...
6974 * 0-0 1-0 0-1 1-1 0-2 1-2 0-3 1-3 0-4 1-4 0-5 1-5 ...
6975 */
6976 tpc_offset = (reg->index * num_tpcs) + (tpc_num * 4);
6977
6978 if (pri_addr == address) {
6979 *priv_offset = tpc_offset;
6980 nvgpu_log(g,
6981 gpu_dbg_fn | gpu_dbg_gpu_dbg,
6982 "egpc/etpc priv_offset=0x%#08x",
6983 *priv_offset);
6984 return 0;
6985 }
6986 }
6987 }
6940 } 6988 }
6941 6989
6990
6942 /* Process the PPC segment. */ 6991 /* Process the PPC segment. */
6943 if (addr_type == CTXSW_ADDR_TYPE_PPC) { 6992 if (addr_type == CTXSW_ADDR_TYPE_PPC) {
6944 for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) { 6993 for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
@@ -6986,7 +7035,6 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
6986 } 7035 }
6987 } 7036 }
6988 } 7037 }
6989
6990 return -EINVAL; 7038 return -EINVAL;
6991} 7039}
6992 7040
@@ -7046,6 +7094,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7046 err = gr_gk20a_decode_priv_addr(g, addr, &addr_type, 7094 err = gr_gk20a_decode_priv_addr(g, addr, &addr_type,
7047 &gpc_num, &tpc_num, &ppc_num, &be_num, 7095 &gpc_num, &tpc_num, &ppc_num, &be_num,
7048 &broadcast_flags); 7096 &broadcast_flags);
7097 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
7098 "addr_type = %d, broadcast_flags: %08x",
7099 addr_type, broadcast_flags);
7049 if (err) 7100 if (err)
7050 return err; 7101 return err;
7051 7102
@@ -7066,6 +7117,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7066 } 7117 }
7067 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); 7118 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7068 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7119 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7120 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
7069 7121
7070 /* If found in Ext buffer, ok. 7122 /* If found in Ext buffer, ok.
7071 * If it failed and we expected to find it there (quad offset) 7123 * If it failed and we expected to find it there (quad offset)
@@ -7074,8 +7126,12 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7074 err = gr_gk20a_find_priv_offset_in_ext_buffer(g, 7126 err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
7075 addr, is_quad, quad, context_buffer, 7127 addr, is_quad, quad, context_buffer,
7076 context_buffer_size, priv_offset); 7128 context_buffer_size, priv_offset);
7077 if (!err || (err && is_quad)) 7129 if (!err || (err && is_quad)) {
7130 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
7131 "err = %d, is_quad = %s",
7132 err, is_quad ? "true" : false);
7078 return err; 7133 return err;
7134 }
7079 7135
7080 if ((addr_type == CTXSW_ADDR_TYPE_SYS) || 7136 if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
7081 (addr_type == CTXSW_ADDR_TYPE_BE)) { 7137 (addr_type == CTXSW_ADDR_TYPE_BE)) {
@@ -7116,8 +7172,11 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7116 err = gr_gk20a_determine_ppc_configuration(g, context, 7172 err = gr_gk20a_determine_ppc_configuration(g, context,
7117 &num_ppcs, &ppc_mask, 7173 &num_ppcs, &ppc_mask,
7118 &reg_list_ppc_count); 7174 &reg_list_ppc_count);
7119 if (err) 7175 if (err) {
7176 nvgpu_err(g, "determine ppc configuration failed");
7120 return err; 7177 return err;
7178 }
7179
7121 7180
7122 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); 7181 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7123 7182
@@ -7130,16 +7189,28 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7130 7189
7131 /* Find the offset in the GPCCS segment.*/ 7190 /* Find the offset in the GPCCS segment.*/
7132 if (i == gpc_num) { 7191 if (i == gpc_num) {
7192 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
7193 "gpc_priv_offset 0x%#08x",
7194 gpc_priv_offset);
7133 offset_to_segment = gpc_priv_offset * 7195 offset_to_segment = gpc_priv_offset *
7134 ctxsw_prog_ucode_header_size_in_bytes(); 7196 ctxsw_prog_ucode_header_size_in_bytes();
7135 7197
7136 if (addr_type == CTXSW_ADDR_TYPE_TPC) { 7198 if (addr_type == CTXSW_ADDR_TYPE_TPC) {
7137 /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/ 7199 /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
7200 } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
7201 (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
7202 nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
7203 "egpc etpc offset_to_segment 0x%#08x",
7204 offset_to_segment);
7205 offset_to_segment +=
7206 ((gr->ctx_vars.ctxsw_regs.tpc.count *
7207 num_tpcs) << 2);
7138 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { 7208 } else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
7139 /* The ucode stores TPC data before PPC data. 7209 /* The ucode stores TPC data before PPC data.
7140 * Advance offset past TPC data to PPC data. */ 7210 * Advance offset past TPC data to PPC data. */
7141 offset_to_segment += 7211 offset_to_segment +=
7142 ((gr->ctx_vars.ctxsw_regs.tpc.count * 7212 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7213 gr->ctx_vars.ctxsw_regs.etpc.count) *
7143 num_tpcs) << 2); 7214 num_tpcs) << 2);
7144 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { 7215 } else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
7145 /* The ucode stores TPC/PPC data before GPC data. 7216 /* The ucode stores TPC/PPC data before GPC data.
@@ -7149,13 +7220,15 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7149 GPU_LIT_NUM_PES_PER_GPC); 7220 GPU_LIT_NUM_PES_PER_GPC);
7150 if (num_pes_per_gpc > 1) { 7221 if (num_pes_per_gpc > 1) {
7151 offset_to_segment += 7222 offset_to_segment +=
7152 (((gr->ctx_vars.ctxsw_regs.tpc.count * 7223 ((((gr->ctx_vars.ctxsw_regs.tpc.count +
7153 num_tpcs) << 2) + 7224 gr->ctx_vars.ctxsw_regs.etpc.count) *
7154 ((reg_list_ppc_count * num_ppcs) << 2)); 7225 num_tpcs) << 2) +
7226 ((reg_list_ppc_count * num_ppcs) << 2));
7155 } else { 7227 } else {
7156 offset_to_segment += 7228 offset_to_segment +=
7157 ((gr->ctx_vars.ctxsw_regs.tpc.count * 7229 (((gr->ctx_vars.ctxsw_regs.tpc.count +
7158 num_tpcs) << 2); 7230 gr->ctx_vars.ctxsw_regs.etpc.count) *
7231 num_tpcs) << 2);
7159 } 7232 }
7160 } else { 7233 } else {
7161 gk20a_dbg_fn("Unknown address type."); 7234 gk20a_dbg_fn("Unknown address type.");
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
index b89124d6..535977f9 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -237,6 +237,8 @@ enum ctxsw_addr_type {
237 CTXSW_ADDR_TYPE_PPC = 4, 237 CTXSW_ADDR_TYPE_PPC = 4,
238 CTXSW_ADDR_TYPE_LTCS = 5, 238 CTXSW_ADDR_TYPE_LTCS = 5,
239 CTXSW_ADDR_TYPE_FBPA = 6, 239 CTXSW_ADDR_TYPE_FBPA = 6,
240 CTXSW_ADDR_TYPE_EGPC = 7,
241 CTXSW_ADDR_TYPE_ETPC = 8,
240}; 242};
241 243
242#define PRI_BROADCAST_FLAGS_NONE 0 244#define PRI_BROADCAST_FLAGS_NONE 0
@@ -247,5 +249,7 @@ enum ctxsw_addr_type {
247#define PRI_BROADCAST_FLAGS_LTCS BIT(4) 249#define PRI_BROADCAST_FLAGS_LTCS BIT(4)
248#define PRI_BROADCAST_FLAGS_LTSS BIT(5) 250#define PRI_BROADCAST_FLAGS_LTSS BIT(5)
249#define PRI_BROADCAST_FLAGS_FBPA BIT(6) 251#define PRI_BROADCAST_FLAGS_FBPA BIT(6)
252#define PRI_BROADCAST_FLAGS_EGPC BIT(7)
253#define PRI_BROADCAST_FLAGS_ETPC BIT(8)
250 254
251#endif /* GR_PRI_GK20A_H */ 255#endif /* GR_PRI_GK20A_H */