summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv100/gr_gv100.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2018-03-16 08:24:55 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-03-21 09:04:35 -0400
commit66751bc05d7a1efca3668d59a2820e3e92985f91 (patch)
treeab85f317d77c8c76d6a7430039d19d406b9eb8f5 /drivers/gpu/nvgpu/gv100/gr_gv100.c
parentc5ca711f1efbd30fa760df139f3b63aa471d28a9 (diff)
gpu: nvgpu: gv100: fix num_fbpas while adding ctxsw buffer entries
For LIST_nv_pm_fbpa_ctx_regs, we currently call add_ctxsw_buffer_map_entries_subunits() to add registers corresponding to all the FBPAs. But while configuring the total number of registers, we do not consider floorswept FBPAs, and that causes misalignment in subsequent lists for GV100.

Fix this by reading the disabled/floorswept FBPAs from fuse and considering only those FBPAs which are active for GV100.

Add a new HAL (*add_ctxsw_reg_pm_fbpa) to support this setting, and define a common HAL gr_gk20a_add_ctxsw_reg_pm_fbpa() for all chips except GV100. Define a GV100-specific gr_gv100_add_ctxsw_reg_pm_fbpa() with the above-mentioned implementation to consider floorsweeping.

Bug 1998067
Change-Id: Id560551bb0b8142791c117b6d27864566c90b489
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1676654
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv100/gr_gv100.c')
-rw-r--r--drivers/gpu/nvgpu/gv100/gr_gv100.c44
1 files changed, 44 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index bf143dbb..3aed7a19 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -34,6 +34,8 @@
34 34
35#include <nvgpu/hw/gv100/hw_gr_gv100.h> 35#include <nvgpu/hw/gv100/hw_gr_gv100.h>
36#include <nvgpu/hw/gv100/hw_proj_gv100.h> 36#include <nvgpu/hw/gv100/hw_proj_gv100.h>
37#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
38
37 39
38/* 40/*
39 * Estimate performance if the given logical TPC in the given logical GPC were 41 * Estimate performance if the given logical TPC in the given logical GPC were
@@ -329,3 +331,45 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g)
329 331
330 return size; 332 return size;
331} 333}
334
335int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
336 struct ctxsw_buf_offset_map_entry *map,
337 struct aiv_list_gk20a *regs,
338 u32 *count, u32 *offset,
339 u32 max_cnt, u32 base,
340 u32 num_fbpas, u32 stride, u32 mask)
341{
342 u32 fbpa_id;
343 u32 idx;
344 u32 cnt = *count;
345 u32 off = *offset;
346 u32 active_fbpa_mask;
347
348 if ((cnt + (regs->count * num_fbpas)) > max_cnt)
349 return -EINVAL;
350
351 /*
352 * Read active fbpa mask from fuse
353 * Note that 0:enable and 1:disable in value read from fuse so we've to
354 * flip the bits.
355 * Also set unused bits to zero
356 */
357 active_fbpa_mask = nvgpu_readl(g, fuse_status_opt_fbio_r());
358 active_fbpa_mask = ~active_fbpa_mask;
359 active_fbpa_mask = active_fbpa_mask & ((1 << num_fbpas) - 1);
360
361 for (idx = 0; idx < regs->count; idx++) {
362 for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) {
363 if (active_fbpa_mask & BIT(fbpa_id)) {
364 map[cnt].addr = base +
365 (regs->l[idx].addr & mask) +
366 (fbpa_id * stride);
367 map[cnt++].offset = off;
368 off += 4;
369 }
370 }
371 }
372 *count = cnt;
373 *offset = off;
374 return 0;
375}