path: root/drivers/gpu/nvgpu/gp106/acr_gp106.c
author	Mahantesh Kumbar <mkumbar@nvidia.com>	2018-05-20 11:08:33 -0400
committer	Tejal Kudav <tkudav@nvidia.com>	2018-06-14 09:44:07 -0400
commit	25fc64b944d12c007771efe24badda78be4e4cb7 (patch)
tree	7b2a5d7270824008eab68be1354a9c671cad6203 /drivers/gpu/nvgpu/gp106/acr_gp106.c
parent	7aded206bc3eb0f36422e9f6f3dab3e065e7e7e4 (diff)
gpu: nvgpu: Multiple WPR support
The WPR region is divided into several sub-WPRs: one for each Falcon and one common region shared between the Falcons that bootstrap other Falcons.
- Defined and used the flag NVGPU_SUPPORT_MULTIPLE_WPR to indicate multiple-WPR support.
- Added struct lsfm_sub_wpr to hold subWPR header info.
- Added struct lsf_shared_sub_wpr_header to hold subWPR info; it is copied into the WPR blob after LSF_WPR_HEADER.
- Set NVGPU_SUPPORT_MULTIPLE_WPR to false for gp106, gv100 & gv11b.
- Added methods supporting multiple WPR; they are called from the ucode blob preparation flow when NVGPU_SUPPORT_MULTIPLE_WPR is enabled.

JIRA NVGPUTU10X / NVGPUT-99

Change-Id: I81d0490158390e79b6841374158805f7a84ee6cb
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1725369
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp106/acr_gp106.c')
-rw-r--r--	drivers/gpu/nvgpu/gp106/acr_gp106.c	118
1 file changed, 118 insertions(+), 0 deletions(-)
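
For reference, the patch manipulates two structures that are declared in the nvgpu ACR headers rather than in this file. Below is a minimal sketch of their assumed layout, inferred only from the fields the new code touches; field order, widths and any additional members are guesses, not the driver's actual definitions.

/* Assumed layout, inferred from this patch; the real definitions live in
 * the nvgpu ACR headers and may carry additional fields.
 */
struct lsf_shared_sub_wpr_header {
	u32 use_case_id;	/* one of LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_* */
	u32 start_addr;		/* start of this subWPR inside the WPR region */
	u32 size_4K;		/* subWPR size in 4 KB units */
};

/* List node used by the LS falcon manager to track one managed subWPR. */
struct lsfm_sub_wpr {
	struct lsfm_sub_wpr *pnext;
	struct lsf_shared_sub_wpr_header sub_wpr_header;
};

lsfm_discover_and_add_sub_wprs() in the diff below builds a singly linked list of these nodes; lsf_gen_wpr_requirements() and lsfm_init_sub_wpr_contents() later consume that list when sizing the WPR blob and flushing the headers to FB.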
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 61b443e0..70296757 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -368,6 +368,50 @@ rel_sig:
 	return err;
 }
 
+/*
+ * Discover all supported shared data falcon SUB WPRs
+ */
+static u32 lsfm_discover_and_add_sub_wprs(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct lsfm_sub_wpr *pnode;
+	u32 size_4K = 0;
+	u32 sub_wpr_index;
+
+	for (sub_wpr_index = 1;
+		sub_wpr_index <= LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX;
+		sub_wpr_index++) {
+
+		switch (sub_wpr_index) {
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K;
+			break;
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K;
+			break;
+		default:
+			size_4K = 0; /* subWpr not supported */
+			break;
+		}
+
+		if (size_4K) {
+			pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_sub_wpr));
+			if (pnode == NULL)
+				return -ENOMEM;
+
+			pnode->sub_wpr_header.use_case_id = sub_wpr_index;
+			pnode->sub_wpr_header.size_4K = size_4K;
+
+			pnode->pnext = plsfm->psub_wpr_list;
+			plsfm->psub_wpr_list = pnode;
+
+			plsfm->managed_sub_wpr_count++;
+		}
+	}
+
+	return 0;
+}
+
 int gp106_prepare_ucode_blob(struct gk20a *g)
 {
 
@@ -400,6 +444,9 @@ int gp106_prepare_ucode_blob(struct gk20a *g)
 	if (err)
 		goto exit_err;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_discover_and_add_sub_wprs(g, plsfm);
+
 	if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
 		/* Generate WPR requirements*/
 		err = lsf_gen_wpr_requirements(g, plsfm);
@@ -671,6 +718,40 @@ int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
 	return -ENOENT;
 }
 
+static u32 lsfm_init_sub_wpr_contents(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
+{
+	struct lsfm_sub_wpr *psub_wpr_node;
+	struct lsf_shared_sub_wpr_header last_sub_wpr_header;
+	u32 temp_size = sizeof(struct lsf_shared_sub_wpr_header);
+	u32 sub_wpr_header_offset = 0;
+	u32 i = 0;
+
+	/* SubWpr headers are placed after WPR headers */
+	sub_wpr_header_offset = LSF_WPR_HEADERS_TOTAL_SIZE_MAX;
+
+	/* Walk through the managed shared subWPRs headers
+	 * and flush them to FB
+	 */
+	psub_wpr_node = plsfm->psub_wpr_list;
+	i = 0;
+	while (psub_wpr_node) {
+		nvgpu_mem_wr_n(g, ucode,
+			sub_wpr_header_offset + (i * temp_size),
+			&psub_wpr_node->sub_wpr_header, temp_size);
+
+		psub_wpr_node = psub_wpr_node->pnext;
+		i++;
+	}
+	last_sub_wpr_header.use_case_id =
+		LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID;
+	nvgpu_mem_wr_n(g, ucode, sub_wpr_header_offset +
+		(plsfm->managed_sub_wpr_count * temp_size),
+		&last_sub_wpr_header, temp_size);
+
+	return 0;
+}
+
 /* Initialize WPR contents */
 void lsfm_init_wpr_contents(struct gk20a *g,
 	struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
@@ -684,6 +765,9 @@ void lsfm_init_wpr_contents(struct gk20a *g,
 	memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v1));
 	i = 0;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_init_sub_wpr_contents(g, plsfm, ucode);
+
 	/*
 	 * Walk the managed falcons, flush WPR and LSB headers to FB.
 	 * flush any bl args to the storage area relative to the
@@ -956,6 +1040,7 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 	struct ls_flcn_mgr_v1 *plsfm)
 {
 	struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list;
+	struct lsfm_sub_wpr *pnode_sub_wpr = plsfm->psub_wpr_list;
 	u32 wpr_offset;
 
 	/* Calculate WPR size required */
@@ -967,6 +1052,22 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 	wpr_offset = sizeof(struct lsf_wpr_header_v1) *
 		(plsfm->managed_flcn_cnt+1);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_WPR_HEADERS_TOTAL_SIZE_MAX);
+		/*
+		 * SUB WPR header is appended after
+		 * LSF_WPR_HEADER in WPR blob.
+		 * The size is allocated as per the managed
+		 * SUB WPR count.
+		 */
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_SUB_WPR_HEADER_ALIGNMENT);
+		wpr_offset = wpr_offset +
+			(sizeof(struct lsf_shared_sub_wpr_header) *
+			(plsfm->managed_sub_wpr_count + 1));
+	}
+
 	/* Walk the managed falcons, accounting for the LSB structs
 	   as well as the ucode images. */
 	while (pnode) {
@@ -1028,6 +1129,23 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 		}
 		pnode = pnode->next;
 	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		/* Walk through the sub wpr headers to accommodate
+		 * sub wprs in WPR request
+		 */
+		while (pnode_sub_wpr) {
+			wpr_offset = ALIGN_UP(wpr_offset,
+				SUB_WPR_SIZE_ALIGNMENT);
+			pnode_sub_wpr->sub_wpr_header.start_addr = wpr_offset;
+			wpr_offset = wpr_offset +
+				(pnode_sub_wpr->sub_wpr_header.size_4K
+				<< SHIFT_4KB);
+			pnode_sub_wpr = pnode_sub_wpr->pnext;
+		}
+		wpr_offset = ALIGN_UP(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);
+	}
+
 	plsfm->wpr_size = wpr_offset;
 	return 0;
 }
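
To make the offset arithmetic added to lsf_gen_wpr_requirements() easier to follow, here is a standalone sketch of the resulting WPR layout with sub-WPRs enabled. The constants and structure sizes are illustrative placeholders rather than the driver's actual values, and align_up() is a local stand-in for the nvgpu ALIGN_UP macro.

#include <stdio.h>
#include <stdint.h>

/* Illustrative placeholders, not the driver's actual values. */
#define LSF_WPR_HEADERS_TOTAL_SIZE_MAX	1024u
#define LSF_SUB_WPR_HEADER_ALIGNMENT	256u
#define SUB_WPR_SIZE_ALIGNMENT		4096u
#define SHIFT_4KB			12u

/* Stand-in for the driver's ALIGN_UP macro. */
static uint32_t align_up(uint32_t v, uint32_t a)
{
	return (v + (a - 1u)) & ~(a - 1u);
}

int main(void)
{
	/* Example: 3 managed falcons, 2 managed subWPRs of 1 and 4 pages. */
	uint32_t managed_flcn_cnt = 3, managed_sub_wpr_cnt = 2;
	uint32_t sub_wpr_size_4K[] = { 1, 4 };
	uint32_t wpr_hdr_size = 24;	/* stand-in for sizeof(struct lsf_wpr_header_v1) */
	uint32_t sub_hdr_size = 12;	/* stand-in for sizeof(struct lsf_shared_sub_wpr_header) */
	uint32_t i;

	/* WPR headers for each managed falcon, plus one terminator entry. */
	uint32_t wpr_offset = wpr_hdr_size * (managed_flcn_cnt + 1);

	/* SubWPR headers follow the fixed-size WPR header area, plus a terminator. */
	wpr_offset = align_up(wpr_offset, LSF_WPR_HEADERS_TOTAL_SIZE_MAX);
	wpr_offset = align_up(wpr_offset, LSF_SUB_WPR_HEADER_ALIGNMENT);
	wpr_offset += sub_hdr_size * (managed_sub_wpr_cnt + 1);

	/* ... LSB headers, bootloader args and ucode images are accounted here ... */

	/* SubWPR data regions are carved out last, each aligned to SUB_WPR_SIZE_ALIGNMENT. */
	for (i = 0; i < managed_sub_wpr_cnt; i++) {
		wpr_offset = align_up(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);
		printf("subWPR %u data starts at 0x%x\n", i, wpr_offset);
		wpr_offset += sub_wpr_size_4K[i] << SHIFT_4KB;
	}
	wpr_offset = align_up(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);

	printf("total WPR size: 0x%x\n", wpr_offset);
	return 0;
}

With these example numbers the subWPR header array starts at offset 0x400 and each subWPR data region lands on the next 4 KB boundary after the (elided) ucode image area, which is what the new code records in sub_wpr_header.start_addr before rounding the final WPR size up to SUB_WPR_SIZE_ALIGNMENT.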