diff options
author | Mahantesh Kumbar <mkumbar@nvidia.com> | 2018-05-20 11:08:33 -0400 |
---|---|---|
committer | Tejal Kudav <tkudav@nvidia.com> | 2018-06-14 09:44:07 -0400 |
commit | 25fc64b944d12c007771efe24badda78be4e4cb7 (patch) | |
tree | 7b2a5d7270824008eab68be1354a9c671cad6203 /drivers/gpu/nvgpu/gp106/acr_gp106.c | |
parent | 7aded206bc3eb0f36422e9f6f3dab3e065e7e7e4 (diff) |
gpu: nvgpu: Multiple WPR support
The WPR will be divided into several sub-WPRs,
one for each Falcon and one common for sharing
between Falcons which bootstrap falcons
- Defined & used flag NVGPU_SUPPORT_MULTIPLE_WPR
to indicate M-WPR support.
- Added struct lsfm_sub_wpr to hold subWPR header info
- Added struct lsf_shared_sub_wpr_header to hold subWPR
info & copied to WPR blob after LSF_WPR_HEADER
- Set NVGPU_SUPPORT_MULTIPLE_WPR to false for gp106,
gv100 & gv11b.
- Added methods to support multiple WPR and
called them, guarded by the NVGPU_SUPPORT_MULTIPLE_WPR
flag, in the ucode blob preparation flow.
JIRA NVGPUTU10X / NVGPUT-99
Change-Id: I81d0490158390e79b6841374158805f7a84ee6cb
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1725369
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp106/acr_gp106.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp106/acr_gp106.c | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c index 61b443e0..70296757 100644 --- a/drivers/gpu/nvgpu/gp106/acr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c | |||
@@ -368,6 +368,50 @@ rel_sig: | |||
368 | return err; | 368 | return err; |
369 | } | 369 | } |
370 | 370 | ||
371 | /* | ||
372 | * Discover all supported shared data falcon SUB WPRs | ||
373 | */ | ||
374 | static u32 lsfm_discover_and_add_sub_wprs(struct gk20a *g, | ||
375 | struct ls_flcn_mgr_v1 *plsfm) | ||
376 | { | ||
377 | struct lsfm_sub_wpr *pnode; | ||
378 | u32 size_4K = 0; | ||
379 | u32 sub_wpr_index; | ||
380 | |||
381 | for (sub_wpr_index = 1; | ||
382 | sub_wpr_index <= LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX; | ||
383 | sub_wpr_index++) { | ||
384 | |||
385 | switch (sub_wpr_index) { | ||
386 | case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES: | ||
387 | size_4K = LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K; | ||
388 | break; | ||
389 | case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA: | ||
390 | size_4K = LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K; | ||
391 | break; | ||
392 | default: | ||
393 | size_4K = 0; /* subWpr not supported */ | ||
394 | break; | ||
395 | } | ||
396 | |||
397 | if (size_4K) { | ||
398 | pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_sub_wpr)); | ||
399 | if (pnode == NULL) | ||
400 | return -ENOMEM; | ||
401 | |||
402 | pnode->sub_wpr_header.use_case_id = sub_wpr_index; | ||
403 | pnode->sub_wpr_header.size_4K = size_4K; | ||
404 | |||
405 | pnode->pnext = plsfm->psub_wpr_list; | ||
406 | plsfm->psub_wpr_list = pnode; | ||
407 | |||
408 | plsfm->managed_sub_wpr_count++; | ||
409 | } | ||
410 | } | ||
411 | |||
412 | return 0; | ||
413 | } | ||
414 | |||
371 | int gp106_prepare_ucode_blob(struct gk20a *g) | 415 | int gp106_prepare_ucode_blob(struct gk20a *g) |
372 | { | 416 | { |
373 | 417 | ||
@@ -400,6 +444,9 @@ int gp106_prepare_ucode_blob(struct gk20a *g) | |||
400 | if (err) | 444 | if (err) |
401 | goto exit_err; | 445 | goto exit_err; |
402 | 446 | ||
447 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) | ||
448 | lsfm_discover_and_add_sub_wprs(g, plsfm); | ||
449 | |||
403 | if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) { | 450 | if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) { |
404 | /* Generate WPR requirements*/ | 451 | /* Generate WPR requirements*/ |
405 | err = lsf_gen_wpr_requirements(g, plsfm); | 452 | err = lsf_gen_wpr_requirements(g, plsfm); |
@@ -671,6 +718,40 @@ int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, | |||
671 | return -ENOENT; | 718 | return -ENOENT; |
672 | } | 719 | } |
673 | 720 | ||
721 | static u32 lsfm_init_sub_wpr_contents(struct gk20a *g, | ||
722 | struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode) | ||
723 | { | ||
724 | struct lsfm_sub_wpr *psub_wpr_node; | ||
725 | struct lsf_shared_sub_wpr_header last_sub_wpr_header; | ||
726 | u32 temp_size = sizeof(struct lsf_shared_sub_wpr_header); | ||
727 | u32 sub_wpr_header_offset = 0; | ||
728 | u32 i = 0; | ||
729 | |||
730 | /* SubWpr headers are placed after WPR headers */ | ||
731 | sub_wpr_header_offset = LSF_WPR_HEADERS_TOTAL_SIZE_MAX; | ||
732 | |||
733 | /* Walk through the managed shared subWPRs headers | ||
734 | * and flush them to FB | ||
735 | */ | ||
736 | psub_wpr_node = plsfm->psub_wpr_list; | ||
737 | i = 0; | ||
738 | while (psub_wpr_node) { | ||
739 | nvgpu_mem_wr_n(g, ucode, | ||
740 | sub_wpr_header_offset + (i * temp_size), | ||
741 | &psub_wpr_node->sub_wpr_header, temp_size); | ||
742 | |||
743 | psub_wpr_node = psub_wpr_node->pnext; | ||
744 | i++; | ||
745 | } | ||
746 | last_sub_wpr_header.use_case_id = | ||
747 | LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID; | ||
748 | nvgpu_mem_wr_n(g, ucode, sub_wpr_header_offset + | ||
749 | (plsfm->managed_sub_wpr_count * temp_size), | ||
750 | &last_sub_wpr_header, temp_size); | ||
751 | |||
752 | return 0; | ||
753 | } | ||
754 | |||
674 | /* Initialize WPR contents */ | 755 | /* Initialize WPR contents */ |
675 | void lsfm_init_wpr_contents(struct gk20a *g, | 756 | void lsfm_init_wpr_contents(struct gk20a *g, |
676 | struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode) | 757 | struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode) |
@@ -684,6 +765,9 @@ void lsfm_init_wpr_contents(struct gk20a *g, | |||
684 | memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v1)); | 765 | memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v1)); |
685 | i = 0; | 766 | i = 0; |
686 | 767 | ||
768 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) | ||
769 | lsfm_init_sub_wpr_contents(g, plsfm, ucode); | ||
770 | |||
687 | /* | 771 | /* |
688 | * Walk the managed falcons, flush WPR and LSB headers to FB. | 772 | * Walk the managed falcons, flush WPR and LSB headers to FB. |
689 | * flush any bl args to the storage area relative to the | 773 | * flush any bl args to the storage area relative to the |
@@ -956,6 +1040,7 @@ int lsf_gen_wpr_requirements(struct gk20a *g, | |||
956 | struct ls_flcn_mgr_v1 *plsfm) | 1040 | struct ls_flcn_mgr_v1 *plsfm) |
957 | { | 1041 | { |
958 | struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list; | 1042 | struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list; |
1043 | struct lsfm_sub_wpr *pnode_sub_wpr = plsfm->psub_wpr_list; | ||
959 | u32 wpr_offset; | 1044 | u32 wpr_offset; |
960 | 1045 | ||
961 | /* Calculate WPR size required */ | 1046 | /* Calculate WPR size required */ |
@@ -967,6 +1052,22 @@ int lsf_gen_wpr_requirements(struct gk20a *g, | |||
967 | wpr_offset = sizeof(struct lsf_wpr_header_v1) * | 1052 | wpr_offset = sizeof(struct lsf_wpr_header_v1) * |
968 | (plsfm->managed_flcn_cnt+1); | 1053 | (plsfm->managed_flcn_cnt+1); |
969 | 1054 | ||
1055 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { | ||
1056 | wpr_offset = ALIGN_UP(wpr_offset, | ||
1057 | LSF_WPR_HEADERS_TOTAL_SIZE_MAX); | ||
1058 | /* | ||
1059 | * SUB WPR header is appended after | ||
1060 | * LSF_WPR_HEADER in WPR blob. | ||
1061 | * The size is allocated as per the managed | ||
1062 | * SUB WPR count. | ||
1063 | */ | ||
1064 | wpr_offset = ALIGN_UP(wpr_offset, | ||
1065 | LSF_SUB_WPR_HEADER_ALIGNMENT); | ||
1066 | wpr_offset = wpr_offset + | ||
1067 | (sizeof(struct lsf_shared_sub_wpr_header) * | ||
1068 | (plsfm->managed_sub_wpr_count + 1)); | ||
1069 | } | ||
1070 | |||
970 | /* Walk the managed falcons, accounting for the LSB structs | 1071 | /* Walk the managed falcons, accounting for the LSB structs |
971 | as well as the ucode images. */ | 1072 | as well as the ucode images. */ |
972 | while (pnode) { | 1073 | while (pnode) { |
@@ -1028,6 +1129,23 @@ int lsf_gen_wpr_requirements(struct gk20a *g, | |||
1028 | } | 1129 | } |
1029 | pnode = pnode->next; | 1130 | pnode = pnode->next; |
1030 | } | 1131 | } |
1132 | |||
1133 | if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) { | ||
1134 | /* Walk through the sub wpr headers to accommodate | ||
1135 | * sub wprs in WPR request | ||
1136 | */ | ||
1137 | while (pnode_sub_wpr) { | ||
1138 | wpr_offset = ALIGN_UP(wpr_offset, | ||
1139 | SUB_WPR_SIZE_ALIGNMENT); | ||
1140 | pnode_sub_wpr->sub_wpr_header.start_addr = wpr_offset; | ||
1141 | wpr_offset = wpr_offset + | ||
1142 | (pnode_sub_wpr->sub_wpr_header.size_4K | ||
1143 | << SHIFT_4KB); | ||
1144 | pnode_sub_wpr = pnode_sub_wpr->pnext; | ||
1145 | } | ||
1146 | wpr_offset = ALIGN_UP(wpr_offset, SUB_WPR_SIZE_ALIGNMENT); | ||
1147 | } | ||
1148 | |||
1031 | plsfm->wpr_size = wpr_offset; | 1149 | plsfm->wpr_size = wpr_offset; |
1032 | return 0; | 1150 | return 0; |
1033 | } | 1151 | } |