path: root/drivers/gpu/nvgpu/gp106/acr_gp106.c
author	Mahantesh Kumbar <mkumbar@nvidia.com>	2018-05-20 11:08:33 -0400
committer	Tejal Kudav <tkudav@nvidia.com>	2018-06-14 09:44:07 -0400
commit	25fc64b944d12c007771efe24badda78be4e4cb7 (patch)
tree	7b2a5d7270824008eab68be1354a9c671cad6203 /drivers/gpu/nvgpu/gp106/acr_gp106.c
parent	7aded206bc3eb0f36422e9f6f3dab3e065e7e7e4 (diff)
gpu: nvgpu: Multiple WPR support
The WPR region is divided into several sub-WPRs: one for each Falcon and one common region shared between the Falcons that bootstrap other Falcons.
- Defined and used the flag NVGPU_SUPPORT_MULTIPLE_WPR to indicate multiple-WPR support.
- Added struct lsfm_sub_wpr to hold subWPR header info.
- Added struct lsf_shared_sub_wpr_header to hold subWPR info; it is copied into the WPR blob after LSF_WPR_HEADER.
- Set NVGPU_SUPPORT_MULTIPLE_WPR to false for gp106, gv100 & gv11b.
- Added methods supporting multiple WPR; they are called from the ucode blob preparation flow when NVGPU_SUPPORT_MULTIPLE_WPR is enabled.

JIRA NVGPUTU10X / NVGPUT-99

Change-Id: I81d0490158390e79b6841374158805f7a84ee6cb
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1725369
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp106/acr_gp106.c')
-rw-r--r--	drivers/gpu/nvgpu/gp106/acr_gp106.c	118
1 file changed, 118 insertions(+), 0 deletions(-)
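
For reference, the patch manipulates two structures that are declared in the nvgpu ACR headers rather than in this file. Below is a minimal sketch of their assumed layout, inferred only from the fields the new code touches; field order, widths and any additional members are guesses, not the driver's actual definitions.

/* Assumed layout, inferred from this patch; the real definitions live in
 * the nvgpu ACR headers and may carry additional fields.
 */
struct lsf_shared_sub_wpr_header {
	u32 use_case_id;	/* one of LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_* */
	u32 start_addr;		/* start of this subWPR inside the WPR region */
	u32 size_4K;		/* subWPR size in 4 KB units */
};

/* List node used by the LS falcon manager to track one managed subWPR. */
struct lsfm_sub_wpr {
	struct lsfm_sub_wpr *pnext;
	struct lsf_shared_sub_wpr_header sub_wpr_header;
};

lsfm_discover_and_add_sub_wprs() in the diff below builds a singly linked list of these nodes; lsf_gen_wpr_requirements() and lsfm_init_sub_wpr_contents() later consume that list when sizing the WPR blob and flushing the headers to FB.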
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 61b443e0..70296757 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -368,6 +368,50 @@ rel_sig:
 	return err;
 }
 
+/*
+ * Discover all supported shared data falcon SUB WPRs
+ */
+static u32 lsfm_discover_and_add_sub_wprs(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct lsfm_sub_wpr *pnode;
+	u32 size_4K = 0;
+	u32 sub_wpr_index;
+
+	for (sub_wpr_index = 1;
+		sub_wpr_index <= LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX;
+		sub_wpr_index++) {
+
+		switch (sub_wpr_index) {
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K;
+			break;
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K;
+			break;
+		default:
+			size_4K = 0; /* subWpr not supported */
+			break;
+		}
+
+		if (size_4K) {
+			pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_sub_wpr));
+			if (pnode == NULL)
+				return -ENOMEM;
+
+			pnode->sub_wpr_header.use_case_id = sub_wpr_index;
+			pnode->sub_wpr_header.size_4K = size_4K;
+
+			pnode->pnext = plsfm->psub_wpr_list;
+			plsfm->psub_wpr_list = pnode;
+
+			plsfm->managed_sub_wpr_count++;
+		}
+	}
+
+	return 0;
+}
+
 int gp106_prepare_ucode_blob(struct gk20a *g)
 {
 
@@ -400,6 +444,9 @@ int gp106_prepare_ucode_blob(struct gk20a *g)
 	if (err)
 		goto exit_err;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_discover_and_add_sub_wprs(g, plsfm);
+
 	if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
 		/* Generate WPR requirements*/
 		err = lsf_gen_wpr_requirements(g, plsfm);
@@ -671,6 +718,40 @@ int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
 	return -ENOENT;
 }
 
+static u32 lsfm_init_sub_wpr_contents(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
+{
+	struct lsfm_sub_wpr *psub_wpr_node;
+	struct lsf_shared_sub_wpr_header last_sub_wpr_header;
+	u32 temp_size = sizeof(struct lsf_shared_sub_wpr_header);
+	u32 sub_wpr_header_offset = 0;
+	u32 i = 0;
+
+	/* SubWpr headers are placed after WPR headers */
+	sub_wpr_header_offset = LSF_WPR_HEADERS_TOTAL_SIZE_MAX;
+
+	/* Walk through the managed shared subWPRs headers
+	 * and flush them to FB
+	 */
+	psub_wpr_node = plsfm->psub_wpr_list;
+	i = 0;
+	while (psub_wpr_node) {
+		nvgpu_mem_wr_n(g, ucode,
+			sub_wpr_header_offset + (i * temp_size),
+			&psub_wpr_node->sub_wpr_header, temp_size);
+
+		psub_wpr_node = psub_wpr_node->pnext;
+		i++;
+	}
+	last_sub_wpr_header.use_case_id =
+		LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID;
+	nvgpu_mem_wr_n(g, ucode, sub_wpr_header_offset +
+		(plsfm->managed_sub_wpr_count * temp_size),
+		&last_sub_wpr_header, temp_size);
+
+	return 0;
+}
+
 /* Initialize WPR contents */
 void lsfm_init_wpr_contents(struct gk20a *g,
 	struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
@@ -684,6 +765,9 @@ void lsfm_init_wpr_contents(struct gk20a *g,
 	memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v1));
 	i = 0;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_init_sub_wpr_contents(g, plsfm, ucode);
+
 	/*
 	 * Walk the managed falcons, flush WPR and LSB headers to FB.
 	 * flush any bl args to the storage area relative to the
@@ -956,6 +1040,7 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 	struct ls_flcn_mgr_v1 *plsfm)
 {
 	struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list;
+	struct lsfm_sub_wpr *pnode_sub_wpr = plsfm->psub_wpr_list;
 	u32 wpr_offset;
 
 	/* Calculate WPR size required */
@@ -967,6 +1052,22 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 	wpr_offset = sizeof(struct lsf_wpr_header_v1) *
 		(plsfm->managed_flcn_cnt+1);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_WPR_HEADERS_TOTAL_SIZE_MAX);
+		/*
+		 * SUB WPR header is appended after
+		 * LSF_WPR_HEADER in WPR blob.
+		 * The size is allocated as per the managed
+		 * SUB WPR count.
+		 */
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_SUB_WPR_HEADER_ALIGNMENT);
+		wpr_offset = wpr_offset +
+			(sizeof(struct lsf_shared_sub_wpr_header) *
+			(plsfm->managed_sub_wpr_count + 1));
+	}
+
 	/* Walk the managed falcons, accounting for the LSB structs
 	   as well as the ucode images. */
 	while (pnode) {
@@ -1028,6 +1129,23 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 		}
 		pnode = pnode->next;
 	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		/* Walk through the sub wpr headers to accommodate
+		 * sub wprs in WPR request
+		 */
+		while (pnode_sub_wpr) {
+			wpr_offset = ALIGN_UP(wpr_offset,
+				SUB_WPR_SIZE_ALIGNMENT);
+			pnode_sub_wpr->sub_wpr_header.start_addr = wpr_offset;
+			wpr_offset = wpr_offset +
+				(pnode_sub_wpr->sub_wpr_header.size_4K
+				<< SHIFT_4KB);
+			pnode_sub_wpr = pnode_sub_wpr->pnext;
+		}
+		wpr_offset = ALIGN_UP(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);
+	}
+
 	plsfm->wpr_size = wpr_offset;
 	return 0;
 }
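
To make the offset arithmetic added to lsf_gen_wpr_requirements() easier to follow, here is a standalone sketch of the resulting WPR layout with sub-WPRs enabled. The constants and structure sizes are illustrative placeholders rather than the driver's actual values, and align_up() is a local stand-in for the nvgpu ALIGN_UP macro.

#include <stdio.h>
#include <stdint.h>

/* Illustrative placeholders, not the driver's actual values. */
#define LSF_WPR_HEADERS_TOTAL_SIZE_MAX	1024u
#define LSF_SUB_WPR_HEADER_ALIGNMENT	256u
#define SUB_WPR_SIZE_ALIGNMENT		4096u
#define SHIFT_4KB			12u

/* Stand-in for the driver's ALIGN_UP macro. */
static uint32_t align_up(uint32_t v, uint32_t a)
{
	return (v + (a - 1u)) & ~(a - 1u);
}

int main(void)
{
	/* Example: 3 managed falcons, 2 managed subWPRs of 1 and 4 pages. */
	uint32_t managed_flcn_cnt = 3, managed_sub_wpr_cnt = 2;
	uint32_t sub_wpr_size_4K[] = { 1, 4 };
	uint32_t wpr_hdr_size = 24;	/* stand-in for sizeof(struct lsf_wpr_header_v1) */
	uint32_t sub_hdr_size = 12;	/* stand-in for sizeof(struct lsf_shared_sub_wpr_header) */
	uint32_t i;

	/* WPR headers for each managed falcon, plus one terminator entry. */
	uint32_t wpr_offset = wpr_hdr_size * (managed_flcn_cnt + 1);

	/* SubWPR headers follow the fixed-size WPR header area, plus a terminator. */
	wpr_offset = align_up(wpr_offset, LSF_WPR_HEADERS_TOTAL_SIZE_MAX);
	wpr_offset = align_up(wpr_offset, LSF_SUB_WPR_HEADER_ALIGNMENT);
	wpr_offset += sub_hdr_size * (managed_sub_wpr_cnt + 1);

	/* ... LSB headers, bootloader args and ucode images are accounted here ... */

	/* SubWPR data regions are carved out last, each aligned to SUB_WPR_SIZE_ALIGNMENT. */
	for (i = 0; i < managed_sub_wpr_cnt; i++) {
		wpr_offset = align_up(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);
		printf("subWPR %u data starts at 0x%x\n", i, wpr_offset);
		wpr_offset += sub_wpr_size_4K[i] << SHIFT_4KB;
	}
	wpr_offset = align_up(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);

	printf("total WPR size: 0x%x\n", wpr_offset);
	return 0;
}

With these example numbers the subWPR header array starts at offset 0x400 and each subWPR data region lands on the next 4 KB boundary after the (elided) ucode image area, which is what the new code records in sub_wpr_header.start_addr before rounding the final WPR size up to SUB_WPR_SIZE_ALIGNMENT.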