From 25fc64b944d12c007771efe24badda78be4e4cb7 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Sun, 20 May 2018 20:38:33 +0530
Subject: gpu: nvgpu: Multiple WPR support

The WPR will be divided into several sub-WPRs:
one for each falcon, and one common sub-WPR shared
between the falcons that bootstrap other falcons.

- Defined & used flag NVGPU_SUPPORT_MULTIPLE_WPR
  to indicate multiple-WPR (M-WPR) support.
- Added struct lsfm_sub_wpr to hold subWPR header info
- Added struct lsf_shared_sub_wpr_header to hold subWPR
  info, which is copied to the WPR blob after LSF_WPR_HEADER
- Set NVGPU_SUPPORT_MULTIPLE_WPR to false for gp106,
  gv100 & gv11b.
- Added methods for multiple WPR support, which are
  called when the NVGPU_SUPPORT_MULTIPLE_WPR flag is set
  in the ucode blob preparation flow.

JIRA NVGPUTU10X / NVGPUT-99

Change-Id: I81d0490158390e79b6841374158805f7a84ee6cb
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1725369
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gp106/acr_gp106.c               | 118 ++++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/hal_gp106.c               |   1 +
 drivers/gpu/nvgpu/gv100/hal_gv100.c               |   1 +
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c               |   2 +
 drivers/gpu/nvgpu/include/nvgpu/acr/acr_lsfm.h    |  89 +++++++++++++++-
 drivers/gpu/nvgpu/include/nvgpu/acr/acr_objlsfm.h |  14 ++-
 drivers/gpu/nvgpu/include/nvgpu/enabled.h         |   5 +-
 7 files changed, 223 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index 61b443e0..70296757 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -368,6 +368,50 @@ rel_sig:
 	return err;
 }
 
+/*
+ * Discover all supported shared data falcon SUB WPRs and push one list node
+ * per supported use case onto plsfm->psub_wpr_list, counting each node in
+ * plsfm->managed_sub_wpr_count.
+ * Returns 0 on success, -ENOMEM if a node allocation fails (hence int).
+ */
+static int lsfm_discover_and_add_sub_wprs(struct gk20a *g,
+		struct ls_flcn_mgr_v1 *plsfm)
+{
+	struct lsfm_sub_wpr *pnode;
+	u32 size_4K = 0;
+	u32 sub_wpr_index;
+
+	for (sub_wpr_index = 1;
+		sub_wpr_index <= LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX;
+		sub_wpr_index++) {
+		switch (sub_wpr_index) {
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K;
+			break;
+		case LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA:
+			size_4K = LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K;
+			break;
+		default:
+			size_4K = 0; /* subWpr not supported */
+			break;
+		}
+
+		if (size_4K) {
+			pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_sub_wpr));
+			if (pnode == NULL)
+				return -ENOMEM;
+
+			pnode->sub_wpr_header.use_case_id = sub_wpr_index;
+			pnode->sub_wpr_header.size_4K = size_4K;
+			pnode->pnext = plsfm->psub_wpr_list;
+			plsfm->psub_wpr_list = pnode;
+			plsfm->managed_sub_wpr_count++;
+		}
+	}
+
+	return 0;
+}
+
 int gp106_prepare_ucode_blob(struct gk20a *g)
 {
 
@@ -400,6 +444,9 @@ int gp106_prepare_ucode_blob(struct gk20a *g)
 	if (err)
 		goto exit_err;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_discover_and_add_sub_wprs(g, plsfm);
+
 	if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
 		/* Generate WPR requirements*/
 		err = lsf_gen_wpr_requirements(g, plsfm);
@@ -671,6 +718,40 @@ int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
 	return -ENOENT;
 }
 
+/*
+ * Write managed subWPR headers plus an INVALID terminator to ucode; returns 0.
+ */
+static u32 lsfm_init_sub_wpr_contents(struct gk20a *g,
+	struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
+{
+	struct lsfm_sub_wpr *psub_wpr_node;
+	struct lsf_shared_sub_wpr_header last_sub_wpr_header;
+	u32 temp_size = sizeof(struct lsf_shared_sub_wpr_header);
+	/* SubWpr headers are placed after WPR headers */
+	u32 sub_wpr_header_offset = LSF_WPR_HEADERS_TOTAL_SIZE_MAX;
+	u32 i = 0;
+
+	/* Walk the managed shared subWPR headers and flush them to FB */
+	psub_wpr_node = plsfm->psub_wpr_list;
+	while (psub_wpr_node) {
+		nvgpu_mem_wr_n(g, ucode,
+			sub_wpr_header_offset + (i * temp_size),
+			&psub_wpr_node->sub_wpr_header, temp_size);
+		psub_wpr_node = psub_wpr_node->pnext;
+		i++;
+	}
+
+	/* Zero first so no uninitialized stack bytes reach the blob */
+	memset(&last_sub_wpr_header, 0, sizeof(last_sub_wpr_header));
+	last_sub_wpr_header.use_case_id =
+		LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID;
+	nvgpu_mem_wr_n(g, ucode, sub_wpr_header_offset +
+		(plsfm->managed_sub_wpr_count * temp_size),
+		&last_sub_wpr_header, temp_size);
+
+	return 0;
+}
+
 /* Initialize WPR contents */
 void lsfm_init_wpr_contents(struct gk20a *g,
 		struct ls_flcn_mgr_v1 *plsfm, struct nvgpu_mem *ucode)
@@ -684,6 +765,9 @@ void lsfm_init_wpr_contents(struct gk20a *g,
 	memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v1));
 	i = 0;
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR))
+		lsfm_init_sub_wpr_contents(g, plsfm, ucode);
+
 	/*
 	 * Walk the managed falcons, flush WPR and LSB headers to FB.
 	 * flush any bl args to the storage area relative to the
@@ -956,6 +1040,7 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 		struct ls_flcn_mgr_v1 *plsfm)
 {
 	struct lsfm_managed_ucode_img_v2 *pnode = plsfm->ucode_img_list;
+	struct lsfm_sub_wpr *pnode_sub_wpr = plsfm->psub_wpr_list;
 	u32 wpr_offset;
 
 	/* Calculate WPR size required */
@@ -967,6 +1052,22 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 	wpr_offset = sizeof(struct lsf_wpr_header_v1) *
 		(plsfm->managed_flcn_cnt+1);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_WPR_HEADERS_TOTAL_SIZE_MAX);
+		/*
+		 * SUB WPR header is appended after
+		 * LSF_WPR_HEADER in WPR blob.
+		 * The size is allocated as per the managed
+		 * SUB WPR count.
+		 */
+		wpr_offset = ALIGN_UP(wpr_offset,
+			LSF_SUB_WPR_HEADER_ALIGNMENT);
+		wpr_offset = wpr_offset +
+			(sizeof(struct lsf_shared_sub_wpr_header) *
+			(plsfm->managed_sub_wpr_count + 1));
+	}
+
 	/* Walk the managed falcons, accounting for the LSB structs
 	as well as the ucode images. */
 	while (pnode) {
@@ -1028,6 +1129,23 @@ int lsf_gen_wpr_requirements(struct gk20a *g,
 		}
 		pnode = pnode->next;
 	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR)) {
+		/* Walk through the sub wpr headers to accommodate
+		 * sub wprs in WPR request
+		 */
+		while (pnode_sub_wpr) {
+			wpr_offset = ALIGN_UP(wpr_offset,
+					SUB_WPR_SIZE_ALIGNMENT);
+			pnode_sub_wpr->sub_wpr_header.start_addr = wpr_offset;
+			wpr_offset = wpr_offset +
+				(pnode_sub_wpr->sub_wpr_header.size_4K
+				<< SHIFT_4KB);
+			pnode_sub_wpr = pnode_sub_wpr->pnext;
+		}
+		wpr_offset = ALIGN_UP(wpr_offset, SUB_WPR_SIZE_ALIGNMENT);
+	}
+
 	plsfm->wpr_size = wpr_offset;
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 67a7968a..a525d117 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -830,6 +830,7 @@ int gp106_init_hal(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
 	__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
 	__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
 
 	/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
 	if (gops->fuse.check_priv_security(g))
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 56429975..2f5cf7f3 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -911,6 +911,7 @@ int gv100_init_hal(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
 	__nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, true);
 	__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
 	/* for now */
 	__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
 
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 58912a30..cae55609 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -841,6 +841,8 @@ int gv11b_init_hal(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
 	g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
 
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
+
 	g->name = "gv11b";
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/acr/acr_lsfm.h b/drivers/gpu/nvgpu/include/nvgpu/acr/acr_lsfm.h
index 70184934..90d2d20d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/acr/acr_lsfm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/acr/acr_lsfm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -27,6 +27,22 @@
 	"Include nvgpu_acr.h instead of acr_xxx.h to get access to ACR interfaces"
 #endif
 
+/*
+ * READ/WRITE masks for WPR region
+ */
+/* Readable only from level 2 and 3 client */
+#define LSF_WPR_REGION_RMASK	(0xC)
+/* Writable only from level 2 and 3 client */
+#define LSF_WPR_REGION_WMASK	(0xC)
+/* Readable only from level 3 client */
+#define LSF_WPR_REGION_RMASK_SUB_WPR_ENABLED	(0x8)
+/* Writable only from level 3 client */
+#define LSF_WPR_REGION_WMASK_SUB_WPR_ENABLED	(0x8)
+/* Disallow read mis-match for all clients */
+#define LSF_WPR_REGION_ALLOW_READ_MISMATCH_NO	(0x0)
+/* Disallow write mis-match for all clients */
+#define LSF_WPR_REGION_ALLOW_WRITE_MISMATCH_NO	(0x0)
+
 /*
  * Falcon Id Defines
  * Defines a common Light Secure Falcon identifier.
@@ -84,6 +100,42 @@ struct lsf_wpr_header_v1 {
 	u32 bin_version;
 	u32 status;
 };
+
+
+/*
+ * LSF shared SubWpr Header
+ *
+ * use_case_id - Shared SubWpr use case ID (updated by nvgpu)
+ * start_addr  - start address of subWpr (updated by nvgpu)
+ * size_4K     - size of subWpr in 4K (updated by nvgpu)
+ */
+struct lsf_shared_sub_wpr_header {
+	u32 use_case_id;
+	u32 start_addr;
+	u32 size_4K;
+};
+
+/* shared sub_wpr use case IDs */
+enum {
+	LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES	= 1,
+	LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA = 2
+};
+
+#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX \
+	LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA
+
+#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID	(0xFFFFFFFF)
+
+#define MAX_SUPPORTED_SHARED_SUB_WPR_USE_CASES	\
+	LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX
+
+/* Static sizes of shared subWPRs */
+/* Minimum granularity supported is 4K */
+/* 1MB in 4K */
+#define LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K	(0x100)
+/* 4K */
+#define LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K	(0x1)
+
 /*
  * Bootstrap Owner Defines
  */
@@ -147,13 +199,40 @@ struct lsf_lsb_header_v1 {
 /*
  * Light Secure WPR Content Alignments
  */
-#define LSF_LSB_HEADER_ALIGNMENT    256
-#define LSF_BL_DATA_ALIGNMENT       256
-#define LSF_BL_DATA_SIZE_ALIGNMENT  256
-#define LSF_BL_CODE_SIZE_ALIGNMENT  256
+#define LSF_WPR_HEADER_ALIGNMENT        (256U)
+#define LSF_SUB_WPR_HEADER_ALIGNMENT    (256U)
+#define LSF_LSB_HEADER_ALIGNMENT        (256U)
+#define LSF_BL_DATA_ALIGNMENT           (256U)
+#define LSF_BL_DATA_SIZE_ALIGNMENT      (256U)
+#define LSF_BL_CODE_SIZE_ALIGNMENT      (256U)
+#define LSF_DATA_SIZE_ALIGNMENT         (256U)
+#define LSF_CODE_SIZE_ALIGNMENT         (256U)
+
+/* MMU expects sub_wpr sizes in units of 4K */
+#define SUB_WPR_SIZE_ALIGNMENT	(4096U)
+
+/*
+ * Maximum WPR Header size
+ */
+#define LSF_WPR_HEADERS_TOTAL_SIZE_MAX	\
+	(ALIGN_UP((sizeof(struct lsf_wpr_header_v1) * LSF_FALCON_ID_END), \
+		LSF_WPR_HEADER_ALIGNMENT))
+#define LSF_LSB_HEADER_TOTAL_SIZE_MAX	(\
+	ALIGN_UP(sizeof(struct lsf_lsb_header_v1), LSF_LSB_HEADER_ALIGNMENT))
+
+/* Maximum SUB WPR header size */
+#define LSF_SUB_WPR_HEADERS_TOTAL_SIZE_MAX	(ALIGN_UP( \
+	(sizeof(struct lsf_shared_sub_wpr_header) * \
+	LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX), \
+	LSF_SUB_WPR_HEADER_ALIGNMENT))
+
 
 #define LSF_UCODE_DATA_ALIGNMENT 4096
 
+/* Shift amounts for 1MB and 4KB granularity */
+#define SHIFT_1MB	(20)
+#define SHIFT_4KB	(12)
+
 /*
  * Supporting maximum of 2 regions.
  * This is needed to pre-allocate space in DMEM
diff --git a/drivers/gpu/nvgpu/include/nvgpu/acr/acr_objlsfm.h b/drivers/gpu/nvgpu/include/nvgpu/acr/acr_objlsfm.h
index cde8707a..96fb9f19 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/acr/acr_objlsfm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/acr/acr_objlsfm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -73,12 +73,24 @@ struct ls_flcn_mgr {
 	void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
 };
 
+/*
+ * LSFM SUB WPRs struct
+ * pnext        : Next entry in the list, NULL if last
+ * sub_wpr_header : SubWpr Header struct
+ */
+struct lsfm_sub_wpr {
+	struct lsfm_sub_wpr *pnext;
+	struct lsf_shared_sub_wpr_header sub_wpr_header;
+};
+
 struct ls_flcn_mgr_v1 {
 	u16 managed_flcn_cnt;
 	u32 wpr_size;
 	u32 disable_mask;
 	struct lsfm_managed_ucode_img_v2 *ucode_img_list;
 	void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
+	u16 managed_sub_wpr_count; /* count of sub-WPR nodes added to the list */
+	struct lsfm_sub_wpr *psub_wpr_list; /* head of the sub-WPR node list */
 };
 
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index a76db09e..0ffb0488 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -163,10 +163,13 @@ struct gk20a;
 /* USERMODE enable bit */
 #define NVGPU_SUPPORT_USERMODE_SUBMIT		67
 
+/* Multiple WPR support */
+#define NVGPU_SUPPORT_MULTIPLE_WPR	68
+
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS			68
+#define NVGPU_MAX_ENABLED_BITS			69
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.
-- 
cgit v1.2.2