From 30d399de307befc4edc2b8ca66c36ad2440d34f1 Mon Sep 17 00:00:00 2001
From: Vijayakumar <vsubbu@nvidia.com>
Date: Wed, 24 Jun 2015 12:56:50 +0530
Subject: gpu: nvgpu: load secure gpccs using dma

bug 200080684

Use the new command defined in the ucode for loading
the GR falcons. Flip the PRIV load flag in the LSB
header to indicate that DMA is used. Use the PMU message
as command completion for the new command instead of
polling the FECS mailbox. Also move the
check for using DMA in the non-secure boot path
to the HAL.

Change-Id: I22582a705bd1ae0603f858e1fe200d72e6794a81
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/761625
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.h        |  3 ++
 drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c |  3 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c     |  4 +-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c    |  1 +
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h    | 23 ++++++++
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c    | 55 ++++++++++++++++---
 drivers/gpu/nvgpu/gm20b/acr_gm20b.h    |  2 +
 drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c |  3 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c     | 96 +++++++++++++---------------------
 drivers/gpu/nvgpu/gm20b/pmu_gm20b.c    | 63 +++++++++++++++++++---
 drivers/gpu/nvgpu/gm20b/pmu_gm20b.h    |  2 +-
 11 files changed, 175 insertions(+), 80 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d8e3586f..9a183e44 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -333,6 +333,7 @@ struct gpu_ops {
 	struct {
 		int (*get_netlist_name)(int index, char *name);
 		bool (*is_fw_defined)(void);
+		bool use_dma_for_fw_bootstrap;
 	} gr_ctx;
 	struct {
 		bool (*support_sparse)(struct gk20a *g);
@@ -383,7 +384,9 @@ struct gpu_ops {
 		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
 		int (*pmu_setup_elpg)(struct gk20a *g);
 		int (*init_wpr_region)(struct gk20a *g);
+		int (*load_lsfalcon_ucode)(struct gk20a *g, u32 falconidmask);
 		u32  lspmuwprinitdone;
+		u32  lsfloadedfalconid;
 		bool fecsbootstrapdone;
 	} pmu;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
index cd9a9fca..94dba7b6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c
@@ -3,7 +3,7 @@
  *
  * GK20A Graphics Context
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -345,4 +345,5 @@ void gk20a_init_gr_ctx(struct gpu_ops *gops)
 {
 	gops->gr_ctx.get_netlist_name = gr_gk20a_get_netlist_name;
 	gops->gr_ctx.is_fw_defined = gr_gk20a_is_firmware_defined;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index edd4c6c8..e232bf17 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2128,7 +2128,7 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
 	 * In case bootloader is not supported, revert to the old way of
 	 * loading gr ucode, without the faster bootstrap routine.
 	 */
-	if (g->gpu_characteristics.arch > NVGPU_GPU_ARCH_GM200) {
+	if (!g->ops.gr_ctx.use_dma_for_fw_bootstrap) {
 		gr_gk20a_load_falcon_dmem(g);
 		gr_gk20a_load_falcon_imem(g);
 		gr_gk20a_start_falcon_ucode(g);
@@ -2161,7 +2161,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
 		return ret;
 	}
 
-	if (!(g->gpu_characteristics.arch > NVGPU_GPU_ARCH_GM200))
+	if (g->ops.gr_ctx.use_dma_for_fw_bootstrap || g->ops.securegpccs)
 		gk20a_writel(g, gr_fecs_current_ctx_r(),
 			gr_fecs_current_ctx_valid_false_f());
 
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 11322293..1f21555c 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2647,6 +2647,7 @@ void gk20a_init_pmu_ops(struct gpu_ops *gops)
 	gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
 	gops->pmu.pmu_setup_elpg = NULL;
 	gops->pmu.init_wpr_region = NULL;
+	gops->pmu.load_lsfalcon_ucode = NULL;
 }
 
 int gk20a_init_pmu_support(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index f29c810e..85403767 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -304,6 +304,11 @@ enum {
 	GK20A_PMU_DMAIDX_END		= 7
 };
 
+struct falc_u64 {	/* two u32 words, 64 bits in total */
+	u32 lo;		/* presumably the low 32 bits -- confirm against ucode */
+	u32 hi;		/* presumably the high 32 bits -- confirm against ucode */
+};
+
 struct falc_dma_addr {
 	u32 dma_base;
 	/*dma_base1 is 9-bit MSB for FB Base
@@ -708,6 +713,8 @@ struct pmu_pg_cmd {
 enum {
 	PMU_ACR_CMD_ID_INIT_WPR_REGION = 0x0          ,
 	PMU_ACR_CMD_ID_BOOTSTRAP_FALCON,
+	PMU_ACR_CMD_ID_RESERVED,
+	PMU_ACR_CMD_ID_BOOTSTRAP_MULTIPLE_FALCONS,
 };
 
 /*
@@ -729,14 +736,27 @@ struct pmu_acr_cmd_bootstrap_falcon {
 	u32 falconid;
 };
 
+/*
+ * Payload for bootstrapping several falcons with a single ACR command.
+ */
+struct pmu_acr_cmd_bootstrap_multiple_falcons {
+	u8 cmd_type;	/* overlays cmd_type in the pmu_acr_cmd union */
+	u32 flags;	/* presumably PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_* */
+	u32 falconidmask;	/* presumably bit (1 << LSF_FALCON_ID_x) each */
+	u32 usevamask;	/* NOTE(review): meaning not visible in this patch */
+	struct falc_u64 wprvirtualbase;	/* presumably the WPR virtual base */
+};
+
 #define PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_NO  1
 #define PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES 0
 
+
 struct pmu_acr_cmd {
 	union {
 		u8 cmd_type;
 		struct pmu_acr_cmd_bootstrap_falcon bootstrap_falcon;
 		struct pmu_acr_cmd_init_wpr_details init_wpr;
+		struct pmu_acr_cmd_bootstrap_multiple_falcons boot_falcons;
 	};
 };
 
@@ -1177,6 +1197,7 @@ struct pmu_gk20a {
 	/* TBD: remove this if ZBC seq is fixed */
 	struct mem_desc seq_buf;
 	struct mem_desc trace_buf;
+	struct mem_desc wpr_buf;
 	bool buf_loaded;
 
 	struct pmu_sha1_gid gid_info;
@@ -1294,4 +1315,6 @@ int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
 void pmu_enable_irq(struct pmu_gk20a *pmu, bool enable);
 int pmu_wait_message_cond(struct pmu_gk20a *pmu, u32 timeout,
 				 u32 *var, u32 val);
+void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
+				void *param, u32 handle, u32 status);
 #endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index ba47d235..a238c523 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -284,9 +284,17 @@ rel_sig:
 
 int prepare_ucode_blob(struct gk20a *g)
 {
+
 	int err;
 	struct ls_flcn_mgr lsfm_l, *plsfm;
 	struct pmu_gk20a *pmu = &g->pmu;
+	phys_addr_t wpr_addr;
+	u32 wprsize;
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct mc_carveout_info inf;
+	struct sg_table *sgt;
+	struct page *page;
 
 	if (g->acr.ucode_blob.cpu_va) {
 		/*Recovery case, we do not need to form
@@ -304,22 +312,46 @@ int prepare_ucode_blob(struct gk20a *g)
 	gm20b_mm_mmu_vpr_info_fetch(g);
 	gr_gk20a_init_ctxsw_ucode(g);
 
+	mc_get_carveout_info(&inf, NULL, MC_SECURITY_CARVEOUT2);
+	gm20b_dbg_pmu("wpr carveout base:%llx\n", inf.base);
+	wpr_addr = (phys_addr_t)inf.base;
+	gm20b_dbg_pmu("wpr carveout size :%llx\n", inf.size);
+	wprsize = (u32)inf.size;
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate sg_table\n");
+		goto free_sgt;
+	}
+	page = phys_to_page(wpr_addr);
+	sg_set_page(sgt->sgl, page, wprsize, 0);
+	/* This bypasses SMMU for WPR during gmmu_map. */
+	sg_dma_address(sgt->sgl) = 0;
+
+	g->pmu.wpr_buf.gpu_va = gk20a_gmmu_map(vm, &sgt, wprsize,
+						0, gk20a_mem_flag_none);
+	gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va);
+
 	/* Discover all managed falcons*/
 	err = lsfm_discover_ucode_images(g, plsfm);
 	gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
 	if (err)
-		return err;
+		goto free_sgt;
 
 	if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
 		/* Generate WPR requirements*/
 		err = lsf_gen_wpr_requirements(g, plsfm);
 		if (err)
-			return err;
+			goto free_sgt;
 
 		/*Alloc memory to hold ucode blob contents*/
 		err = gk20a_gmmu_alloc(g, plsfm->wpr_size, &g->acr.ucode_blob);
 		if (err)
-			return err;
+			goto free_sgt;
 
 		gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
 			plsfm->managed_flcn_cnt, plsfm->wpr_size);
@@ -329,7 +361,9 @@ int prepare_ucode_blob(struct gk20a *g)
 	}
 	gm20b_dbg_pmu("prepare ucode blob return 0\n");
 	free_acr_resources(g, plsfm);
-	return 0;
+ free_sgt:
+	kfree(sgt);
+	return err;
 }
 
 static u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm,
@@ -495,7 +529,8 @@ static int pmu_populate_loader_cfg(struct gk20a *g,
 
 static int flcn_populate_bl_dmem_desc(struct gk20a *g,
 	struct lsfm_managed_ucode_img *lsfm,
-	union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size)
+	union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size,
+	u32 falconid)
 {
 	struct mc_carveout_info inf;
 	struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
@@ -520,7 +555,10 @@ static int flcn_populate_bl_dmem_desc(struct gk20a *g,
 	*/
 	addr_base = lsfm->lsb_header.ucode_off;
 	mc_get_carveout_info(&inf, NULL, MC_SECURITY_CARVEOUT2);
-	addr_base += inf.base;
+	if (falconid == LSF_FALCON_ID_GPCCS)
+		addr_base += g->pmu.wpr_buf.gpu_va;
+	else
+		addr_base += inf.base;
 	gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
 		lsfm->wpr_header.falcon_id);
 	addr_code = u64_lo32((addr_base +
@@ -555,7 +593,8 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
 	if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
 		gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n");
 		flcn_populate_bl_dmem_desc(g, pnode, &pnode->bl_gen_desc,
-				&pnode->bl_gen_desc_size);
+					&pnode->bl_gen_desc_size,
+					pnode->wpr_header.falcon_id);
 		return 0;
 	}
 
@@ -797,7 +836,7 @@ static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
 		}
 		if (falcon_id == LSF_FALCON_ID_GPCCS) {
 			pnode->lsb_header.flags |=
-				NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
+				NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE;
 		}
 	}
 }
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
index 3a5fa7d0..bd3b633a 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
@@ -50,8 +50,10 @@
  * Defines a common Light Secure Falcon identifier.
  */
 #define LSF_FALCON_ID_PMU       (0)
+#define LSF_FALCON_ID_RESERVED  (1)
 #define LSF_FALCON_ID_FECS      (2)
 #define LSF_FALCON_ID_GPCCS     (3)
+#define LSF_FALCON_ID_END       (4)
 #define LSF_FALCON_ID_INVALID   (0xFFFFFFFF)
 
 /*!
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
index 2a654760..01cc1f16 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
@@ -3,7 +3,7 @@
  *
  * GM20B Graphics Context
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -69,4 +69,5 @@ static bool gr_gm20b_is_firmware_defined(void)
 void gm20b_init_gr_ctx(struct gpu_ops *gops) {
 	gops->gr_ctx.get_netlist_name = gr_gm20b_get_netlist_name;
 	gops->gr_ctx.is_fw_defined = gr_gm20b_is_firmware_defined;
+	gops->gr_ctx.use_dma_for_fw_bootstrap = true;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index e1204dad..6c7831d5 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -688,29 +688,9 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
 		gr_fecs_falcon_hwcfg_r());
 }
 
-static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
-{
-	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
-	unsigned long delay = GR_FECS_POLL_INTERVAL;
-	u32 reg;
-
-	gk20a_dbg_fn("");
-	reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
-	do {
-		reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
-		if (reg == val)
-			return 0;
-		udelay(delay);
-	} while (time_before(jiffies, end_jiffies) ||
-			!tegra_platform_is_silicon());
-
-	return -ETIMEDOUT;
-}
-
 static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
 {
-	u32 err;
-	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
+	u32 err, flags;
 	u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
 	  gr_fecs_falcon_hwcfg_r();
 
@@ -723,63 +703,57 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
 			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
 	}
 
+	flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
+	g->ops.pmu.lsfloadedfalconid = 0;
 	if (g->ops.pmu.fecsbootstrapdone) {
-		gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
-		gm20b_pmu_load_lsf(g, LSF_FALCON_ID_FECS);
-		err = gr_gm20b_ctx_wait_lsf_ready(g, timeout, 0x55AA55AA);
+		/* this must be recovery so bootstrap fecs and gpccs */
+		if (!g->ops.securegpccs) {
+			gr_gm20b_load_gpccs_with_bootloader(g);
+			err = g->ops.pmu.load_lsfalcon_ucode(g,
+					(1 << LSF_FALCON_ID_FECS));
+		} else {
+			/* bind WPR VA inst block */
+			gr_gk20a_load_falcon_bind_instblk(g);
+			err = g->ops.pmu.load_lsfalcon_ucode(g,
+				(1 << LSF_FALCON_ID_FECS) |
+				(1 << LSF_FALCON_ID_GPCCS));
+		}
 		if (err) {
-			gk20a_err(dev_from_gk20a(g), "Unable to recover FECS");
+			gk20a_err(dev_from_gk20a(g),
+				"Unable to recover GR falcon");
 			return err;
-		} else {
-			if (!g->ops.securegpccs) {
-				gr_gm20b_load_gpccs_with_bootloader(g);
-				gk20a_writel(g, gr_gpccs_dmactl_r(),
-					gr_gpccs_dmactl_require_ctx_f(0));
-				gk20a_writel(g, gr_gpccs_cpuctl_r(),
-					gr_gpccs_cpuctl_startcpu_f(1));
-			} else {
-				gk20a_writel(g,
-					gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
-				gm20b_pmu_load_lsf(g, LSF_FALCON_ID_GPCCS);
-				err = gr_gm20b_ctx_wait_lsf_ready(g, timeout,
-						0x55AA55AA);
-				gk20a_writel(g, reg_offset +
-					gr_fecs_cpuctl_alias_r(),
-					gr_gpccs_cpuctl_startcpu_f(1));
-			}
 		}
+
 	} else {
+		/* cold boot or rg exit */
 		g->ops.pmu.fecsbootstrapdone = true;
 		if (!g->ops.securegpccs) {
 			gr_gm20b_load_gpccs_with_bootloader(g);
-			gk20a_writel(g, gr_gpccs_dmactl_r(),
-			       gr_gpccs_dmactl_require_ctx_f(0));
-			gk20a_writel(g, gr_gpccs_cpuctl_r(),
-			       gr_gpccs_cpuctl_startcpu_f(1));
 		} else {
-			pmu_wait_message_cond(&g->pmu,
-					gk20a_get_gr_idle_timeout(g),
-					&g->ops.pmu.lspmuwprinitdone, 1);
-			if (!g->ops.pmu.lspmuwprinitdone) {
-				gk20a_err(dev_from_gk20a(g),
-					"PMU WPR needed but not ready yet");
-				return -ETIMEDOUT;
-			}
-			gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
-			gm20b_pmu_load_lsf(g, LSF_FALCON_ID_GPCCS);
-			err = gr_gm20b_ctx_wait_lsf_ready(g, timeout,
-							0x55AA55AA);
+			/* bind WPR VA inst block */
+			gr_gk20a_load_falcon_bind_instblk(g);
+			err = g->ops.pmu.load_lsfalcon_ucode(g,
+					(1 << LSF_FALCON_ID_GPCCS));
 			if (err) {
 				gk20a_err(dev_from_gk20a(g),
 						"Unable to boot GPCCS\n");
 				return err;
 			}
-			gk20a_writel(g, reg_offset +
-				gr_fecs_cpuctl_alias_r(),
-				gr_gpccs_cpuctl_startcpu_f(1));
 		}
 	}
 
+	/*start gpccs */
+	if (g->ops.securegpccs) {
+		gk20a_writel(g, reg_offset +
+			gr_fecs_cpuctl_alias_r(),
+			gr_gpccs_cpuctl_startcpu_f(1));
+	} else {
+		gk20a_writel(g, gr_gpccs_dmactl_r(),
+			gr_gpccs_dmactl_require_ctx_f(0));
+		gk20a_writel(g, gr_gpccs_cpuctl_r(),
+			gr_gpccs_cpuctl_startcpu_f(1));
+	}
+	/* start fecs */
 	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
 	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
 	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
index 28b40b1c..ac19e99c 100644
--- a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -18,6 +18,7 @@
 #include "gk20a/pmu_gk20a.h"
 #include "acr_gm20b.h"
 #include "pmu_gm20b.h"
+#include "hw_gr_gm20b.h"
 
 /*!
  * Structure/object which single register write need to be done during PG init
@@ -190,21 +191,40 @@ int gm20b_pmu_init_acr(struct gk20a *g)
 	return 0;
 }
 
-static void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
+void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
 			void *param, u32 handle, u32 status)
 {
 
 	gk20a_dbg_fn("");
 
 
-	if (msg->msg.acr.acrmsg.falconid == LSF_FALCON_ID_FECS)
-		gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON");
+	gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON");
 
 	gm20b_dbg_pmu("response code = %x\n", msg->msg.acr.acrmsg.falconid);
+	g->ops.pmu.lsfloadedfalconid = msg->msg.acr.acrmsg.falconid;
 	gk20a_dbg_fn("done");
 }
 
-void gm20b_pmu_load_lsf(struct gk20a *g, u8 falcon_id)
+static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
+{
+	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+	unsigned long delay = GR_FECS_POLL_INTERVAL;
+	u32 reg;
+	/* Poll FECS ctxsw mailbox 0 until it reads val; returns 0 on match. */
+	gk20a_dbg_fn("");
+	reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+	do {
+		reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+		if (reg == val)
+			return 0;
+		udelay(delay);
+	} while (time_before(jiffies, end_jiffies) ||
+			!tegra_platform_is_silicon());
+	/* Reached only on silicon, once the timeout (in ms) has elapsed. */
+	return -ETIMEDOUT;
+}
+
+void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
 	struct pmu_cmd cmd;
@@ -221,8 +241,7 @@ void gm20b_pmu_load_lsf(struct gk20a *g, u8 falcon_id)
 		  sizeof(struct pmu_acr_cmd_bootstrap_falcon);
 		cmd.cmd.acr.bootstrap_falcon.cmd_type =
 		  PMU_ACR_CMD_ID_BOOTSTRAP_FALCON;
-		cmd.cmd.acr.bootstrap_falcon.flags =
-		  PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
+		cmd.cmd.acr.bootstrap_falcon.flags = flags;
 		cmd.cmd.acr.bootstrap_falcon.falconid = falcon_id;
 		gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_BOOTSTRAP_FALCON: %x\n",
 				falcon_id);
@@ -234,13 +253,45 @@ void gm20b_pmu_load_lsf(struct gk20a *g, u8 falcon_id)
 	return;
 }
 
+/*
+ * Bootstrap FECS through the PMU and wait for its mailbox handshake.
+ * Returns 0, -EINVAL (mask is not FECS alone) or -ETIMEDOUT.
+ */
+int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
+{
+	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
+
+	/* GM20B PMU supports loading FECS only */
+	if (falconidmask != (1 << LSF_FALCON_ID_FECS))
+		return -EINVAL;
+	/* if the PMU has not finished WPR init yet, wait for it */
+	if (!g->ops.pmu.lspmuwprinitdone) {
+		pmu_wait_message_cond(&g->pmu, timeout,
+				&g->ops.pmu.lspmuwprinitdone, 1);
+		/* still not ready after waiting: report a timeout */
+		if (!g->ops.pmu.lspmuwprinitdone) {
+			gk20a_err(dev_from_gk20a(g),
+				"PMU not ready to load LSF");
+			return -ETIMEDOUT;
+		}
+	}
+	/* write all-ones to the mailbox 0 clear register, then load FECS */
+	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
+	gm20b_pmu_load_lsf(g, LSF_FALCON_ID_FECS,
+			PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES);
+	/* poll mailbox 0 for the 0x55AA55AA value; propagate int errno */
+	return pmu_gm20b_ctx_wait_lsf_ready(g, timeout, 0x55AA55AA);
+}
+
 void gm20b_init_pmu_ops(struct gpu_ops *gops)
 {
 	if (gops->privsecurity) {
 		gm20b_init_secure_pmu(gops);
 		gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
+		gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
 	} else {
 		gk20a_init_pmu_ops(gops);
+		gops->pmu.load_lsfalcon_ucode = NULL;
 		gops->pmu.init_wpr_region = NULL;
 	}
 	gops->pmu.pmu_setup_elpg = gm20b_pmu_setup_elpg;
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
index 93745498..68f342cc 100644
--- a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
@@ -17,7 +17,7 @@
 #define __PMU_GM20B_H_
 
 void gm20b_init_pmu_ops(struct gpu_ops *gops);
-void gm20b_pmu_load_lsf(struct gk20a *g, u8 falcon_id);
+void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags);
 int gm20b_pmu_init_acr(struct gk20a *g);
 
 #endif /*__PMU_GM20B_H_*/
-- 
cgit v1.2.2