From b7793a493a1fa292a22d5ce84c43ee342b9824b2 Mon Sep 17 00:00:00 2001 From: Supriya Date: Fri, 13 Jun 2014 12:44:27 +0530 Subject: nvgpu: Host side changes to support HS mode GM20B changes in PMU boot sequence to support booting in HS mode and LS mode Bug 1509680 Change-Id: I2832eda0efe17dd5e3a8f11dd06e7d4da267be70 Signed-off-by: Supriya Reviewed-on: http://git-master/r/423140 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Vijayakumar Subbu Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu GVS: Gerrit_Virtual_Submit Reviewed-by: Shridhar Rasal Reviewed-by: Deepak Nibade Reviewed-by: Bharat Nihalani --- drivers/gpu/nvgpu/Kconfig | 9 + drivers/gpu/nvgpu/gk20a/gk20a.h | 9 + drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 2 + drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 37 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 17 +- drivers/gpu/nvgpu/gm20b/Makefile | 2 + drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 1325 +++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gm20b/acr_gm20b.h | 377 ++++++++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 + drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h | 20 + drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 8 + drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h | 80 ++ drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h | 22 + drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 63 +- drivers/gpu/nvgpu/gm20b/mm_gm20b.h | 2 + drivers/gpu/nvgpu/gm20b/pmu_gm20b.c | 26 + drivers/gpu/nvgpu/gm20b/pmu_gm20b.h | 19 + 17 files changed, 2002 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/nvgpu/gm20b/acr_gm20b.c create mode 100644 drivers/gpu/nvgpu/gm20b/acr_gm20b.h create mode 100644 drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h create mode 100644 drivers/gpu/nvgpu/gm20b/pmu_gm20b.c create mode 100644 drivers/gpu/nvgpu/gm20b/pmu_gm20b.h diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index 238d87e2..315c4683 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig @@ -70,3 +70,12 @@ config TEGRA_GK20A Enable support for the GK20A graphics engine on Tegra by adding a Tegra platform interface to the GK20A driver. The Tegra platform interface requires TEGRA_GRHOST (host1x). + +config TEGRA_ACR + bool "Enable HS bin support on GM20B GPU on Tegra" + depends on GK20A_PMU + default n + help + Enable support for loading the High Secure (HS) binary, using + Write Protected Regions (WPR) to store ucodes, and bootstrapping + the PMU, FECS and GPCCS in Low Secure (LS) mode.
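At a high level, the boot path added by this patch is: build a blob of all LS falcon ucodes laid out per the WPR requirements, have the PMU run the HS (ACR) binary, which locks down WPR and validates the LS images, then restart the PMU in LS mode. The outline below is a simplified sketch of that sequence (not literal driver code; error handling and register programming omitted, see acr_gm20b.c further down for the real implementation):

static int gm20b_hs_boot_outline(struct gk20a *g)
{
	int err;

	/* 1. gm20b_pmu_setup_sw(): build the LS ucode blob (WPR headers,
	 *    LSB headers, BL descriptors, ucode images) in non-WPR
	 *    system memory via prepare_ucode_blob(). */
	err = g->ops.pmu.pmu_setup_sw(g);
	if (err)
		return err;

	/* 2. gm20b_bootstrap_hs_flcn(): load acr_ucode.bin, patch in the
	 *    prod or dbg signature, and run it through pmu_exec_gen_bl(),
	 *    which copies the generic bootloader pmu_bl.bin to the top of
	 *    PMU IMEM, starts the falcon, and waits for the HALT that
	 *    signals ACR completion; the PMU is then restarted in LS mode
	 *    through pwr_falcon_cpuctl_alias_r() (start_gm20b_pmu()). */
	return g->ops.pmu.pmu_setup_hw_and_bootstrap(g);
}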
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 726994ff..da5cc917 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -28,6 +28,7 @@ struct channel_gk20a; struct gr_gk20a; struct sim_gk20a; struct gk20a_ctxsw_ucode_segments; +struct acr_gm20b; #include #include @@ -45,6 +46,7 @@ struct gk20a_ctxsw_ucode_segments; #include "priv_ring_gk20a.h" #include "therm_gk20a.h" #include "platform_gk20a.h" +#include "gm20b/acr_gm20b.h" extern struct platform_device tegra_gk20a_device; @@ -205,6 +207,8 @@ struct gpu_ops { struct pmu_sequence *seq); void *(*get_pmu_seq_out_a_ptr)( struct pmu_sequence *seq); + void (*set_pmu_cmdline_args_secure_mode)(struct pmu_gk20a *pmu, + u32 val); } pmu_ver; struct { int (*get_netlist_name)(int index, char *name); @@ -214,6 +218,10 @@ struct gpu_ops { int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr, u32 num_pages, u32 pgsz_idx); } mm; + struct { + int (*pmu_setup_sw)(struct gk20a *g); + int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g); + } pmu; }; struct gk20a { @@ -236,6 +244,7 @@ struct gk20a { struct sim_gk20a sim; struct mm_gk20a mm; struct pmu_gk20a pmu; + struct acr_gm20b acr; struct cooling_device_gk20a gk20a_cdev; /* Save pmu fw here so that it lives cross suspend/resume. diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 66bc47a9..ad0a3dc7 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -23,6 +23,7 @@ #include "channel_gk20a.h" #include "gr_ctx_gk20a.h" #include "mm_gk20a.h" +#include "pmu_gk20a.h" struct gpu_ops gk20a_ops = { .clock_gating = { @@ -48,6 +49,7 @@ int gk20a_init_hal(struct gpu_ops *gops) gk20a_init_fifo(gops); gk20a_init_gr_ctx(gops); gk20a_init_mm(gops); + gk20a_init_pmu_ops(gops); gops->name = "gk20a"; return 0; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index b784b9a6..9b1ecea1 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -38,10 +38,8 @@ #define gk20a_dbg_pmu(fmt, arg...) 
\ gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) -static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt); -static void pmu_setup_hw(struct work_struct *work); static void ap_callback_init_and_enable_ctrl( struct gk20a *g, struct pmu_msg *msg, void *param, u32 seq_desc, u32 status); @@ -62,6 +60,10 @@ static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq) { pmu->args_v1.cpu_freq_hz = freq; } +static void set_pmu_cmdline_args_secure_mode_v1(struct pmu_gk20a *pmu, u32 val) +{ + pmu->args_v1.secure_mode = val; +} static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq) { @@ -482,10 +484,12 @@ static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq) return (void *)(&seq->out_v0); } -static int gk20a_init_pmu(struct pmu_gk20a *pmu) +int gk20a_init_pmu(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; switch (pmu->desc->app_version) { + case APP_VERSION_GM20B_1: + case APP_VERSION_GM20B: case APP_VERSION_1: case APP_VERSION_2: g->ops.pmu_ver.cmd_id_zbc_table_update = 16; @@ -493,6 +497,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu) pmu_cmdline_size_v1; g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = set_pmu_cmdline_args_cpufreq_v1; + g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode = + set_pmu_cmdline_args_secure_mode_v1; g->ops.pmu_ver.get_pmu_cmdline_args_ptr = get_pmu_cmdline_args_ptr_v1; g->ops.pmu_ver.get_pmu_allocation_struct_size = @@ -558,6 +564,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu) pmu_cmdline_size_v0; g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq = set_pmu_cmdline_args_cpufreq_v0; + g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode = + NULL; g->ops.pmu_ver.get_pmu_cmdline_args_ptr = get_pmu_cmdline_args_ptr_v0; g->ops.pmu_ver.get_pmu_allocation_struct_size = @@ -627,7 +635,7 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu) return 0; } -static void pmu_copy_from_dmem(struct pmu_gk20a *pmu, +void pmu_copy_from_dmem(struct pmu_gk20a *pmu, u32 src, u8 *dst, u32 size, u8 port) { struct gk20a *g = pmu->g; @@ -673,7 +681,7 @@ static void pmu_copy_from_dmem(struct pmu_gk20a *pmu, return; } -static void pmu_copy_to_dmem(struct pmu_gk20a *pmu, +void pmu_copy_to_dmem(struct pmu_gk20a *pmu, u32 dst, u8 *src, u32 size, u8 port) { struct gk20a *g = pmu->g; @@ -887,7 +895,7 @@ static int pmu_enable(struct pmu_gk20a *pmu, bool enable) return 0; } -static int pmu_reset(struct pmu_gk20a *pmu) +int pmu_reset(struct pmu_gk20a *pmu) { int err; @@ -999,7 +1007,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu) return 0; } -static void pmu_seq_init(struct pmu_gk20a *pmu) +void pmu_seq_init(struct pmu_gk20a *pmu) { u32 i; @@ -1784,7 +1792,7 @@ static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id); static void pmu_setup_hw_load_zbc(struct gk20a *g); static void pmu_setup_hw_enable_elpg(struct gk20a *g); -static void pmu_setup_hw(struct work_struct *work) +void pmu_setup_hw(struct work_struct *work) { struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init); struct gk20a *g = pmu->g; @@ -1967,6 +1975,12 @@ static void pmu_setup_hw_enable_elpg(struct gk20a *g) } } +void gk20a_init_pmu_ops(struct gpu_ops *gops) +{ + gops->pmu.pmu_setup_sw = gk20a_init_pmu_setup_sw; + gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1; +} + int gk20a_init_pmu_support(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; @@ -1984,11 +1998,10 @@ int gk20a_init_pmu_support(struct gk20a *g) return err; if 
(support_gk20a_pmu()) { - err = gk20a_init_pmu_setup_sw(g); + err = g->ops.pmu.pmu_setup_sw(g); if (err) return err; - - err = gk20a_init_pmu_setup_hw1(g); + err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g); if (err) return err; } @@ -2724,7 +2737,7 @@ static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu) */ } -static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu) +void pmu_dump_falcon_stats(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; int i; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 2843d483..e9567e14 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -51,6 +51,8 @@ /* Mapping between AP_CTRLs and Idle counters */ #define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1) +#define APP_VERSION_GM20B_1 18547257 +#define APP_VERSION_GM20B 17615280 #define APP_VERSION_2 18542378 #define APP_VERSION_1 17997577 #define APP_VERSION_0 16856675 @@ -1058,6 +1060,8 @@ struct pmu_gk20a { }; unsigned long perfmon_events_cnt; bool perfmon_sampling_enabled; + u8 pmu_mode; /*Added for GM20B and ACR*/ + u32 falcon_id; }; int gk20a_init_pmu_support(struct gk20a *g); @@ -1086,5 +1090,16 @@ int gk20a_pmu_debugfs_init(struct platform_device *dev); void gk20a_pmu_reset_load_counters(struct gk20a *g); void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, u32 *total_cycles); - +void gk20a_init_pmu_ops(struct gpu_ops *gops); + +void pmu_copy_to_dmem(struct pmu_gk20a *pmu, + u32 dst, u8 *src, u32 size, u8 port); +void pmu_copy_from_dmem(struct pmu_gk20a *pmu, + u32 src, u8 *dst, u32 size, u8 port); +int pmu_reset(struct pmu_gk20a *pmu); +int gk20a_init_pmu(struct pmu_gk20a *pmu); +void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); +void gk20a_remove_pmu_support(struct pmu_gk20a *pmu); +void pmu_setup_hw(struct work_struct *work); +void pmu_seq_init(struct pmu_gk20a *pmu); #endif /*__PMU_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gm20b/Makefile b/drivers/gpu/nvgpu/gm20b/Makefile index 237ce6f9..20328e9e 100644 --- a/drivers/gpu/nvgpu/gm20b/Makefile +++ b/drivers/gpu/nvgpu/gm20b/Makefile @@ -11,4 +11,6 @@ obj-$(CONFIG_GK20A) = \ fifo_gm20b.o \ gr_ctx_gm20b.o \ gm20b_gating_reglist.o \ + acr_gm20b.o \ + pmu_gm20b.o \ mm_gm20b.o diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c new file mode 100644 index 00000000..df1bc429 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -0,0 +1,1325 @@ +/* + * GM20B ACR + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/delay.h> /* for mdelay */ +#include <linux/firmware.h> +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/io.h> +#include "../../../../arch/arm/mach-tegra/iomap.h" + +#include "gk20a/gk20a.h" +#include "gk20a/pmu_gk20a.h" +#include "hw_pwr_gm20b.h" +#include "mc_carveout_reg.h" + +/*Defines*/ +#define gm20b_dbg_pmu(fmt, arg...)
\ + gk20a_dbg(gpu_dbg_pmu, fmt, ##arg) +#define GPU_TIMEOUT_DEFAULT 10000 + +typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata); + +/*Externs*/ + +/*Forwards*/ +static int lsfm_discover_ucode_images(struct gk20a *g, + struct ls_flcn_mgr *plsfm); +static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct flcn_ucode_img *ucode_image, u32 falcon_id); +static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); +static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); +static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); +static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, + void *nonwpr_addr); +static int acr_ucode_patch_sig(struct gk20a *g, + unsigned int *p_img, + unsigned int *p_prod_sig, + unsigned int *p_dbg_sig, + unsigned int *p_patch_loc, + unsigned int *p_patch_ind); + +/*Globals*/ +static void __iomem *mc = IO_ADDRESS(TEGRA_MC_BASE); +get_ucode_details pmu_acr_supp_ucode_list[MAX_SUPPORTED_LSFM] = { + pmu_ucode_details, +}; + +/*Once in LS mode, only cpuctl_alias is accessible*/ +void start_gm20b_pmu(struct gk20a *g) +{ + gk20a_writel(g, pwr_falcon_cpuctl_alias_r(), + pwr_falcon_cpuctl_startcpu_f(1)); +} + +void gm20b_init_secure_pmu(struct gpu_ops *gops) +{ + gops->pmu.pmu_setup_sw = gm20b_pmu_setup_sw; + gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn; +} + +static void free_blob_res(struct gk20a *g) +{ + /*TODO */ +} + +int gm20b_pmu_setup_sw(struct gk20a *g) +{ + /*from pmu_gk20a.c*/ + struct pmu_gk20a *pmu = &g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + int i, err = 0; + u8 *ptr; + struct sg_table *sgt_seq_buf; + dma_addr_t iova; + + gk20a_dbg_fn(""); + /* Make any ACR structure settings here if ever needed*/ + + if (pmu->sw_ready) { + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + pmu_seq_init(pmu); + + mutex_init(&pmu->elpg_mutex); + mutex_init(&pmu->isr_mutex); + mutex_init(&pmu->pmu_copy_lock); + mutex_init(&pmu->pmu_seq_lock); + gk20a_dbg_fn("skip init"); + goto skip_init; + } + gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 2\n"); + + /* TBD: sysmon subtask */ + + if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON)) + pmu->perfmon_sampling_enabled = true; + + pmu->mutex_cnt = pwr_pmu_mutex__size_1_v(); + pmu->mutex = kzalloc(pmu->mutex_cnt * + sizeof(struct pmu_mutex), GFP_KERNEL); + if (!pmu->mutex) { + err = -ENOMEM; + goto err; + } + + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 3\n"); + + pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * + sizeof(struct pmu_sequence), GFP_KERNEL); + if (!pmu->seq) { + err = -ENOMEM; + goto err_free_mutex; + } + + pmu_seq_init(pmu); + mutex_init(&pmu->elpg_mutex); + mutex_init(&pmu->isr_mutex); + mutex_init(&pmu->pmu_copy_lock); + mutex_init(&pmu->pmu_seq_lock); + + err = prepare_ucode_blob(g); + if (err) + goto err_free_seq; + INIT_WORK(&pmu->pg_init, pmu_setup_hw); + pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, + &iova, + GFP_KERNEL); + if (!pmu->seq_buf.cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto err_free_blob_res; + } + + pmu->seq_buf.iova = iova; + err = gk20a_get_sgtable(d, &sgt_seq_buf, + pmu->seq_buf.cpuva, + pmu->seq_buf.iova, + GK20A_PMU_SEQ_BUF_SIZE); + if (err) { + gk20a_err(d, "failed to allocate sg table\n"); + goto err_free_seq_buf;
+ } + + pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, + GK20A_PMU_SEQ_BUF_SIZE, + 0, /* flags */ + gk20a_mem_flag_none); + if (!pmu->seq_buf.pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + goto err_free_seq_buf_sgt; + } + + ptr = (u8 *)pmu->seq_buf.cpuva; + if (!ptr) { + gk20a_err(d, "failed to map cpu ptr for zbc buffer"); + goto err_unmap_seq_buf; + } + + /* TBD: remove this if ZBC save/restore is handled by PMU + * send an empty ZBC sequence for now */ + ptr[0] = 0x16; /* opcode EXIT */ + ptr[1] = 0; ptr[2] = 1; ptr[3] = 0; + ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0; + + pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; + + gk20a_dbg_fn("done"); + gk20a_free_sgtable(&sgt_seq_buf); + + pmu->sw_ready = true; + +skip_init: + pmu->perfmon_counter.index = 3; /* GR & CE2 */ + pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; + + pmu->remove_support = gk20a_remove_pmu_support; + err = gk20a_init_pmu(pmu); + if (err) { + gk20a_err(d, "failed to set function pointers\n"); + goto err_unmap_seq_buf; + } + + gk20a_dbg_fn("done"); + return 0; + + err_unmap_seq_buf: + gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, + GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); + err_free_seq_buf_sgt: + gk20a_free_sgtable(&sgt_seq_buf); + err_free_seq_buf: + dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, + pmu->seq_buf.cpuva, pmu->seq_buf.iova); + pmu->seq_buf.cpuva = NULL; + pmu->seq_buf.iova = 0; + err_free_blob_res: + free_blob_res(g); + err_free_seq: + kfree(pmu->seq); + err_free_mutex: + kfree(pmu->mutex); + err: + gk20a_dbg_fn("fail"); + return err; +} + +int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) +{ + const struct firmware *pmu_fw; + struct pmu_gk20a *pmu = &g->pmu; + struct lsf_ucode_desc *lsf_desc; + int err; + gm20b_dbg_pmu("requesting PMU ucode in GM20B\n"); + pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE); + if (!pmu_fw) { + gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!"); + gm20b_dbg_pmu("requesting PMU ucode in GM20B failed\n"); + return -ENOENT; + } + gm20b_dbg_pmu("Loaded PMU ucode for blob preparation"); + + pmu->desc = (struct pmu_ucode_desc *)pmu_fw->data; + pmu->ucode_image = (u32 *)((u8 *)pmu->desc + + pmu->desc->descriptor_size); + err = gk20a_init_pmu(pmu); + if (err) { + gm20b_dbg_pmu("failed to set function pointers\n"); + return err; + } + + lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL); + if (!lsf_desc) + return -ENOMEM; + lsf_desc->falcon_id = LSF_FALCON_ID_PMU; + + p_img->desc = pmu->desc; + p_img->data = pmu->ucode_image; + p_img->data_size = pmu->desc->image_size; + p_img->fw_ver = NULL; + p_img->header = NULL; + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n"); + return 0; +} + +int prepare_ucode_blob(struct gk20a *g) +{ + struct device *d = dev_from_gk20a(g); + dma_addr_t iova; + u32 status; + void *nonwpr_addr; + u64 nonwpr_pmu_va; + struct ls_flcn_mgr lsfm_l, *plsfm; + struct sg_table *sgt_nonwpr; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + + plsfm = &lsfm_l; + memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr)); + gm20b_dbg_pmu("fetching GMMU regs\n"); + gm20b_mm_mmu_vpr_info_fetch(g); + + /* Discover all managed falcons*/ + status = lsfm_discover_ucode_images(g, plsfm); + gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt); + if (status != 0) + return status; + + if (plsfm->managed_flcn_cnt) { + /* Generate WPR requirements*/ + status = lsf_gen_wpr_requirements(g, plsfm); + if (status != 0)
+ return status; + + /*Alloc memory to hold ucode blob contents*/ + nonwpr_addr = dma_alloc_coherent(d, plsfm->wpr_size, &iova, + GFP_KERNEL); + if (nonwpr_addr == NULL) + return -ENOMEM; + status = gk20a_get_sgtable(d, &sgt_nonwpr, + nonwpr_addr, + iova, + plsfm->wpr_size); + if (status) { + gk20a_err(d, "failed to allocate sg table for nonwpr\n"); + status = -ENOMEM; + goto err_free_nonwpr_addr; + } + + nonwpr_pmu_va = gk20a_gmmu_map(vm, &sgt_nonwpr, + plsfm->wpr_size, + 0, /* flags */ + gk20a_mem_flag_read_only); + if (!nonwpr_pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + status = -ENOMEM; + goto err_free_nonwpr_sgt; + } + gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", + plsfm->managed_flcn_cnt, plsfm->wpr_size); + lsfm_init_wpr_contents(g, plsfm, nonwpr_addr); + g->acr.ucode_blob_start = nonwpr_pmu_va; + g->acr.ucode_blob_size = plsfm->wpr_size; + gm20b_dbg_pmu("32 bit ucode_start %x, size %d\n", + (u32)nonwpr_pmu_va, plsfm->wpr_size); + gm20b_dbg_pmu("base reg carveout 2:%x\n", + readl(mc + MC_SECURITY_CARVEOUT2_BOM_0)); + gm20b_dbg_pmu("base reg carveout 3:%x\n", + readl(mc + MC_SECURITY_CARVEOUT3_BOM_0)); + } else { + gm20b_dbg_pmu("LSFM is managing no falcons.\n"); + } + gm20b_dbg_pmu("prepare ucode blob return 0\n"); + return 0; +err_free_nonwpr_sgt: + gk20a_free_sgtable(&sgt_nonwpr); +err_free_nonwpr_addr: + dma_free_coherent(d, plsfm->wpr_size, + nonwpr_addr, iova); + nonwpr_addr = NULL; + iova = 0; + gm20b_dbg_pmu("prepare ucode blob return %x\n", status); + return status; +} + +u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm, + u32 falcon_id) +{ + return (plsfm->disable_mask >> falcon_id) & 0x1; +} + +/* Discover all managed falcon ucode images */ +static int lsfm_discover_ucode_images(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct flcn_ucode_img ucode_img; + u32 falcon_id; + u32 i; + int status; + + /* LSFM requires a secure PMU, discover it first.*/ + /* Obtain the PMU ucode image and add it to the list if required*/ + memset(&ucode_img, 0, sizeof(ucode_img)); + status = pmu_ucode_details(g, &ucode_img); + if (status == 0) { + if (ucode_img.lsf_desc != NULL) { + /* The falcon ID is formed by grabbing the static base + * falcon ID from the image and adding the + * engine-designated falcon instance.*/ + pmu->pmu_mode |= PMU_SECURE_MODE; + falcon_id = ucode_img.lsf_desc->falcon_id + + ucode_img.flcn_inst; + + if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) { + pmu->falcon_id = falcon_id; + if (lsfm_add_ucode_img(g, plsfm, &ucode_img, + pmu->falcon_id) == 0) + pmu->pmu_mode |= PMU_LSFM_MANAGED; + + plsfm->managed_flcn_cnt++; + } else { + gm20b_dbg_pmu("id not managed %d\n", + ucode_img.lsf_desc->falcon_id); + } + } + + /*Free any ucode image resources if not managing this falcon*/ + if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) { + gm20b_dbg_pmu("pmu is not LSFM managed\n"); + lsfm_free_ucode_img_res(&ucode_img); + } + } + + /* Enumerate all constructed falcon objects, + as we need the ucode image info and total falcon count.*/ + + /*0th index is always PMU which is already handled in earlier + if condition*/ + for (i = 1; i < MAX_SUPPORTED_LSFM; i++) { + memset(&ucode_img, 0, sizeof(ucode_img)); + if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) { + if (ucode_img.lsf_desc != NULL) { + /* We have engine sigs, ensure that this falcon + is aware of the secure mode expectations + (ACR status)*/ + + /* falcon_id is formed by grabbing the static + base falcon ID from the image and adding the +
engine-designated falcon instance. */ + falcon_id = ucode_img.lsf_desc->falcon_id + + ucode_img.flcn_inst; + + if (!lsfm_falcon_disabled(g, plsfm, + falcon_id)) { + /* Do not manage non-FB ucode*/ + if (lsfm_add_ucode_img(g, + plsfm, &ucode_img, falcon_id) + == 0) + plsfm->managed_flcn_cnt++; + } else { + gm20b_dbg_pmu("not managed %d\n", + ucode_img.lsf_desc->falcon_id); + lsfm_free_nonpmu_ucode_img_res( + &ucode_img); + } + } + } else { + /* Consumed all available falcon objects */ + gm20b_dbg_pmu("Done checking for ucodes %d\n", i); + break; + } + } + return 0; +} + + +int pmu_populate_loader_cfg(struct gk20a *g, + struct lsfm_managed_ucode_img *lsfm, + union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct flcn_ucode_img *p_img = &(lsfm->ucode_img); + struct loader_config *ldr_cfg = + (struct loader_config *)(&p_bl_gen_desc->loader_cfg); + struct gk20a_platform *platform = platform_get_drvdata(g->dev); + u64 addr_base; + struct pmu_ucode_desc *desc; + u64 addr_code, addr_data; + u32 addr_args; + + if (p_img->desc == NULL) /*This means it's a header-based ucode, + and so we do not fill BL gen desc structure*/ + return -EINVAL; + desc = p_img->desc; + /* + Calculate physical and virtual addresses for various portions of + the PMU ucode image + Calculate the 32-bit addresses for the application code, application + data, and bootloader code. These values are all based on IM_BASE. + The 32-bit addresses will be the upper 32-bits of the virtual or + physical addresses of each respective segment. + */ + addr_base = lsfm->lsb_header.ucode_off; + addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0); + gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base); + /*From linux*/ + addr_code = u64_lo32((addr_base + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8); + gm20b_dbg_pmu("app start %d app res code off %d\n", + desc->app_start_offset, desc->app_resident_code_offset); + addr_data = u64_lo32((addr_base + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + gm20b_dbg_pmu("app res data offset %d\n", + desc->app_resident_data_offset); + gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset); + + addr_args = ((pwr_falcon_hwcfg_dmem_size_v( + gk20a_readl(g, pwr_falcon_hwcfg_r()))) + << GK20A_PMU_DMEM_BLKSIZE2); + addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); + + gm20b_dbg_pmu("addr_args %x\n", addr_args); + + /* Populate the loader_config state*/ + ldr_cfg->dma_idx = 2; + ldr_cfg->code_dma_base = addr_code; + ldr_cfg->code_size_total = desc->app_size; + ldr_cfg->code_size_to_load = desc->app_resident_code_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + ldr_cfg->data_dma_base = addr_data; + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->overlay_dma_base = addr_code; + + /* Update the argc/argv members*/ + ldr_cfg->argc = 1; + ldr_cfg->argv = addr_args; + + /*Copying pmu cmdline args*/ + g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, + clk_get_rate(platform->clk[1])); + g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1); + pmu_copy_to_dmem(pmu, addr_args, + (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), + g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); + *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->loader_cfg); + return 0; +} + +int flcn_populate_bl_dmem_desc(struct gk20a *g, + struct lsfm_managed_ucode_img *lsfm, + union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size) +{ + + struct flcn_ucode_img *p_img =
&(lsfm->ucode_img); + struct flcn_bl_dmem_desc *ldr_cfg = + (struct flcn_bl_dmem_desc *)(&p_bl_gen_desc->loader_cfg); + u64 addr_base; + struct pmu_ucode_desc *desc; + u64 addr_code, addr_data; + + if (p_img->desc == NULL) /*This means it's a header-based ucode, + and so we do not fill BL gen desc structure*/ + return -EINVAL; + desc = p_img->desc; + + /* + Calculate physical and virtual addresses for various portions of + the PMU ucode image + Calculate the 32-bit addresses for the application code, application + data, and bootloader code. These values are all based on IM_BASE. + The 32-bit addresses will be the upper 32-bits of the virtual or + physical addresses of each respective segment. + */ + addr_base = lsfm->lsb_header.ucode_off; + addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0); + gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base, + lsfm->wpr_header.falcon_id); + addr_code = u64_lo32((addr_base + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8); + addr_data = u64_lo32((addr_base + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + + gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %x ID\n", + (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset, + lsfm->wpr_header.falcon_id); + + /* Populate the LOADER_CONFIG state */ + memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc)); + ldr_cfg->ctx_dma = 0; + ldr_cfg->code_dma_base = addr_code; + ldr_cfg->non_sec_code_size = desc->app_resident_code_size; + ldr_cfg->data_dma_base = addr_data; + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->bl_dmem_desc); + return 0; +} + +/* Populate falcon boot loader generic desc.*/ +static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, + struct lsfm_managed_ucode_img *pnode) +{ + + struct pmu_gk20a *pmu = &g->pmu; + if (pnode->wpr_header.falcon_id != pmu->falcon_id) { + gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n"); + flcn_populate_bl_dmem_desc(g, pnode, &pnode->bl_gen_desc, + &pnode->bl_gen_desc_size); + return 0; + } + + if (pmu->pmu_mode & PMU_LSFM_MANAGED) { + gm20b_dbg_pmu("pmu write flcn bl gen desc\n"); + if (pnode->wpr_header.falcon_id == pmu->falcon_id) + return pmu_populate_loader_cfg(g, pnode, + &pnode->bl_gen_desc, &pnode->bl_gen_desc_size); + } + + /* Failed to find the falcon requested. */ + return -ENOENT; +} + +/* Initialize WPR contents */ +static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, + void *nonwpr_addr) +{ + + int status = 0; + union flcn_bl_generic_desc *nonwpr_bl_gen_desc; + if (nonwpr_addr == NULL) { + status = -ENOMEM; + } else { + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + struct lsf_wpr_header *wpr_hdr; + struct lsf_lsb_header *lsb_hdr; + void *ucode_off; + u32 i; + + /* The WPR array is at the base of the WPR */ + wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr; + pnode = plsfm->ucode_img_list; + i = 0; + + /* + * Walk the managed falcons, flush WPR and LSB headers to FB. + * flush any bl args to the storage area relative to the + * ucode image (appended on the end as a DMEM area).
+ */ + while (pnode) { + /* Flush WPR header to memory*/ + memcpy(&wpr_hdr[i], &pnode->wpr_header, + sizeof(struct lsf_wpr_header)); + gm20b_dbg_pmu("wpr header as in memory and pnode\n"); + gm20b_dbg_pmu("falconid :%d %d\n", + pnode->wpr_header.falcon_id, + wpr_hdr[i].falcon_id); + gm20b_dbg_pmu("lsb_offset :%x %x\n", + pnode->wpr_header.lsb_offset, + wpr_hdr[i].lsb_offset); + gm20b_dbg_pmu("bootstrap_owner :%d %d\n", + pnode->wpr_header.bootstrap_owner, + wpr_hdr[i].bootstrap_owner); + gm20b_dbg_pmu("lazy_bootstrap :%d %d\n", + pnode->wpr_header.lazy_bootstrap, + wpr_hdr[i].lazy_bootstrap); + gm20b_dbg_pmu("status :%d %d\n", + pnode->wpr_header.status, wpr_hdr[i].status); + + /*Flush LSB header to memory*/ + lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr + + pnode->wpr_header.lsb_offset); + memcpy(lsb_hdr, &pnode->lsb_header, + sizeof(struct lsf_lsb_header)); + gm20b_dbg_pmu("lsb header as in memory and pnode\n"); + gm20b_dbg_pmu("ucode_off :%x %x\n", + pnode->lsb_header.ucode_off, + lsb_hdr->ucode_off); + gm20b_dbg_pmu("ucode_size :%x %x\n", + pnode->lsb_header.ucode_size, + lsb_hdr->ucode_size); + gm20b_dbg_pmu("data_size :%x %x\n", + pnode->lsb_header.data_size, + lsb_hdr->data_size); + gm20b_dbg_pmu("bl_code_size :%x %x\n", + pnode->lsb_header.bl_code_size, + lsb_hdr->bl_code_size); + gm20b_dbg_pmu("bl_imem_off :%x %x\n", + pnode->lsb_header.bl_imem_off, + lsb_hdr->bl_imem_off); + gm20b_dbg_pmu("bl_data_off :%x %x\n", + pnode->lsb_header.bl_data_off, + lsb_hdr->bl_data_off); + gm20b_dbg_pmu("bl_data_size :%x %x\n", + pnode->lsb_header.bl_data_size, + lsb_hdr->bl_data_size); + gm20b_dbg_pmu("flags :%x %x\n", + pnode->lsb_header.flags, lsb_hdr->flags); + + /*If this falcon has a boot loader and related args, + * flush them.*/ + if (!pnode->ucode_img.header) { + nonwpr_bl_gen_desc = + (union flcn_bl_generic_desc *) + ((u8 *)nonwpr_addr + + pnode->lsb_header.bl_data_off); + + /*Populate gen bl and flush to memory*/ + lsfm_fill_flcn_bl_gen_desc(g, pnode); + memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc, + pnode->bl_gen_desc_size); + } + ucode_off = (void *)(pnode->lsb_header.ucode_off + + (u8 *)nonwpr_addr); + /*Copying of ucode*/ + memcpy(ucode_off, pnode->ucode_img.data, + pnode->ucode_img.data_size); + pnode = pnode->next; + i++; + } + + /* Tag the terminator WPR header with an invalid falcon ID. */ + gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id, + 1, LSF_FALCON_ID_INVALID); + } + return status; +} + +/*! 
+ * lsfm_parse_no_loader_ucode: parses UCODE header of falcon + * + * @param[in] p_ucodehdr : UCODE header + * @param[out] lsb_hdr : updates values in LSB header + * + * @return 0 + */ +static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr, + struct lsf_lsb_header *lsb_hdr) +{ + + u32 code_size = 0; + u32 data_size = 0; + u32 i = 0; + u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND]; + + /* Let's calculate the code size*/ + code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND]; + for (i = 0; i < total_apps; i++) { + code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND + (total_apps, i)]; + } + code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)]; + + /* Calculate data size*/ + data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND]; + for (i = 0; i < total_apps; i++) { + data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND + (total_apps, i)]; + } + + lsb_hdr->ucode_size = code_size; + lsb_hdr->data_size = data_size; + lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND]; + lsb_hdr->bl_imem_off = 0; + lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND]; + lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND]; + return 0; +} + +/*! + * @brief lsfm_fill_static_lsb_hdr_info + * Populate static LSB header information using the provided ucode image + */ +static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, + u32 falcon_id, struct lsfm_managed_ucode_img *pnode) +{ + + struct pmu_gk20a *pmu = &g->pmu; + u32 data = 0; + + if (pnode->ucode_img.lsf_desc) + memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc, + sizeof(struct lsf_ucode_desc)); + pnode->lsb_header.ucode_size = pnode->ucode_img.data_size; + + /* The remainder of the LSB depends on the loader usage */ + if (pnode->ucode_img.header) { + /* Does not use a loader */ + pnode->lsb_header.data_size = 0; + pnode->lsb_header.bl_code_size = 0; + pnode->lsb_header.bl_data_off = 0; + pnode->lsb_header.bl_data_size = 0; + + lsfm_parse_no_loader_ucode(pnode->ucode_img.header, + &(pnode->lsb_header)); + + /* Load the first 256 bytes of IMEM. */ + /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX. + True for all method based falcons */ + data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE | + NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + pnode->lsb_header.flags = data; + } else { + /* Uses a loader, i.e. has a desc */ + pnode->lsb_header.data_size = 0; + + /* The loader code size is already aligned (padded) such that + the code following it is aligned, but the size in the image + desc is not, bloat it up to be on a 256 byte alignment. */ + pnode->lsb_header.bl_code_size = ALIGN( + pnode->ucode_img.desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGNMENT); + /* Though the BL is located at 0th offset of the image, the VA + is different to make sure that it doesn't collide with the + actual OS VA range */ + pnode->lsb_header.bl_imem_off = + pnode->ucode_img.desc->bootloader_imem_offset; + + /* TODO: OBJFLCN should export properties using which the below + flags should be populated.*/ + pnode->lsb_header.flags = 0; + + if (falcon_id == pmu->falcon_id) { + data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + pnode->lsb_header.flags = data; + } + } +} + +/* Adds a ucode image to the list of managed ucode images.
*/ +static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct flcn_ucode_img *ucode_image, u32 falcon_id) +{ + + struct lsfm_managed_ucode_img *pnode; + pnode = kzalloc(sizeof(struct lsfm_managed_ucode_img), GFP_KERNEL); + if (pnode == NULL) + return -ENOMEM; + + /* Keep a copy of the ucode image info locally */ + memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img)); + + /* Fill in static WPR header info*/ + pnode->wpr_header.falcon_id = falcon_id; + pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT; + pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY; + + /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by + Android */ + /* Fill in static LSB header info elsewhere */ + lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode); + pnode->next = plsfm->ucode_img_list; + plsfm->ucode_img_list = pnode; + return 0; +} + +/* Free any ucode image structure resources*/ +static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img) +{ + if (p_img->lsf_desc != NULL) { + kfree(p_img->lsf_desc); + p_img->lsf_desc = NULL; + } +} + +/* Free any ucode image structure resources*/ +static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img) +{ + if (p_img->lsf_desc != NULL) { + kfree(p_img->lsf_desc); + p_img->lsf_desc = NULL; + } + if (p_img->desc != NULL) { + kfree(p_img->desc); + p_img->desc = NULL; + } +} + + +/* Generate WPR requirements for ACR allocation request */ +static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm) +{ + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + u32 wpr_offset; + + /* Calculate WPR size required */ + + /* Start with an array of WPR headers at the base of the WPR. + The expectation here is that the secure falcon will do a single DMA + read of this array and cache it internally so it's OK to pack these. + Also, we add 1 to the falcon count to indicate the end of the array.*/ + wpr_offset = sizeof(struct lsf_wpr_header) * + (plsfm->managed_flcn_cnt+1); + + /* Walk the managed falcons, accounting for the LSB structs + as well as the ucode images. */ + while (pnode) { + /* Align, save off, and include an LSB header size */ + wpr_offset = ALIGN(wpr_offset, + LSF_LSB_HEADER_ALIGNMENT); + pnode->wpr_header.lsb_offset = wpr_offset; + wpr_offset += sizeof(struct lsf_lsb_header); + + /* Align, save off, and include the original (static) + ucode image size */ + wpr_offset = ALIGN(wpr_offset, + LSF_UCODE_DATA_ALIGNMENT); + pnode->lsb_header.ucode_off = wpr_offset; + wpr_offset += pnode->ucode_img.data_size; + + /* For falcons that use a boot loader (BL), we append a loader + desc structure on the end of the ucode image and consider this + the boot loader data. The host will then copy the loader desc + args to this space within the WPR region (before locking down) + and the HS bin will then copy them to DMEM 0 for the loader. */ + if (!pnode->ucode_img.header) { + /* Track the size for LSB details filled in later + Note that at this point we don't know what kind of + boot loader desc, so we just take the size of the + generic one, which is the largest it will ever be.
+ */ + /* Align (size bloat) and save off generic + descriptor size*/ + pnode->lsb_header.bl_data_size = ALIGN( + sizeof(pnode->bl_gen_desc), + LSF_BL_DATA_SIZE_ALIGNMENT); + + /*Align, save off, and include the additional BL data*/ + wpr_offset = ALIGN(wpr_offset, + LSF_BL_DATA_ALIGNMENT); + pnode->lsb_header.bl_data_off = wpr_offset; + wpr_offset += pnode->lsb_header.bl_data_size; + } else { + /* bl_data_off is already assigned in static + information. But that is from start of the image */ + pnode->lsb_header.bl_data_off += + (wpr_offset - pnode->ucode_img.data_size); + } + + /* Finally, update ucode surface size to include updates */ + pnode->full_ucode_size = wpr_offset - + pnode->lsb_header.ucode_off; + pnode = pnode->next; + } + plsfm->wpr_size = wpr_offset; + return 0; +} + +/*Loads ACR bin to FB mem and bootstraps PMU with bootloader code + * start and end are addresses of ucode blob in non-WPR region*/ +int gm20b_bootstrap_hs_flcn(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + int i, err = 0; + struct sg_table *sgt_pmu_ucode; + dma_addr_t iova; + u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va, *acr_dmem; + u32 img_size_in_bytes; + struct flcn_bl_dmem_desc bl_dmem_desc; + u32 status, start, size; + const struct firmware *acr_fw; + struct acr_gm20b *acr = &g->acr; + u32 *acr_ucode_header_t210_load; + u32 *acr_ucode_data_t210_load; + + start = g->acr.ucode_blob_start; + size = g->acr.ucode_blob_size; + + gm20b_dbg_pmu(""); + + acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE); + if (!acr_fw) { + gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!"); + return -ENOENT; + } + acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data; + acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data + + acr->hsbin_hdr->header_offset); + acr_ucode_data_t210_load = (u32 *)(acr_fw->data + + acr->hsbin_hdr->data_offset); + acr_ucode_header_t210_load = (u32 *)(acr_fw->data + + acr->fw_hdr->hdr_offset); + img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256); + + /* Lets patch the signatures first.. 
*/ + if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load, + (u32 *)(acr_fw->data + acr->fw_hdr->sig_prod_offset), + (u32 *)(acr_fw->data + acr->fw_hdr->sig_dbg_offset), + (u32 *)(acr_fw->data + acr->fw_hdr->patch_loc), + (u32 *)(acr_fw->data + acr->fw_hdr->patch_sig)) < 0) + return -1; + pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, &iova, + GFP_KERNEL); + if (!pacr_ucode_cpuva) + return -ENOMEM; + + err = gk20a_get_sgtable(d, &sgt_pmu_ucode, + pacr_ucode_cpuva, + iova, + img_size_in_bytes); + if (err) { + gk20a_err(d, "failed to allocate sg table\n"); + err = -ENOMEM; + goto err_free_acr_buf; + } + pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, + img_size_in_bytes, + 0, /* flags */ + gk20a_mem_flag_read_only); + if (!pacr_ucode_pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + err = -ENOMEM; + goto err_free_ucode_sgt; + } + acr_dmem = (u64 *) + &(((u8 *)acr_ucode_data_t210_load)[ + acr_ucode_header_t210_load[2]]); + ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = + start; + ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = + size; + ((struct flcn_acr_desc *)acr_dmem)->wpr_region_id = 2; + ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; + ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[0].region_id + = 2; + ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[1].region_id + = 3; + ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; + + for (i = 0; i < (img_size_in_bytes/4); i++) { + gk20a_mem_wr32(pacr_ucode_cpuva, i, + acr_ucode_data_t210_load[i]); + } + /* + * In order to execute this binary, we will be using PMU HAL to run + * a bootloader which will load this image into PMU IMEM/DMEM. + * Fill up the bootloader descriptor for PMU HAL to use.. + * TODO: Use standard descriptor which the generic bootloader is + * checked in. 
+ */ + + bl_dmem_desc.signature[0] = 0; + bl_dmem_desc.signature[1] = 0; + bl_dmem_desc.signature[2] = 0; + bl_dmem_desc.signature[3] = 0; + bl_dmem_desc.ctx_dma = GK20A_PMU_DMAIDX_UCODE; + bl_dmem_desc.code_dma_base = + (unsigned int)(((u64)pacr_ucode_pmu_va >> 8)); + bl_dmem_desc.non_sec_code_off = acr_ucode_header_t210_load[0]; + bl_dmem_desc.non_sec_code_size = acr_ucode_header_t210_load[1]; + bl_dmem_desc.sec_code_off = acr_ucode_header_t210_load[5]; + bl_dmem_desc.sec_code_size = acr_ucode_header_t210_load[6]; + bl_dmem_desc.code_entry_point = 0; /* Start at 0th offset */ + bl_dmem_desc.data_dma_base = + bl_dmem_desc.code_dma_base + + ((acr_ucode_header_t210_load[2]) >> 8); + bl_dmem_desc.data_size = acr_ucode_header_t210_load[3]; + status = pmu_exec_gen_bl(g, &bl_dmem_desc, 1); + if (status != 0) { + err = status; + goto err_free_ucode_map; + } + return 0; +err_free_ucode_map: + gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va, + img_size_in_bytes, gk20a_mem_flag_none); +err_free_ucode_sgt: + gk20a_free_sgtable(&sgt_pmu_ucode); +err_free_acr_buf: + dma_free_coherent(d, img_size_in_bytes, + pacr_ucode_cpuva, iova); + return err; +} + +u8 pmu_is_debug_mode_en(struct gk20a *g) +{ + int ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r()); + return 1; +/*TODO return (ctl_stat & pwr_pmu_scpctl_stat_debug_mode_m());*/ +} + +/* + * @brief Patch signatures into ucode image + */ +static int +acr_ucode_patch_sig(struct gk20a *g, + unsigned int *p_img, + unsigned int *p_prod_sig, + unsigned int *p_dbg_sig, + unsigned int *p_patch_loc, + unsigned int *p_patch_ind) +{ + int i, *p_sig; + gm20b_dbg_pmu(""); + + if (!pmu_is_debug_mode_en(g)) { + p_sig = p_prod_sig; + gm20b_dbg_pmu("PRODUCTION MODE\n"); + } else { + p_sig = p_dbg_sig; + gm20b_dbg_pmu("DEBUG MODE\n"); + } + + /* Patching logic:*/ + for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) { + p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)]; + p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1]; + p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2]; + p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3]; + } + return 0; +} + +static int bl_bootstrap(struct pmu_gk20a *pmu, + struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz) +{ + struct gk20a *g = pmu->g; + struct mm_gk20a *mm = &g->mm; + struct pmu_ucode_desc *desc = pmu->desc; + u32 imem_dst_blk = 0; + u32 virt_addr = 0; + u32 tag = 0; + u32 index = 0; + struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc; + u32 *bl_ucode; + + gk20a_dbg_fn(""); + gk20a_writel(g, pwr_falcon_itfen_r(), + gk20a_readl(g, pwr_falcon_itfen_r()) | + pwr_falcon_itfen_ctxen_enable_f()); + gk20a_writel(g, pwr_pmu_new_instblk_r(), + pwr_pmu_new_instblk_ptr_f( + mm->pmu.inst_block.cpu_pa >> 12) | + pwr_pmu_new_instblk_valid_f(1) | + pwr_pmu_new_instblk_target_sys_coh_f()); + + /* TBD: load all other surfaces */ + /*copy bootloader interface structure to dmem*/ + gk20a_writel(g, pwr_falcon_dmemc_r(0), + pwr_falcon_dmemc_offs_f(0) | + pwr_falcon_dmemc_blk_f(0) | + pwr_falcon_dmemc_aincw_f(1)); + pmu_copy_to_dmem(pmu, 0, (u8 *)pbl_desc, + sizeof(struct flcn_bl_dmem_desc), 0); + /*TODO This had to be copied to bl_desc_dmem_load_off, but since + * this is 0, so ok for now*/ + + /* Now copy bootloader to TOP of IMEM */ + imem_dst_blk = (pwr_falcon_hwcfg_imem_size_v( + gk20a_readl(g, pwr_falcon_hwcfg_r()))) - bl_sz/256; + + /* Set Auto-Increment on write */ + gk20a_writel(g, pwr_falcon_imemc_r(0), + pwr_falcon_imemc_offs_f(0) | + pwr_falcon_imemc_blk_f(imem_dst_blk) | + pwr_falcon_imemc_aincw_f(1)); + 
virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; + tag = virt_addr >> 8; /* tag is always 256B aligned */ + bl_ucode = (u32 *)(pmu->ucode.cpuva); + for (index = 0; index < bl_sz/4; index++) { + if ((index % 64) == 0) { + gk20a_writel(g, pwr_falcon_imemt_r(0), + (tag & 0xffff) << 0); + tag++; + } + gk20a_writel(g, pwr_falcon_imemd_r(0), + bl_ucode[index] & 0xffffffff); + } + + gk20a_writel(g, pwr_falcon_imemt_r(0), (0 & 0xffff) << 0); + gm20b_dbg_pmu("Before starting falcon with BL\n"); + + gk20a_writel(g, pwr_falcon_bootvec_r(), + pwr_falcon_bootvec_vec_f(virt_addr)); + + gk20a_writel(g, pwr_falcon_cpuctl_r(), + pwr_falcon_cpuctl_startcpu_f(1)); + + gk20a_writel(g, pwr_falcon_os_r(), desc->app_version); + + return 0; +} + +int gm20b_init_pmu_setup_hw1(struct gk20a *g, struct flcn_bl_dmem_desc *desc, + u32 bl_sz) +{ + struct pmu_gk20a *pmu = &g->pmu; + int err; + + gk20a_dbg_fn(""); + pmu_reset(pmu); + + /* setup apertures - virtual */ + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), + pwr_fbif_transcfg_mem_type_virtual_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT), + pwr_fbif_transcfg_mem_type_virtual_f()); + /* setup apertures - physical */ + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_local_fb_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_coherent_sysmem_f()); + gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), + pwr_fbif_transcfg_mem_type_physical_f() | + pwr_fbif_transcfg_target_noncoherent_sysmem_f()); + + err = bl_bootstrap(pmu, desc, bl_sz); + if (err) + return err; + return 0; +} + +/* +* Executes a generic bootloader and waits for PMU to halt. +* This BL will be used for those binaries that are loaded +* and executed at times other than RM PMU Binary execution.
+* +* @param[in] g gk20a pointer +* @param[in] desc Bootloader descriptor +* @param[in] dma_idx DMA Index +* @param[in] b_wait_for_halt Wait for PMU to HALT +*/ +int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct mm_gk20a *mm = &g->mm; + struct vm_gk20a *vm = &mm->pmu.vm; + struct device *d = dev_from_gk20a(g); + int i, err = 0; + struct sg_table *sgt_pmu_ucode; + dma_addr_t iova; + u32 bl_sz; + void *bl_cpuva; + u64 bl_pmu_va; + const struct firmware *hsbl_fw; + struct acr_gm20b *acr = &g->acr; + struct hsflcn_bl_desc *pmu_bl_gm10x_desc; + u32 *pmu_bl_gm10x = NULL; + DEFINE_DMA_ATTRS(attrs); + gm20b_dbg_pmu(""); + + hsbl_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_BL_UCODE_IMAGE); + if (!hsbl_fw) { + gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!"); + return -ENOENT; + } + acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data; + acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data + + acr->bl_bin_hdr->header_offset); + pmu_bl_gm10x_desc = acr->pmu_hsbl_desc; + pmu_bl_gm10x = (u32 *)(hsbl_fw->data + acr->bl_bin_hdr->data_offset); + bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size, + 256); + gm20b_dbg_pmu("Executing Generic Bootloader\n"); + + /*TODO in code verify that enable PMU is done, scrubbing etc is done*/ + /*TODO in code verify that gmmu vm init is done*/ + /* + * Disable interrupts to avoid kernel hitting breakpoint due + * to PMU halt + */ + + gk20a_writel(g, pwr_falcon_irqsclr_r(), + gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10))); + + dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); + bl_cpuva = dma_alloc_attrs(d, bl_sz, + &iova, + GFP_KERNEL, + &attrs); + gm20b_dbg_pmu("bl size is %x\n", bl_sz); + if (!bl_cpuva) { + gk20a_err(d, "failed to allocate memory\n"); + err = -ENOMEM; + goto err_done; + } + + err = gk20a_get_sgtable(d, &sgt_pmu_ucode, + bl_cpuva, + iova, + bl_sz); + if (err) { + gk20a_err(d, "failed to allocate sg table\n"); + goto err_free_cpu_va; + } + + bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, + bl_sz, + 0, /* flags */ + gk20a_mem_flag_read_only); + if (!bl_pmu_va) { + gk20a_err(d, "failed to map pmu ucode memory!!"); + goto err_free_ucode_sgt; + } + + for (i = 0; i < (bl_sz) >> 2; i++) + gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]); + gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); + pmu->ucode.cpuva = bl_cpuva; + pmu->ucode.pmu_va = bl_pmu_va; + gm20b_init_pmu_setup_hw1(g, desc, bl_sz); + /* Poll for HALT */ + if (b_wait_for_halt) { + err = pmu_wait_for_halt(g, GPU_TIMEOUT_DEFAULT); + if (err == 0) + /* Clear the HALT interrupt */ + gk20a_writel(g, pwr_falcon_irqsclr_r(), + gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10))); + else + goto err_unmap_bl; + } + gm20b_dbg_pmu("after waiting for halt, err %x\n", err); + gm20b_dbg_pmu("err reg :%x\n", readl(mc + + MC_ERR_GENERALIZED_CARVEOUT_STATUS_0)); + gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g, + pwr_falcon_mmu_phys_sec_r())); + gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r())); + start_gm20b_pmu(g); + err = 0; +err_unmap_bl: + gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va, + bl_sz, gk20a_mem_flag_none); +err_free_ucode_sgt: + gk20a_free_sgtable(&sgt_pmu_ucode); +err_free_cpu_va: + dma_free_attrs(d, bl_sz, + bl_cpuva, iova, &attrs); +err_done: + return err; +} + +/*! 
+* Wait for PMU to halt +* @param[in] g GPU object pointer +* @param[in] timeout Timeout in us for PMU to halt +* @return '0' if PMU halts +*/ +int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout) +{ + u32 data = 0; + udelay(10); + data = gk20a_readl(g, pwr_falcon_cpuctl_r()); + gm20b_dbg_pmu("bef while cpuctl %x, timeout %d\n", data, timeout); + while (timeout != 0) { + data = gk20a_readl(g, pwr_falcon_cpuctl_r()); + if (data & pwr_falcon_cpuctl_halt_intr_m()) + /*CPU is halted, break*/ + break; + timeout--; + udelay(1); + } + if (timeout == 0) + return -EBUSY; + return 0; +} diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h new file mode 100644 index 00000000..e0dd50d0 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h @@ -0,0 +1,377 @@ +/* + * GM20B ACR + * + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __ACR_GM20B_H_ +#define __ACR_GM20B_H_ +#include "gk20a/gk20a.h" +#include "mm_gm20b.h" + +/*Defines*/ + +/*chip specific defines*/ +#define MAX_SUPPORTED_LSFM 1 /*PMU, FECS, GPCCS*/ +#define LSF_UCODE_DATA_ALIGNMENT 4096 + +#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode.bin" +#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin" +#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin" + +#define LSFM_DISABLE_MASK_NONE (0x00000000) /*Disable no LS falcons*/ +#define LSFM_DISABLE_MASK_ALL (0xFFFFFFFF) /*Disable all LS falcons*/ + +#define PMU_SECURE_MODE (0x1) +#define PMU_LSFM_MANAGED (0x2) + +/*ACR load related*/ +/*! + * Supporting maximum of 2 regions. + * This is needed to pre-allocate space in DMEM + */ +#define T210_FLCN_ACR_MAX_REGIONS (2) +#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200) + +/*! + * Falcon Id Defines + * Defines a common Light Secure Falcon identifier. + */ +#define LSF_FALCON_ID_PMU (0) +#define LSF_FALCON_ID_FECS (2) +#define LSF_FALCON_ID_GPCCS (3) +#define LSF_FALCON_ID_INVALID (0xFFFFFFFF) + +/*! + * Bootstrap Owner Defines + */ +#define LSF_BOOTSTRAP_OWNER_DEFAULT (LSF_FALCON_ID_PMU) + +/*! + * Image Status Defines + */ +#define LSF_IMAGE_STATUS_NONE (0) +#define LSF_IMAGE_STATUS_COPY (1) +#define LSF_IMAGE_STATUS_VALIDATION (2) +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (3) + +/*LSB header related defines*/ +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0 +#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE 1 +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0 +#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE 4 + +/*! + * Light Secure WPR Content Alignments + */ +#define LSF_LSB_HEADER_ALIGNMENT 256 +#define LSF_BL_DATA_ALIGNMENT 256 +#define LSF_BL_DATA_SIZE_ALIGNMENT 256 +#define LSF_BL_CODE_SIZE_ALIGNMENT 256 + +/*! + * Falcon UCODE header index. + */ +#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0) +#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1) +#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2) +#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3) +#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4) +/*!
+ * There are a total of N apps with code and offset defined in the UCODE header. + * These macros provide the CODE and DATA offset and size of the A-th application. + */ +#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5) +#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2)) +#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2) + 1) +#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (N*2) - 1) + +#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1) +#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2)) +#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2) + 1) +#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (N*2) - 1) + +#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1) +#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \ + (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2) + +/*Externs*/ + +/*Structs*/ + +/*! + * Light Secure Falcon Ucode Description Defines + * This structure is preliminary and may change as the ucode signing flow evolves. + */ +struct lsf_ucode_desc { + u8 prd_keys[2][16]; + u8 dbg_keys[2][16]; + u32 b_prd_present; + u32 b_dbg_present; + u32 falcon_id; +}; + +/*! + * Light Secure WPR Header + * Defines state allowing Light Secure Falcon bootstrapping. + * + * falcon_id - LS falcon ID + * lsb_offset - Offset into WPR region holding LSB header + * bootstrap_owner - Bootstrap OWNER (either PMU or SEC2) + * lazy_bootstrap - Skip bootstrapping by ACR + * status - Bootstrapping status + */ +struct lsf_wpr_header { + u32 falcon_id; + u32 lsb_offset; + u32 bootstrap_owner; + u32 lazy_bootstrap; + u32 status; +}; + +struct lsf_lsb_header { + struct lsf_ucode_desc signature; + u32 ucode_off; + u32 ucode_size; + u32 data_size; + u32 bl_code_size; + u32 bl_imem_off; + u32 bl_data_off; + u32 bl_data_size; + u32 flags; +}; + +/*! + * Structure used by the boot-loader to load the rest of the code. This has + * to be filled by host and copied into DMEM at offset provided in the + * hsflcn_bl_desc.bl_desc_dmem_load_off. + * + * signature - 16B signature for secure code. 0s if no secure code + * ctx_dma - CtxDma to be used by BL while loading code/data + * code_dma_base - 256B aligned Physical FB Address where code is located + * non_sec_code_off - Offset from code_dma_base where the nonSecure code is + * located. The offset must be multiple of 256 to help perf + * non_sec_code_size - The size of the nonSecure code part. + * sec_code_off - Offset from code_dma_base where the secure code is + * located. The offset must be multiple of 256 to help perf + * sec_code_size - The size of the secure code part. + * code_entry_point - Code entry point which will be invoked by BL after + * code is loaded. + * data_dma_base - 256B aligned Physical FB Address where data is located. + * data_size - Size of data block. Should be multiple of 256B + */ +struct flcn_bl_dmem_desc { + u32 signature[4]; /*Should be the first element..*/ + u32 ctx_dma; + u32 code_dma_base; + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 sec_code_off; + u32 sec_code_size; + u32 code_entry_point; + u32 data_dma_base; + u32 data_size; +}; + +/*! + * Legacy structure used by the current PMU/DPU bootloader.
+
+/*!
+ * Legacy structure used by the current PMU/DPU bootloader.
+ */
+struct loader_config {
+	u32 dma_idx;
+	u32 code_dma_base; /*name = "gm20b"; return 0;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index 39259516..bf0b1ffd 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -202,4 +202,24 @@ static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_mmu_vpr_info_r(void)
+{
+	return 0x00100cd0;
+}
+static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
+{
+	return 0x00000001;
+}
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index a4ae1ec0..eb6cf4ad 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -322,6 +322,14 @@ static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
 {
 	return (v & 0x1) << 1;
 }
+static inline u32 gr_fecs_cpuctl_alias_r(void)
+{
+	return 0x00409130;
+}
+static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
 static inline u32 gr_fecs_dmactl_r(void)
 {
 	return 0x0040910c;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
index 3af9cda8..384a9ab5 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
@@ -290,6 +290,86 @@ static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
 {
 	return (v & 0x1) << 1;
 }
+static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
+{
+	return (v & 0x1) << 4;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
+{
+	return 0x1 << 4;
+}
+static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
+{
+	return (r >> 4) & 0x1;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
+{
+	return (v & 0x1) << 6;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
+{
+	return 0x1 << 6;
+}
+static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
+{
+	return (r >> 6) & 0x1;
+}
+static inline u32 pwr_falcon_cpuctl_alias_r(void)
+{
+	return 0x0010a130;
+}
+static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
+{
+	return (v & 0x1) << 1;
+}
+static inline u32 pwr_pmu_scpctl_stat_r(void)
+{
+	return 0x0010ac08;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
+{
+	return (v & 0x1) << 20;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
+{
+	return 0x1 << 20;
+}
+static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
+{
+	return (r >> 20) & 0x1;
+}
+static inline u32 pwr_falcon_imemc_r(u32 i)
+{
+	return 0x0010a180 + i*16;
+}
+static inline u32 pwr_falcon_imemc_offs_f(u32 v)
+{
+	return (v & 0x3f) << 2;
+}
+static inline u32 pwr_falcon_imemc_blk_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
+{
+	return (v & 0x1) << 24;
+}
+static inline u32 pwr_falcon_imemd_r(u32 i)
+{
+	return 0x0010a184 + i*16;
+}
+static inline u32 pwr_falcon_imemt_r(u32 i)
+{
+	return 0x0010a188 + i*16;
+}
+static inline u32 pwr_falcon_sctl_r(void)
+{
+	return 0x0010a240;
+}
+static inline u32 pwr_falcon_mmu_phys_sec_r(void)
+{
+	return 0x00100ce4;
+}
 static inline u32 pwr_falcon_bootvec_r(void)
 {
 	return 0x0010a104;
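A sketch of how the IMEMC/IMEMD/IMEMT accessors above are typically combined
to stream a boot loader into falcon IMEM; this helper is an illustration
under stated assumptions (a single tag write, whereas a complete
implementation would re-tag every 256B block), not code from this patch:

static void pmu_imem_stream_sketch(struct gk20a *g, u32 dst,
	const u32 *src, u32 words, u32 tag)
{
	u32 i;

	/* Select IMEM port 0: destination offset and block, with
	 * auto-increment-on-write so each IMEMD write advances. */
	gk20a_writel(g, pwr_falcon_imemc_r(0),
		pwr_falcon_imemc_offs_f(dst >> 2) |
		pwr_falcon_imemc_blk_f(dst >> 8) |
		pwr_falcon_imemc_aincw_f(1));

	/* Tag the first 256B block. */
	gk20a_writel(g, pwr_falcon_imemt_r(0), tag);

	for (i = 0; i < words; i++)
		gk20a_writel(g, pwr_falcon_imemd_r(0), src[i]);
}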
diff --git a/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h b/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h
new file mode 100644
index 00000000..a9273a62
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h
@@ -0,0 +1,22 @@
+/*
+ * GM20B MC registers used by ACR
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _MC_CARVEOUT_REG_H_
+#define _MC_CARVEOUT_REG_H_
+
+#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c
+#define MC_SECURITY_CARVEOUT3_BOM_0 0xcac
+#define MC_ERR_GENERALIZED_CARVEOUT_STATUS_0 0xc00
+#endif /*_MC_CARVEOUT_REG_H_*/
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 67d61569..2c211a57 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -13,9 +13,11 @@
  * more details.
  */
 
+#include <linux/pm_runtime.h>
 #include "gk20a/gk20a.h"
 #include "mm_gm20b.h"
 #include "hw_gmmu_gm20b.h"
+#include "hw_fb_gm20b.h"
 
 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
@@ -24,8 +26,8 @@ static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
-			enum gmmu_pgsz_gk20a pgsz_idx,
-			u64 first_vaddr, u64 last_vaddr)
+				enum gmmu_pgsz_gk20a pgsz_idx,
+				u64 first_vaddr, u64 last_vaddr)
 {
 	int err;
 	u32 pte_lo, pte_hi;
@@ -39,10 +41,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	gk20a_dbg_fn("");
 
 	pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
-			&pde_lo, &pde_hi);
+				&pde_lo, &pde_hi);
 
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
-			pgsz_idx, pde_lo, pde_hi);
+				pgsz_idx, pde_lo, pde_hi);
 
 	/* Expect ptes of the same pde */
 	BUG_ON(pde_lo != pde_hi);
@@ -185,7 +187,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 			vaddr_pde_start = (u64)i << pde_shift;
 			allocate_gmmu_ptes_sparse(vm, pgsz_idx,
 				vaddr_pde_start,
-				PDE_ADDR_END(vaddr_pde_start, pde_shift));
+				PDE_ADDR_END(vaddr_pde_start,
+						pde_shift));
 		} else {
 			/* Check leading and trailing spaces which don't fit
 			 * into an entire pde.
 			 */
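For context, a simplified sketch of the per-PDE walk that gm20b_vm_put_sparse
performs above, using PDE_ADDR_END from mm_gm20b.h; the function below is
illustrative only and not part of the patch:

/* Walk [vaddr, vaddr + size) one PDE at a time; each iteration covers the
 * intersection of the range with a single PDE's span. */
static void walk_pde_spans_sketch(u64 vaddr, u64 size, u32 pde_shift)
{
	u64 end = vaddr + size - 1;
	u64 cur = vaddr;

	while (cur <= end) {
		u64 span_end = PDE_ADDR_END(cur, pde_shift);

		if (span_end > end)
			span_end = end;
		/* operate on [cur, span_end], e.g. allocate sparse PTEs */
		cur = span_end + 1;
	}
}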
@@ -212,6 +215,56 @@ fail:
 	return err;
 }
 
+static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
+		const unsigned int msec)
+{
+	unsigned long timeout;
+
+	if (tegra_platform_is_silicon())
+		timeout = jiffies + msecs_to_jiffies(msec);
+	else
+		timeout = msecs_to_jiffies(msec);
+
+	while (1) {
+		u32 val;
+
+		val = gk20a_readl(g, fb_mmu_vpr_info_r());
+		if (fb_mmu_vpr_info_fetch_v(val) ==
+		    fb_mmu_vpr_info_fetch_false_v())
+			break;
+
+		if (tegra_platform_is_silicon()) {
+			if (WARN_ON(time_after(jiffies, timeout)))
+				return -ETIME;
+		} else if (--timeout == 0)
+			return -ETIME;
+	}
+	return 0;
+}
+
+int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
+{
+	int ret = 0;
+
+	gk20a_busy_noresume(g->dev);
+#ifdef CONFIG_PM_RUNTIME
+	if (!pm_runtime_active(&g->dev->dev))
+		goto fail;
+#endif
+
+	if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
+		ret = -ETIME;
+		goto fail;
+	}
+
+	gk20a_writel(g, fb_mmu_vpr_info_r(),
+		fb_mmu_vpr_info_fetch_true_v());
+
+	ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
+
+fail:
+	gk20a_idle(g->dev);
+	return ret;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
index 0f94d2bf..6939fc1a 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
@@ -19,6 +19,8 @@ struct gk20a;
 #define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
 #define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
+#define VPR_INFO_FETCH_WAIT (5)
 
 void gm20b_init_mm(struct gpu_ops *gops);
+int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
new file mode 100644
index 00000000..4b42b838
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -0,0 +1,26 @@
+/*
+ * GM20B PMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "gk20a/gk20a.h"
+#include "acr_gm20b.h"
+
+/* Route PMU ops to the secure (ACR/HS) boot path when TEGRA_ACR is
+ * enabled; otherwise fall back to the non-secure gk20a path. */
+void gm20b_init_pmu_ops(struct gpu_ops *gops)
+{
+#ifdef CONFIG_TEGRA_ACR
+	gm20b_init_secure_pmu(gops);
+#else
+	gk20a_init_pmu_ops(gops);
+#endif
+}
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
new file mode 100644
index 00000000..d36d3803
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
@@ -0,0 +1,19 @@
+/*
+ * GM20B PMU
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __PMU_GM20B_H_
+#define __PMU_GM20B_H_
+void gm20b_init_pmu_ops(struct gpu_ops *gops);
+#endif /*__PMU_GM20B_H_*/
-- 
cgit v1.2.2