path: root/drivers/gpu/nvgpu/gm20b
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c           | 1444
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.h           |   60
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.c           |   65
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.h           |   33
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.c           | 1605
-rw-r--r--  drivers/gpu/nvgpu/gm20b/clk_gm20b.h           |   95
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.c            |  195
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.h            |   40
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c          |  223
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.h          |   39
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c |  731
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h |  100
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c        |   72
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h        |   36
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c            | 1527
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.h            |  137
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |  708
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.h           |   31
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c           |  487
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.h           |   49
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |   86
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.h            |   43
-rw-r--r--  drivers/gpu/nvgpu/gm20b/pmu_gm20b.c           |  283
-rw-r--r--  drivers/gpu/nvgpu/gm20b/pmu_gm20b.h           |   37
-rw-r--r--  drivers/gpu/nvgpu/gm20b/regops_gm20b.c        |  450
-rw-r--r--  drivers/gpu/nvgpu/gm20b/regops_gm20b.h        |   44
-rw-r--r--  drivers/gpu/nvgpu/gm20b/therm_gm20b.c         |   78
-rw-r--r--  drivers/gpu/nvgpu/gm20b/therm_gm20b.h         |   30
28 files changed, 8728 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
new file mode 100644
index 00000000..a39cdf2c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -0,0 +1,1444 @@
1/*
2 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/types.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/gmmu.h>
26#include <nvgpu/timers.h>
27#include <nvgpu/nvgpu_common.h>
28#include <nvgpu/kmem.h>
29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/acr/nvgpu_acr.h>
31#include <nvgpu/firmware.h>
32#include <nvgpu/pmu.h>
33#include <nvgpu/falcon.h>
34#include <nvgpu/enabled.h>
35#include <nvgpu/mm.h>
36
37#include "gk20a/gk20a.h"
38#include "gk20a/pmu_gk20a.h"
39#include "mm_gm20b.h"
40#include "acr_gm20b.h"
41
42#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
43
44/*Defines*/
45#define gm20b_dbg_pmu(fmt, arg...) \
46 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
47
48typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata);
49
50/*Externs*/
51
52/*Forwards*/
53static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
54static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
55static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
56static int lsfm_discover_ucode_images(struct gk20a *g,
57 struct ls_flcn_mgr *plsfm);
58static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
59 struct flcn_ucode_img *ucode_image, u32 falcon_id);
60static void lsfm_free_ucode_img_res(struct gk20a *g,
61 struct flcn_ucode_img *p_img);
62static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g,
63 struct flcn_ucode_img *p_img);
64static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
65static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
66 struct nvgpu_mem *nonwpr);
67static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm);
68
69/*Globals*/
70static get_ucode_details pmu_acr_supp_ucode_list[] = {
71 pmu_ucode_details,
72 fecs_ucode_details,
73 gpccs_ucode_details,
74};
75
76/*Once in LS mode, only cpuctl_alias is accessible*/
77static void start_gm20b_pmu(struct gk20a *g)
78{
79 /*disable irqs for hs falcon booting as we will poll for halt*/
80 nvgpu_mutex_acquire(&g->pmu.isr_mutex);
81 pmu_enable_irq(&g->pmu, true);
82 g->pmu.isr_enabled = true;
83 nvgpu_mutex_release(&g->pmu.isr_mutex);
84 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(),
85 pwr_falcon_cpuctl_startcpu_f(1));
86}
87
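/* Query the WPR carveout base and size via the FB HAL. */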
88void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
89{
90 g->ops.fb.read_wpr_info(g, inf);
91}
92
93bool gm20b_is_pmu_supported(struct gk20a *g)
94{
95 return true;
96}
97
98static int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
99{
100 struct nvgpu_firmware *pmu_fw, *pmu_desc, *pmu_sig;
101 struct nvgpu_pmu *pmu = &g->pmu;
102 struct lsf_ucode_desc *lsf_desc;
103 int err;
104 gm20b_dbg_pmu("requesting PMU ucode in GM20B\n");
105 pmu_fw = nvgpu_request_firmware(g, GM20B_PMU_UCODE_IMAGE, 0);
106 if (!pmu_fw) {
107 nvgpu_err(g, "failed to load pmu ucode!!");
108 return -ENOENT;
109 }
110 g->acr.pmu_fw = pmu_fw;
111	gm20b_dbg_pmu("Loaded PMU ucode for blob preparation");
112
113 gm20b_dbg_pmu("requesting PMU ucode desc in GM20B\n");
114 pmu_desc = nvgpu_request_firmware(g, GM20B_PMU_UCODE_DESC, 0);
115 if (!pmu_desc) {
116 nvgpu_err(g, "failed to load pmu ucode desc!!");
117 err = -ENOENT;
118 goto release_img_fw;
119 }
120 pmu_sig = nvgpu_request_firmware(g, GM20B_PMU_UCODE_SIG, 0);
121 if (!pmu_sig) {
122 nvgpu_err(g, "failed to load pmu sig!!");
123 err = -ENOENT;
124 goto release_desc;
125 }
126 pmu->desc = (struct pmu_ucode_desc *)pmu_desc->data;
127 pmu->ucode_image = (u32 *)pmu_fw->data;
128 g->acr.pmu_desc = pmu_desc;
129
130 err = nvgpu_init_pmu_fw_support(pmu);
131 if (err) {
132 gm20b_dbg_pmu("failed to set function pointers\n");
133 goto release_sig;
134 }
135
136 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
137 if (!lsf_desc) {
138 err = -ENOMEM;
139 goto release_sig;
140 }
141 memcpy(lsf_desc, (void *)pmu_sig->data, sizeof(struct lsf_ucode_desc));
142 lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
143
144 p_img->desc = pmu->desc;
145 p_img->data = pmu->ucode_image;
146 p_img->data_size = pmu->desc->image_size;
147 p_img->fw_ver = NULL;
148 p_img->header = NULL;
149 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
150 gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n");
151 nvgpu_release_firmware(g, pmu_sig);
152 return 0;
153release_sig:
154 nvgpu_release_firmware(g, pmu_sig);
155release_desc:
156 nvgpu_release_firmware(g, pmu_desc);
157 g->acr.pmu_desc = NULL;
158release_img_fw:
159 nvgpu_release_firmware(g, pmu_fw);
160 g->acr.pmu_fw = NULL;
161 return err;
162}
163
164static int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
165{
166 struct lsf_ucode_desc *lsf_desc;
167 struct nvgpu_firmware *fecs_sig;
168 int err;
169
170 fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0);
171 if (!fecs_sig) {
172 nvgpu_err(g, "failed to load fecs sig");
173 return -ENOENT;
174 }
175 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
176 if (!lsf_desc) {
177 err = -ENOMEM;
178 goto rel_sig;
179 }
180 memcpy(lsf_desc, (void *)fecs_sig->data, sizeof(struct lsf_ucode_desc));
181 lsf_desc->falcon_id = LSF_FALCON_ID_FECS;
182
183 p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
184 if (p_img->desc == NULL) {
185 err = -ENOMEM;
186 goto free_lsf_desc;
187 }
188
189 p_img->desc->bootloader_start_offset =
190 g->ctxsw_ucode_info.fecs.boot.offset;
191 p_img->desc->bootloader_size =
192 ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
193 p_img->desc->bootloader_imem_offset =
194 g->ctxsw_ucode_info.fecs.boot_imem_offset;
195 p_img->desc->bootloader_entry_point =
196 g->ctxsw_ucode_info.fecs.boot_entry;
197
198 p_img->desc->image_size =
199 ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
200 ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
201 ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
202 p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
203 ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
204 p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
205 p_img->desc->app_imem_offset = 0;
206 p_img->desc->app_imem_entry = 0;
207 p_img->desc->app_dmem_offset = 0;
208 p_img->desc->app_resident_code_offset = 0;
209 p_img->desc->app_resident_code_size =
210 g->ctxsw_ucode_info.fecs.code.size;
211 p_img->desc->app_resident_data_offset =
212 g->ctxsw_ucode_info.fecs.data.offset -
213 g->ctxsw_ucode_info.fecs.code.offset;
214 p_img->desc->app_resident_data_size =
215 g->ctxsw_ucode_info.fecs.data.size;
216 p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
217 p_img->data_size = p_img->desc->image_size;
218
219 p_img->fw_ver = NULL;
220 p_img->header = NULL;
221 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
222 gm20b_dbg_pmu("fecs fw loaded\n");
223 nvgpu_release_firmware(g, fecs_sig);
224 return 0;
225free_lsf_desc:
226 nvgpu_kfree(g, lsf_desc);
227rel_sig:
228 nvgpu_release_firmware(g, fecs_sig);
229 return err;
230}
231static int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
232{
233 struct lsf_ucode_desc *lsf_desc;
234 struct nvgpu_firmware *gpccs_sig;
235 int err;
236
237 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS))
238 return -ENOENT;
239
240 gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, 0);
241 if (!gpccs_sig) {
242 nvgpu_err(g, "failed to load gpccs sig");
243 return -ENOENT;
244 }
245 lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc));
246 if (!lsf_desc) {
247 err = -ENOMEM;
248 goto rel_sig;
249 }
250 memcpy(lsf_desc, (void *)gpccs_sig->data,
251 sizeof(struct lsf_ucode_desc));
252 lsf_desc->falcon_id = LSF_FALCON_ID_GPCCS;
253
254 p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
255 if (p_img->desc == NULL) {
256 err = -ENOMEM;
257 goto free_lsf_desc;
258 }
259
260 p_img->desc->bootloader_start_offset =
261 0;
262 p_img->desc->bootloader_size =
263 ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
264 p_img->desc->bootloader_imem_offset =
265 g->ctxsw_ucode_info.gpccs.boot_imem_offset;
266 p_img->desc->bootloader_entry_point =
267 g->ctxsw_ucode_info.gpccs.boot_entry;
268
269 p_img->desc->image_size =
270 ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
271 ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
272 ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
273 p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
274 + ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
275 p_img->desc->app_start_offset = p_img->desc->bootloader_size;
276 p_img->desc->app_imem_offset = 0;
277 p_img->desc->app_imem_entry = 0;
278 p_img->desc->app_dmem_offset = 0;
279 p_img->desc->app_resident_code_offset = 0;
280 p_img->desc->app_resident_code_size =
281 ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
282 p_img->desc->app_resident_data_offset =
283 ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
284 ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
285 p_img->desc->app_resident_data_size =
286 ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
287 p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
288 g->ctxsw_ucode_info.gpccs.boot.offset);
289 p_img->data_size = ALIGN(p_img->desc->image_size, 256);
290 p_img->fw_ver = NULL;
291 p_img->header = NULL;
292 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
293 gm20b_dbg_pmu("gpccs fw loaded\n");
294 nvgpu_release_firmware(g, gpccs_sig);
295 return 0;
296free_lsf_desc:
297 nvgpu_kfree(g, lsf_desc);
298rel_sig:
299 nvgpu_release_firmware(g, gpccs_sig);
300 return err;
301}
302
303bool gm20b_is_lazy_bootstrap(u32 falcon_id)
304{
305 bool enable_status = false;
306
307 switch (falcon_id) {
308 case LSF_FALCON_ID_FECS:
309 enable_status = false;
310 break;
311 case LSF_FALCON_ID_GPCCS:
312 enable_status = false;
313 break;
314 default:
315 break;
316 }
317
318 return enable_status;
319}
320
321bool gm20b_is_priv_load(u32 falcon_id)
322{
323 bool enable_status = false;
324
325 switch (falcon_id) {
326 case LSF_FALCON_ID_FECS:
327 enable_status = false;
328 break;
329 case LSF_FALCON_ID_GPCCS:
330 enable_status = false;
331 break;
332 default:
333 break;
334 }
335
336 return enable_status;
337}
338
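/* Allocate system memory to back the non-WPR ucode blob. */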
339int gm20b_alloc_blob_space(struct gk20a *g,
340 size_t size, struct nvgpu_mem *mem)
341{
342 int err;
343
344 err = nvgpu_dma_alloc_sys(g, size, mem);
345
346 return err;
347}
348
349int prepare_ucode_blob(struct gk20a *g)
350{
351
352 int err;
353 struct ls_flcn_mgr lsfm_l, *plsfm;
354 struct nvgpu_pmu *pmu = &g->pmu;
355 struct wpr_carveout_info wpr_inf;
356
357 if (g->acr.ucode_blob.cpu_va) {
358		/*Recovery case, we do not need to form the
359		non-WPR blob of ucodes*/
360 err = nvgpu_init_pmu_fw_support(pmu);
361 if (err) {
362 gm20b_dbg_pmu("failed to set function pointers\n");
363 return err;
364 }
365 return 0;
366 }
367 plsfm = &lsfm_l;
368 memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr));
369 gm20b_dbg_pmu("fetching GMMU regs\n");
370 g->ops.fb.vpr_info_fetch(g);
371 gr_gk20a_init_ctxsw_ucode(g);
372
373 g->ops.pmu.get_wpr(g, &wpr_inf);
374 gm20b_dbg_pmu("wpr carveout base:%llx\n", wpr_inf.wpr_base);
375 gm20b_dbg_pmu("wpr carveout size :%llx\n", wpr_inf.size);
376
377 /* Discover all managed falcons*/
378 err = lsfm_discover_ucode_images(g, plsfm);
379 gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
380 if (err)
381 goto free_sgt;
382
383 if (plsfm->managed_flcn_cnt && !g->acr.ucode_blob.cpu_va) {
384 /* Generate WPR requirements*/
385 err = lsf_gen_wpr_requirements(g, plsfm);
386 if (err)
387 goto free_sgt;
388
389 /*Alloc memory to hold ucode blob contents*/
390 err = g->ops.pmu.alloc_blob_space(g, plsfm->wpr_size
391 , &g->acr.ucode_blob);
392 if (err)
393 goto free_sgt;
394
395 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
396 plsfm->managed_flcn_cnt, plsfm->wpr_size);
397 lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
398 } else {
399 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
400 }
401 gm20b_dbg_pmu("prepare ucode blob return 0\n");
402 free_acr_resources(g, plsfm);
403free_sgt:
404 return err;
405}
406
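/* Return nonzero if the given falcon is masked off from LSFM management. */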
407static u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm,
408 u32 falcon_id)
409{
410 return (plsfm->disable_mask >> falcon_id) & 0x1;
411}
412
413/* Discover all managed falcon ucode images */
414static int lsfm_discover_ucode_images(struct gk20a *g,
415 struct ls_flcn_mgr *plsfm)
416{
417 struct nvgpu_pmu *pmu = &g->pmu;
418 struct flcn_ucode_img ucode_img;
419 u32 falcon_id;
420 u32 i;
421 int status;
422
423 /* LSFM requires a secure PMU, discover it first.*/
424 /* Obtain the PMU ucode image and add it to the list if required*/
425 memset(&ucode_img, 0, sizeof(ucode_img));
426 status = pmu_ucode_details(g, &ucode_img);
427 if (status)
428 return status;
429
430	 * The falcon_id is formed by grabbing the static base
431	 * falcon_id from the image and adding the
432	 * engine-designated falcon instance.*/
433 pmu->pmu_mode |= PMU_SECURE_MODE;
434 falcon_id = ucode_img.lsf_desc->falcon_id +
435 ucode_img.flcn_inst;
436
437 if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) {
438 pmu->falcon_id = falcon_id;
439 if (lsfm_add_ucode_img(g, plsfm, &ucode_img,
440 pmu->falcon_id) == 0)
441 pmu->pmu_mode |= PMU_LSFM_MANAGED;
442
443 plsfm->managed_flcn_cnt++;
444 } else {
445 gm20b_dbg_pmu("id not managed %d\n",
446 ucode_img.lsf_desc->falcon_id);
447 }
448
449 /*Free any ucode image resources if not managing this falcon*/
450 if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) {
451 gm20b_dbg_pmu("pmu is not LSFM managed\n");
452 lsfm_free_ucode_img_res(g, &ucode_img);
453 }
454
455 /* Enumerate all constructed falcon objects,
456 as we need the ucode image info and total falcon count.*/
457
458	/*0th index is always the PMU, which is already handled in the
459	earlier if condition*/
460 for (i = 1; i < (MAX_SUPPORTED_LSFM); i++) {
461 memset(&ucode_img, 0, sizeof(ucode_img));
462 if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) {
463 if (ucode_img.lsf_desc != NULL) {
464 /* We have engine sigs, ensure that this falcon
465 is aware of the secure mode expectations
466 (ACR status)*/
467
468				/* falcon_id is formed by grabbing the static
469				base falcon_id from the image and adding the
470				engine-designated falcon instance. */
471 falcon_id = ucode_img.lsf_desc->falcon_id +
472 ucode_img.flcn_inst;
473
474 if (!lsfm_falcon_disabled(g, plsfm,
475 falcon_id)) {
476 /* Do not manage non-FB ucode*/
477 if (lsfm_add_ucode_img(g,
478 plsfm, &ucode_img, falcon_id)
479 == 0)
480 plsfm->managed_flcn_cnt++;
481 } else {
482 gm20b_dbg_pmu("not managed %d\n",
483 ucode_img.lsf_desc->falcon_id);
484 lsfm_free_nonpmu_ucode_img_res(g,
485 &ucode_img);
486 }
487 }
488 } else {
489 /* Consumed all available falcon objects */
490 gm20b_dbg_pmu("Done checking for ucodes %d\n", i);
491 break;
492 }
493 }
494 return 0;
495}
496
497
498int gm20b_pmu_populate_loader_cfg(struct gk20a *g,
499 void *lsfm, u32 *p_bl_gen_desc_size)
500{
501 struct wpr_carveout_info wpr_inf;
502 struct nvgpu_pmu *pmu = &g->pmu;
503 struct lsfm_managed_ucode_img *p_lsfm =
504 (struct lsfm_managed_ucode_img *)lsfm;
505 struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
506 struct loader_config *ldr_cfg = &(p_lsfm->bl_gen_desc.loader_cfg);
507 u64 addr_base;
508 struct pmu_ucode_desc *desc;
509 u64 addr_code, addr_data;
510 u32 addr_args;
511
512	if (p_img->desc == NULL) /*This means it's a header-based ucode,
513				and so we do not fill the BL gen desc structure*/
514 return -EINVAL;
515 desc = p_img->desc;
516 /*
517 Calculate physical and virtual addresses for various portions of
518 the PMU ucode image
519 Calculate the 32-bit addresses for the application code, application
520 data, and bootloader code. These values are all based on IM_BASE.
521 The 32-bit addresses will be the upper 32-bits of the virtual or
522 physical addresses of each respective segment.
523 */
524 addr_base = p_lsfm->lsb_header.ucode_off;
525 g->ops.pmu.get_wpr(g, &wpr_inf);
526 addr_base += wpr_inf.wpr_base;
527 gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
528 /*From linux*/
529 addr_code = u64_lo32((addr_base +
530 desc->app_start_offset +
531 desc->app_resident_code_offset) >> 8);
532 gm20b_dbg_pmu("app start %d app res code off %d\n",
533 desc->app_start_offset, desc->app_resident_code_offset);
534 addr_data = u64_lo32((addr_base +
535 desc->app_start_offset +
536 desc->app_resident_data_offset) >> 8);
537 gm20b_dbg_pmu("app res data offset%d\n",
538 desc->app_resident_data_offset);
539 gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
540
541 addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
542 gk20a_readl(g, pwr_falcon_hwcfg_r())))
543 << GK20A_PMU_DMEM_BLKSIZE2);
544 addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
545
546 gm20b_dbg_pmu("addr_args %x\n", addr_args);
547
548 /* Populate the loader_config state*/
549 ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
550 ldr_cfg->code_dma_base = addr_code;
551 ldr_cfg->code_dma_base1 = 0x0;
552 ldr_cfg->code_size_total = desc->app_size;
553 ldr_cfg->code_size_to_load = desc->app_resident_code_size;
554 ldr_cfg->code_entry_point = desc->app_imem_entry;
555 ldr_cfg->data_dma_base = addr_data;
556 ldr_cfg->data_dma_base1 = 0;
557 ldr_cfg->data_size = desc->app_resident_data_size;
558 ldr_cfg->overlay_dma_base = addr_code;
559 ldr_cfg->overlay_dma_base1 = 0x0;
560
561 /* Update the argc/argv members*/
562 ldr_cfg->argc = 1;
563 ldr_cfg->argv = addr_args;
564
565 *p_bl_gen_desc_size = sizeof(struct loader_config);
566 g->acr.pmu_args = addr_args;
567 return 0;
568}
569
570int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g,
571 void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
572{
573 struct wpr_carveout_info wpr_inf;
574 struct lsfm_managed_ucode_img *p_lsfm =
575 (struct lsfm_managed_ucode_img *)lsfm;
576 struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
577 struct flcn_bl_dmem_desc *ldr_cfg =
578 &(p_lsfm->bl_gen_desc.bl_dmem_desc);
579 u64 addr_base;
580 struct pmu_ucode_desc *desc;
581 u64 addr_code, addr_data;
582
583	if (p_img->desc == NULL) /*This means it's a header-based ucode,
584				and so we do not fill the BL gen desc structure*/
585 return -EINVAL;
586 desc = p_img->desc;
587
588 /*
589 Calculate physical and virtual addresses for various portions of
590 the PMU ucode image
591 Calculate the 32-bit addresses for the application code, application
592 data, and bootloader code. These values are all based on IM_BASE.
593 The 32-bit addresses will be the upper 32-bits of the virtual or
594 physical addresses of each respective segment.
595 */
596 addr_base = p_lsfm->lsb_header.ucode_off;
597 g->ops.pmu.get_wpr(g, &wpr_inf);
598 addr_base += wpr_inf.wpr_base;
599
600 gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
601 p_lsfm->wpr_header.falcon_id);
602 addr_code = u64_lo32((addr_base +
603 desc->app_start_offset +
604 desc->app_resident_code_offset) >> 8);
605 addr_data = u64_lo32((addr_base +
606 desc->app_start_offset +
607 desc->app_resident_data_offset) >> 8);
608
609 gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
610 (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
611 p_lsfm->wpr_header.falcon_id);
612
613 /* Populate the LOADER_CONFIG state */
614 memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc));
615 ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
616 ldr_cfg->code_dma_base = addr_code;
617 ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
618 ldr_cfg->data_dma_base = addr_data;
619 ldr_cfg->data_size = desc->app_resident_data_size;
620 ldr_cfg->code_entry_point = desc->app_imem_entry;
621 *p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc);
622 return 0;
623}
624
625/* Populate falcon boot loader generic desc.*/
626static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
627 struct lsfm_managed_ucode_img *pnode)
628{
629
630 struct nvgpu_pmu *pmu = &g->pmu;
631 if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
632 gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n");
633 g->ops.pmu.flcn_populate_bl_dmem_desc(g,
634 pnode, &pnode->bl_gen_desc_size,
635 pnode->wpr_header.falcon_id);
636 return 0;
637 }
638
639 if (pmu->pmu_mode & PMU_LSFM_MANAGED) {
640 gm20b_dbg_pmu("pmu write flcn bl gen desc\n");
641 if (pnode->wpr_header.falcon_id == pmu->falcon_id)
642 return g->ops.pmu.pmu_populate_loader_cfg(g, pnode,
643 &pnode->bl_gen_desc_size);
644 }
645
646 /* Failed to find the falcon requested. */
647 return -ENOENT;
648}
649
650/* Initialize WPR contents */
651static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
652 struct nvgpu_mem *ucode)
653{
654 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
655 struct lsf_wpr_header last_wpr_hdr;
656 u32 i;
657
658 /* The WPR array is at the base of the WPR */
659 pnode = plsfm->ucode_img_list;
660 memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header));
661 i = 0;
662
663 /*
664 * Walk the managed falcons, flush WPR and LSB headers to FB.
665 * flush any bl args to the storage area relative to the
666 * ucode image (appended on the end as a DMEM area).
667 */
668 while (pnode) {
669 /* Flush WPR header to memory*/
670 nvgpu_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
671 &pnode->wpr_header, sizeof(pnode->wpr_header));
672
673 gm20b_dbg_pmu("wpr header");
674 gm20b_dbg_pmu("falconid :%d",
675 pnode->wpr_header.falcon_id);
676 gm20b_dbg_pmu("lsb_offset :%x",
677 pnode->wpr_header.lsb_offset);
678 gm20b_dbg_pmu("bootstrap_owner :%d",
679 pnode->wpr_header.bootstrap_owner);
680 gm20b_dbg_pmu("lazy_bootstrap :%d",
681 pnode->wpr_header.lazy_bootstrap);
682 gm20b_dbg_pmu("status :%d",
683 pnode->wpr_header.status);
684
685 /*Flush LSB header to memory*/
686 nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
687 &pnode->lsb_header, sizeof(pnode->lsb_header));
688
689 gm20b_dbg_pmu("lsb header");
690 gm20b_dbg_pmu("ucode_off :%x",
691 pnode->lsb_header.ucode_off);
692 gm20b_dbg_pmu("ucode_size :%x",
693 pnode->lsb_header.ucode_size);
694 gm20b_dbg_pmu("data_size :%x",
695 pnode->lsb_header.data_size);
696 gm20b_dbg_pmu("bl_code_size :%x",
697 pnode->lsb_header.bl_code_size);
698 gm20b_dbg_pmu("bl_imem_off :%x",
699 pnode->lsb_header.bl_imem_off);
700 gm20b_dbg_pmu("bl_data_off :%x",
701 pnode->lsb_header.bl_data_off);
702 gm20b_dbg_pmu("bl_data_size :%x",
703 pnode->lsb_header.bl_data_size);
704 gm20b_dbg_pmu("app_code_off :%x",
705 pnode->lsb_header.app_code_off);
706 gm20b_dbg_pmu("app_code_size :%x",
707 pnode->lsb_header.app_code_size);
708 gm20b_dbg_pmu("app_data_off :%x",
709 pnode->lsb_header.app_data_off);
710 gm20b_dbg_pmu("app_data_size :%x",
711 pnode->lsb_header.app_data_size);
712 gm20b_dbg_pmu("flags :%x",
713 pnode->lsb_header.flags);
714
715 /*If this falcon has a boot loader and related args,
716 * flush them.*/
717 if (!pnode->ucode_img.header) {
718 /*Populate gen bl and flush to memory*/
719 lsfm_fill_flcn_bl_gen_desc(g, pnode);
720 nvgpu_mem_wr_n(g, ucode,
721 pnode->lsb_header.bl_data_off,
722 &pnode->bl_gen_desc,
723 pnode->bl_gen_desc_size);
724 }
725 /*Copying of ucode*/
726 nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
727 pnode->ucode_img.data,
728 pnode->ucode_img.data_size);
729 pnode = pnode->next;
730 i++;
731 }
732
733 /* Tag the terminator WPR header with an invalid falcon ID. */
734 last_wpr_hdr.falcon_id = LSF_FALCON_ID_INVALID;
735 nvgpu_mem_wr_n(g, ucode,
736 plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header),
737 &last_wpr_hdr,
738 sizeof(struct lsf_wpr_header));
739}
740
741/*!
742 * lsfm_parse_no_loader_ucode: parses UCODE header of falcon
743 *
744 * @param[in] p_ucodehdr : UCODE header
745 * @param[out] lsb_hdr : updates values in LSB header
746 *
747 * @return 0
748 */
749static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr,
750 struct lsf_lsb_header *lsb_hdr)
751{
752
753 u32 code_size = 0;
754 u32 data_size = 0;
755 u32 i = 0;
756 u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
757
758	/* Let's calculate the code size */
759 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
760 for (i = 0; i < total_apps; i++) {
761 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND
762 (total_apps, i)];
763 }
764 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)];
765
766 /* Calculate data size*/
767 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
768 for (i = 0; i < total_apps; i++) {
769 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND
770 (total_apps, i)];
771 }
772
773 lsb_hdr->ucode_size = code_size;
774 lsb_hdr->data_size = data_size;
775 lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
776 lsb_hdr->bl_imem_off = 0;
777 lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND];
778 lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
779 return 0;
780}
781
782/*!
783 * @brief lsfm_fill_static_lsb_hdr_info
784 * Populate static LSB header information using the provided ucode image
785 */
786static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
787 u32 falcon_id, struct lsfm_managed_ucode_img *pnode)
788{
789
790 struct nvgpu_pmu *pmu = &g->pmu;
791 u32 full_app_size = 0;
792 u32 data = 0;
793
794 if (pnode->ucode_img.lsf_desc)
795 memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc,
796 sizeof(struct lsf_ucode_desc));
797 pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
798
799 /* The remainder of the LSB depends on the loader usage */
800 if (pnode->ucode_img.header) {
801 /* Does not use a loader */
802 pnode->lsb_header.data_size = 0;
803 pnode->lsb_header.bl_code_size = 0;
804 pnode->lsb_header.bl_data_off = 0;
805 pnode->lsb_header.bl_data_size = 0;
806
807 lsfm_parse_no_loader_ucode(pnode->ucode_img.header,
808 &(pnode->lsb_header));
809
810 /* Load the first 256 bytes of IMEM. */
811 /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX.
812 True for all method based falcons */
813 data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE |
814 NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
815 pnode->lsb_header.flags = data;
816 } else {
817	} else {
		/* Uses a loader, i.e. it has a desc */
818 pnode->lsb_header.data_size = 0;
819
820		/* The loader code size is already aligned (padded) such that
821		the code following it is aligned, but the size in the image
822		desc is not; round it up to a 256-byte alignment. */
823 pnode->lsb_header.bl_code_size = ALIGN(
824 pnode->ucode_img.desc->bootloader_size,
825 LSF_BL_CODE_SIZE_ALIGNMENT);
826 full_app_size = ALIGN(pnode->ucode_img.desc->app_size,
827 LSF_BL_CODE_SIZE_ALIGNMENT) +
828 pnode->lsb_header.bl_code_size;
829 pnode->lsb_header.ucode_size = ALIGN(
830 pnode->ucode_img.desc->app_resident_data_offset,
831 LSF_BL_CODE_SIZE_ALIGNMENT) +
832 pnode->lsb_header.bl_code_size;
833 pnode->lsb_header.data_size = full_app_size -
834 pnode->lsb_header.ucode_size;
835		/* Though the BL is located at the 0th offset of the image, the VA
836		is different to make sure that it doesn't collide with the actual
837		OS VA range */
838 pnode->lsb_header.bl_imem_off =
839 pnode->ucode_img.desc->bootloader_imem_offset;
840
841 /* TODO: OBJFLCN should export properties using which the below
842 flags should be populated.*/
843 pnode->lsb_header.flags = 0;
844
845 if (falcon_id == pmu->falcon_id) {
846 data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
847 pnode->lsb_header.flags = data;
848 }
849
850 if (g->ops.pmu.is_priv_load(falcon_id)) {
851 pnode->lsb_header.flags |=
852 NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
853 }
854 }
855}
856
857/* Add a ucode image to the list of managed ucode images. */
858static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
859 struct flcn_ucode_img *ucode_image, u32 falcon_id)
860{
861
862 struct lsfm_managed_ucode_img *pnode;
863 pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img));
864 if (pnode == NULL)
865 return -ENOMEM;
866
867 /* Keep a copy of the ucode image info locally */
868 memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img));
869
870 /* Fill in static WPR header info*/
871 pnode->wpr_header.falcon_id = falcon_id;
872 pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
873 pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY;
874
875 pnode->wpr_header.lazy_bootstrap =
876 g->ops.pmu.is_lazy_bootstrap(falcon_id);
877
878 /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by
879 Android */
880 /* Fill in static LSB header info elsewhere */
881 lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode);
882 pnode->next = plsfm->ucode_img_list;
883 plsfm->ucode_img_list = pnode;
884 return 0;
885}
886
887/* Free any ucode image structure resources. */
888static void lsfm_free_ucode_img_res(struct gk20a *g,
889 struct flcn_ucode_img *p_img)
890{
891 if (p_img->lsf_desc != NULL) {
892 nvgpu_kfree(g, p_img->lsf_desc);
893 p_img->lsf_desc = NULL;
894 }
895}
896
897/* Free any ucode image structure resources. */
898static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g,
899 struct flcn_ucode_img *p_img)
900{
901 if (p_img->lsf_desc != NULL) {
902 nvgpu_kfree(g, p_img->lsf_desc);
903 p_img->lsf_desc = NULL;
904 }
905 if (p_img->desc != NULL) {
906 nvgpu_kfree(g, p_img->desc);
907 p_img->desc = NULL;
908 }
909}
910
911static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm)
912{
913 u32 cnt = plsfm->managed_flcn_cnt;
914 struct lsfm_managed_ucode_img *mg_ucode_img;
915 while (cnt) {
916 mg_ucode_img = plsfm->ucode_img_list;
917 if (mg_ucode_img->ucode_img.lsf_desc->falcon_id ==
918 LSF_FALCON_ID_PMU)
919 lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img);
920 else
921 lsfm_free_nonpmu_ucode_img_res(g,
922 &mg_ucode_img->ucode_img);
923 plsfm->ucode_img_list = mg_ucode_img->next;
924 nvgpu_kfree(g, mg_ucode_img);
925 cnt--;
926 }
927}
928
929/* Generate WPR requirements for ACR allocation request */
930static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
931{
932 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
933 u32 wpr_offset;
934
935 /* Calculate WPR size required */
936
937 /* Start with an array of WPR headers at the base of the WPR.
938 The expectation here is that the secure falcon will do a single DMA
939 read of this array and cache it internally so it's OK to pack these.
940 Also, we add 1 to the falcon count to indicate the end of the array.*/
941 wpr_offset = sizeof(struct lsf_wpr_header) *
942 (plsfm->managed_flcn_cnt+1);
943
944 /* Walk the managed falcons, accounting for the LSB structs
945 as well as the ucode images. */
946 while (pnode) {
947 /* Align, save off, and include an LSB header size */
948 wpr_offset = ALIGN(wpr_offset,
949 LSF_LSB_HEADER_ALIGNMENT);
950 pnode->wpr_header.lsb_offset = wpr_offset;
951 wpr_offset += sizeof(struct lsf_lsb_header);
952
953 /* Align, save off, and include the original (static)
954 ucode image size */
955 wpr_offset = ALIGN(wpr_offset,
956 LSF_UCODE_DATA_ALIGNMENT);
957 pnode->lsb_header.ucode_off = wpr_offset;
958 wpr_offset += pnode->ucode_img.data_size;
959
960 /* For falcons that use a boot loader (BL), we append a loader
961 desc structure on the end of the ucode image and consider this
962 the boot loader data. The host will then copy the loader desc
963 args to this space within the WPR region (before locking down)
964 and the HS bin will then copy them to DMEM 0 for the loader. */
965 if (!pnode->ucode_img.header) {
966			/* Track the size for LSB details filled in later.
967			Note that at this point we don't know what kind of
968			boot loader desc is needed, so we just take the size of the
969			generic one, which is the largest it will ever be.
970			*/
971 /* Align (size bloat) and save off generic
972 descriptor size*/
973 pnode->lsb_header.bl_data_size = ALIGN(
974 sizeof(pnode->bl_gen_desc),
975 LSF_BL_DATA_SIZE_ALIGNMENT);
976
977 /*Align, save off, and include the additional BL data*/
978 wpr_offset = ALIGN(wpr_offset,
979 LSF_BL_DATA_ALIGNMENT);
980 pnode->lsb_header.bl_data_off = wpr_offset;
981 wpr_offset += pnode->lsb_header.bl_data_size;
982 } else {
983 /* bl_data_off is already assigned in static
984 information. But that is from start of the image */
985 pnode->lsb_header.bl_data_off +=
986 (wpr_offset - pnode->ucode_img.data_size);
987 }
988
989 /* Finally, update ucode surface size to include updates */
990 pnode->full_ucode_size = wpr_offset -
991 pnode->lsb_header.ucode_off;
992 if (pnode->wpr_header.falcon_id != LSF_FALCON_ID_PMU) {
993 pnode->lsb_header.app_code_off =
994 pnode->lsb_header.bl_code_size;
995 pnode->lsb_header.app_code_size =
996 pnode->lsb_header.ucode_size -
997 pnode->lsb_header.bl_code_size;
998 pnode->lsb_header.app_data_off =
999 pnode->lsb_header.ucode_size;
1000 pnode->lsb_header.app_data_size =
1001 pnode->lsb_header.data_size;
1002 }
1003 pnode = pnode->next;
1004 }
1005 plsfm->wpr_size = wpr_offset;
1006 return 0;
1007}
1008
1009/*Loads ACR bin to FB mem and bootstraps PMU with bootloader code
1010 * start and end are addresses of ucode blob in non-WPR region*/
1011int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1012{
1013 struct mm_gk20a *mm = &g->mm;
1014 struct vm_gk20a *vm = mm->pmu.vm;
1015 int err = 0;
1016 u64 *acr_dmem;
1017 u32 img_size_in_bytes = 0;
1018 u32 status, size;
1019 u64 start;
1020 struct acr_desc *acr = &g->acr;
1021 struct nvgpu_firmware *acr_fw = acr->acr_fw;
1022 struct flcn_bl_dmem_desc *bl_dmem_desc = &acr->bl_dmem_desc;
1023 u32 *acr_ucode_header_t210_load;
1024 u32 *acr_ucode_data_t210_load;
1025
1026 start = nvgpu_mem_get_addr(g, &acr->ucode_blob);
1027 size = acr->ucode_blob.size;
1028
1029 gm20b_dbg_pmu("");
1030
1031 if (!acr_fw) {
1032 /*First time init case*/
1033 acr_fw = nvgpu_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE, 0);
1034 if (!acr_fw) {
1035 nvgpu_err(g, "pmu ucode get fail");
1036 return -ENOENT;
1037 }
1038 acr->acr_fw = acr_fw;
1039 acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
1040 acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
1041 acr->hsbin_hdr->header_offset);
1042 acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
1043 acr->hsbin_hdr->data_offset);
1044 acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
1045 acr->fw_hdr->hdr_offset);
1046 img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
1047
1048		/* Let's patch the signatures first. */
1049 if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
1050 (u32 *)(acr_fw->data +
1051 acr->fw_hdr->sig_prod_offset),
1052 (u32 *)(acr_fw->data +
1053 acr->fw_hdr->sig_dbg_offset),
1054 (u32 *)(acr_fw->data +
1055 acr->fw_hdr->patch_loc),
1056 (u32 *)(acr_fw->data +
1057 acr->fw_hdr->patch_sig)) < 0) {
1058 nvgpu_err(g, "patch signatures fail");
1059 err = -1;
1060 goto err_release_acr_fw;
1061 }
1062 err = nvgpu_dma_alloc_map_sys(vm, img_size_in_bytes,
1063 &acr->acr_ucode);
1064 if (err) {
1065 err = -ENOMEM;
1066 goto err_release_acr_fw;
1067 }
1068
1069 acr_dmem = (u64 *)
1070 &(((u8 *)acr_ucode_data_t210_load)[
1071 acr_ucode_header_t210_load[2]]);
1072 acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)(
1073 acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
1074 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
1075 start;
1076 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
1077 size;
1078 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
1079 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
1080
1081 nvgpu_mem_wr_n(g, &acr->acr_ucode, 0,
1082 acr_ucode_data_t210_load, img_size_in_bytes);
1083 /*
1084 * In order to execute this binary, we will be using
1085 * a bootloader which will load this image into PMU IMEM/DMEM.
1086 * Fill up the bootloader descriptor for PMU HAL to use..
1087 * TODO: Use standard descriptor which the generic bootloader is
1088 * checked in.
1089 */
1090
1091 bl_dmem_desc->signature[0] = 0;
1092 bl_dmem_desc->signature[1] = 0;
1093 bl_dmem_desc->signature[2] = 0;
1094 bl_dmem_desc->signature[3] = 0;
1095 bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
1096 bl_dmem_desc->code_dma_base =
1097 (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8));
1098 bl_dmem_desc->code_dma_base1 = 0x0;
1099 bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0];
1100 bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
1101 bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
1102 bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6];
1103 bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */
1104 bl_dmem_desc->data_dma_base =
1105 bl_dmem_desc->code_dma_base +
1106 ((acr_ucode_header_t210_load[2]) >> 8);
1107 bl_dmem_desc->data_dma_base1 = 0x0;
1108 bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];
1109 } else
1110 acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0;
1111 status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
1112 if (status != 0) {
1113 err = status;
1114 goto err_free_ucode_map;
1115 }
1116 return 0;
1117err_free_ucode_map:
1118 nvgpu_dma_unmap_free(vm, &acr->acr_ucode);
1119err_release_acr_fw:
1120 nvgpu_release_firmware(g, acr_fw);
1121 acr->acr_fw = NULL;
1122 return err;
1123}
1124
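/* Check the SCP control status register to see whether the PMU is in debug (non-production) mode. */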
1125static u8 pmu_is_debug_mode_en(struct gk20a *g)
1126{
1127 u32 ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r());
1128 return pwr_pmu_scpctl_stat_debug_mode_v(ctl_stat);
1129}
1130
1131/*
1132 * @brief Patch signatures into ucode image
1133 */
1134int acr_ucode_patch_sig(struct gk20a *g,
1135 unsigned int *p_img,
1136 unsigned int *p_prod_sig,
1137 unsigned int *p_dbg_sig,
1138 unsigned int *p_patch_loc,
1139 unsigned int *p_patch_ind)
1140{
1141 unsigned int i, *p_sig;
1142 gm20b_dbg_pmu("");
1143
1144 if (!pmu_is_debug_mode_en(g)) {
1145 p_sig = p_prod_sig;
1146 gm20b_dbg_pmu("PRODUCTION MODE\n");
1147 } else {
1148 p_sig = p_dbg_sig;
1149 gm20b_dbg_pmu("DEBUG MODE\n");
1150 }
1151
1152 /* Patching logic:*/
1153 for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) {
1154 p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)];
1155 p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1];
1156 p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2];
1157 p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3];
1158 }
1159 return 0;
1160}
1161
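/* Set up the PMU instance block, copy the bootloader DMEM descriptor, load the bootloader at the top of IMEM, and start the falcon at the bootloader's entry tag. */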
1162static int bl_bootstrap(struct nvgpu_pmu *pmu,
1163 struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz)
1164{
1165 struct gk20a *g = gk20a_from_pmu(pmu);
1166 struct acr_desc *acr = &g->acr;
1167 struct mm_gk20a *mm = &g->mm;
1168 u32 virt_addr = 0;
1169 struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc;
1170 u32 dst;
1171
1172 gk20a_dbg_fn("");
1173 gk20a_writel(g, pwr_falcon_itfen_r(),
1174 gk20a_readl(g, pwr_falcon_itfen_r()) |
1175 pwr_falcon_itfen_ctxen_enable_f());
1176 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1177 pwr_pmu_new_instblk_ptr_f(
1178 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
1179 pwr_pmu_new_instblk_valid_f(1) |
1180 pwr_pmu_new_instblk_target_sys_coh_f());
1181
1182 /*copy bootloader interface structure to dmem*/
1183 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
1184 sizeof(struct flcn_bl_dmem_desc), 0);
1185
1186 /* copy bootloader to TOP of IMEM */
1187 dst = (pwr_falcon_hwcfg_imem_size_v(
1188 gk20a_readl(g, pwr_falcon_hwcfg_r())) << 8) - bl_sz;
1189
1190 nvgpu_flcn_copy_to_imem(pmu->flcn, dst,
1191 (u8 *)(acr->hsbl_ucode.cpu_va), bl_sz, 0, 0,
1192 pmu_bl_gm10x_desc->bl_start_tag);
1193
1194 gm20b_dbg_pmu("Before starting falcon with BL\n");
1195
1196 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
1197
1198 nvgpu_flcn_bootstrap(pmu->flcn, virt_addr);
1199
1200 return 0;
1201}
1202
1203int gm20b_init_nspmu_setup_hw1(struct gk20a *g)
1204{
1205 struct nvgpu_pmu *pmu = &g->pmu;
1206 int err = 0;
1207
1208 gk20a_dbg_fn("");
1209
1210 nvgpu_mutex_acquire(&pmu->isr_mutex);
1211 nvgpu_flcn_reset(pmu->flcn);
1212 pmu->isr_enabled = true;
1213 nvgpu_mutex_release(&pmu->isr_mutex);
1214
1215 /* setup apertures - virtual */
1216 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1217 pwr_fbif_transcfg_mem_type_virtual_f());
1218 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1219 pwr_fbif_transcfg_mem_type_virtual_f());
1220 /* setup apertures - physical */
1221 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1222 pwr_fbif_transcfg_mem_type_physical_f() |
1223 pwr_fbif_transcfg_target_local_fb_f());
1224 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1225 pwr_fbif_transcfg_mem_type_physical_f() |
1226 pwr_fbif_transcfg_target_coherent_sysmem_f());
1227 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1228 pwr_fbif_transcfg_mem_type_physical_f() |
1229 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1230
1231 err = g->ops.pmu.pmu_nsbootstrap(pmu);
1232
1233 return err;
1234}
1235
1236int gm20b_init_pmu_setup_hw1(struct gk20a *g,
1237 void *desc, u32 bl_sz)
1238{
1239
1240 struct nvgpu_pmu *pmu = &g->pmu;
1241 int err;
1242
1243 gk20a_dbg_fn("");
1244
1245 nvgpu_mutex_acquire(&pmu->isr_mutex);
1246 nvgpu_flcn_reset(pmu->flcn);
1247 pmu->isr_enabled = true;
1248 nvgpu_mutex_release(&pmu->isr_mutex);
1249
1250 /* setup apertures - virtual */
1251 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1252 pwr_fbif_transcfg_mem_type_physical_f() |
1253 pwr_fbif_transcfg_target_local_fb_f());
1254 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1255 pwr_fbif_transcfg_mem_type_virtual_f());
1256 /* setup apertures - physical */
1257 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1258 pwr_fbif_transcfg_mem_type_physical_f() |
1259 pwr_fbif_transcfg_target_local_fb_f());
1260 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1261 pwr_fbif_transcfg_mem_type_physical_f() |
1262 pwr_fbif_transcfg_target_coherent_sysmem_f());
1263 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1264 pwr_fbif_transcfg_mem_type_physical_f() |
1265 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1266
1267 /*Copying pmu cmdline args*/
1268 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
1269 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK));
1270 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1);
1271 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
1272 pmu, GK20A_PMU_TRACE_BUFSIZE);
1273 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu);
1274 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx(
1275 pmu, GK20A_PMU_DMAIDX_VIRT);
1276 nvgpu_flcn_copy_to_dmem(pmu->flcn, g->acr.pmu_args,
1277 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
1278 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
1279 /*disable irqs for hs falcon booting as we will poll for halt*/
1280 nvgpu_mutex_acquire(&pmu->isr_mutex);
1281 pmu_enable_irq(pmu, false);
1282 pmu->isr_enabled = false;
1283 nvgpu_mutex_release(&pmu->isr_mutex);
1284 /*Clearing mailbox register used to reflect capabilities*/
1285 gk20a_writel(g, pwr_falcon_mailbox1_r(), 0);
1286 err = bl_bootstrap(pmu, desc, bl_sz);
1287 if (err)
1288 return err;
1289 return 0;
1290}
1291
1292/*
1293* Executes a generic bootloader and waits for the PMU to halt.
1294* This BL will be used for those binaries that are loaded
1295* and executed at times other than RM PMU Binary execution.
1296*
1297* @param[in] g gk20a pointer
1298* @param[in] desc Bootloader descriptor
1299* @param[in] dma_idx DMA Index
1300* @param[in] b_wait_for_halt Wait for PMU to HALT
1301*/
1302int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1303{
1304 struct mm_gk20a *mm = &g->mm;
1305 struct vm_gk20a *vm = mm->pmu.vm;
1306 int err = 0;
1307 u32 bl_sz;
1308 struct acr_desc *acr = &g->acr;
1309 struct nvgpu_firmware *hsbl_fw = acr->hsbl_fw;
1310 struct hsflcn_bl_desc *pmu_bl_gm10x_desc;
1311 u32 *pmu_bl_gm10x = NULL;
1312 gm20b_dbg_pmu("");
1313
1314 if (!hsbl_fw) {
1315 hsbl_fw = nvgpu_request_firmware(g,
1316 GM20B_HSBIN_PMU_BL_UCODE_IMAGE, 0);
1317 if (!hsbl_fw) {
1318 nvgpu_err(g, "pmu ucode load fail");
1319 return -ENOENT;
1320 }
1321 acr->hsbl_fw = hsbl_fw;
1322 acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data;
1323 acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data +
1324 acr->bl_bin_hdr->header_offset);
1325 pmu_bl_gm10x_desc = acr->pmu_hsbl_desc;
1326 pmu_bl_gm10x = (u32 *)(hsbl_fw->data +
1327 acr->bl_bin_hdr->data_offset);
1328 bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size,
1329 256);
1330 acr->hsbl_ucode.size = bl_sz;
1331 gm20b_dbg_pmu("Executing Generic Bootloader\n");
1332
1333		/*TODO: verify in code that PMU enable and
1334		scrubbing etc. are done*/
1335 /*TODO in code verify that gmmu vm init is done*/
1336 err = nvgpu_dma_alloc_flags_sys(g,
1337 NVGPU_DMA_READ_ONLY, bl_sz, &acr->hsbl_ucode);
1338 if (err) {
1339 nvgpu_err(g, "failed to allocate memory");
1340 goto err_done;
1341 }
1342
1343 acr->hsbl_ucode.gpu_va = nvgpu_gmmu_map(vm,
1344 &acr->hsbl_ucode,
1345 bl_sz,
1346 0, /* flags */
1347 gk20a_mem_flag_read_only, false,
1348 acr->hsbl_ucode.aperture);
1349 if (!acr->hsbl_ucode.gpu_va) {
1350 nvgpu_err(g, "failed to map pmu ucode memory!!");
1351 goto err_free_ucode;
1352 }
1353
1354 nvgpu_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz);
1355 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1356 }
1357 /*
1358 * Disable interrupts to avoid kernel hitting breakpoint due
1359 * to PMU halt
1360 */
1361
1362 if (g->ops.pmu.falcon_clear_halt_interrupt_status(g,
1363 gk20a_get_gr_idle_timeout(g)))
1364 goto err_unmap_bl;
1365
1366 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1367 pwr_falcon_mmu_phys_sec_r()));
1368 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1369
1370 g->ops.pmu.init_falcon_setup_hw(g, desc, acr->hsbl_ucode.size);
1371
1372 /* Poll for HALT */
1373 if (b_wait_for_halt) {
1374 err = g->ops.pmu.falcon_wait_for_halt(g,
1375 ACR_COMPLETION_TIMEOUT_MS);
1376 if (err == 0) {
1377 /* Clear the HALT interrupt */
1378 if (g->ops.pmu.falcon_clear_halt_interrupt_status(g,
1379 gk20a_get_gr_idle_timeout(g)))
1380 goto err_unmap_bl;
1381 }
1382 else
1383 goto err_unmap_bl;
1384 }
1385 gm20b_dbg_pmu("after waiting for halt, err %x\n", err);
1386 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1387 pwr_falcon_mmu_phys_sec_r()));
1388 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1389 start_gm20b_pmu(g);
1390 return 0;
1391err_unmap_bl:
1392 nvgpu_gmmu_unmap(vm, &acr->hsbl_ucode, acr->hsbl_ucode.gpu_va);
1393err_free_ucode:
1394 nvgpu_dma_free(g, &acr->hsbl_ucode);
1395err_done:
1396 nvgpu_release_firmware(g, hsbl_fw);
1397 return err;
1398}
1399
1400/*!
1401* Wait for PMU to halt
1402* @param[in] g GPU object pointer
1403* @param[in] timeout_ms Timeout in msec for PMU to halt
1404* @return '0' if PMU halts
1405*/
1406int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms)
1407{
1408 struct nvgpu_pmu *pmu = &g->pmu;
1409 u32 data = 0;
1410 int ret = -EBUSY;
1411
1412 ret = nvgpu_flcn_wait_for_halt(pmu->flcn, timeout_ms);
1413 if (ret) {
1414 nvgpu_err(g, "ACR boot timed out");
1415 return ret;
1416 }
1417
1418 g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r());
1419 gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
1420 data = gk20a_readl(g, pwr_falcon_mailbox0_r());
1421 if (data) {
1422 nvgpu_err(g, "ACR boot failed, err %x", data);
1423 ret = -EAGAIN;
1424 }
1425
1426 return ret;
1427}
1428
1429/*!
1430* Wait for PMU halt interrupt status to be cleared
1431* @param[in] g GPU object pointer
1432* @param[in] timeout_ms Timeout in msec for halt to clear
1433* @return '0' if PMU halt irq status is clear
1434*/
1435int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms)
1436{
1437 struct nvgpu_pmu *pmu = &g->pmu;
1438 int status = 0;
1439
1440 if (nvgpu_flcn_clear_halt_intr_status(pmu->flcn, timeout_ms))
1441 status = -EBUSY;
1442
1443 return status;
1444}
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
new file mode 100644
index 00000000..9d261aae
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
@@ -0,0 +1,60 @@
1/*
2 * GM20B ACR
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef __ACR_GM20B_H_
26#define __ACR_GM20B_H_
27
28#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode_image.bin"
29#define GM20B_PMU_UCODE_DESC "gpmu_ucode_desc.bin"
30#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
31#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
32#define GM20B_PMU_UCODE_SIG "pmu_sig.bin"
33#define GM20B_FECS_UCODE_SIG "fecs_sig.bin"
34#define T18x_GPCCS_UCODE_SIG "gpccs_sig.bin"
35
36bool gm20b_is_pmu_supported(struct gk20a *g);
37int prepare_ucode_blob(struct gk20a *g);
38int gm20b_bootstrap_hs_flcn(struct gk20a *g);
39bool gm20b_is_lazy_bootstrap(u32 falcon_id);
40bool gm20b_is_priv_load(u32 falcon_id);
41void gm20b_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
42int gm20b_alloc_blob_space(struct gk20a *g, size_t size, struct nvgpu_mem *mem);
43int gm20b_pmu_populate_loader_cfg(struct gk20a *g,
44 void *lsfm, u32 *p_bl_gen_desc_size);
45int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g,
46 void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid);
47int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms);
48int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout);
49int gm20b_init_pmu_setup_hw1(struct gk20a *g, void *desc, u32 bl_sz);
50
51int gm20b_pmu_setup_sw(struct gk20a *g);
52int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt);
53int gm20b_init_nspmu_setup_hw1(struct gk20a *g);
54int acr_ucode_patch_sig(struct gk20a *g,
55 unsigned int *p_img,
56 unsigned int *p_prod_sig,
57 unsigned int *p_dbg_sig,
58 unsigned int *p_patch_loc,
59 unsigned int *p_patch_ind);
60#endif /*__ACR_GM20B_H_*/
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
new file mode 100644
index 00000000..34c8d4b7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -0,0 +1,65 @@
1/*
2 * GM20B BUS
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/timers.h>
26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h>
28
29#include "bus_gm20b.h"
30#include "gk20a/gk20a.h"
31#include "gk20a/bus_gk20a.h"
32
33#include <nvgpu/hw/gm20b/hw_bus_gm20b.h>
34
35int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
36{
37 struct nvgpu_timeout timeout;
38 int err = 0;
39 u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
40 u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
41
42 gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
43
44 gk20a_writel(g, bus_bar1_block_r(),
45 nvgpu_aperture_mask(g, bar1_inst,
46 bus_bar1_block_target_sys_mem_ncoh_f(),
47 bus_bar1_block_target_vid_mem_f()) |
48 bus_bar1_block_mode_virtual_f() |
49 bus_bar1_block_ptr_f(ptr_v));
50 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
51 do {
52 u32 val = gk20a_readl(g, bus_bind_status_r());
53 u32 pending = bus_bind_status_bar1_pending_v(val);
54 u32 outstanding = bus_bind_status_bar1_outstanding_v(val);
55 if (!pending && !outstanding)
56 break;
57
58 nvgpu_udelay(5);
59 } while (!nvgpu_timeout_expired(&timeout));
60
61 if (nvgpu_timeout_peek_expired(&timeout))
62 err = -EINVAL;
63
64 return err;
65}
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.h b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h
new file mode 100644
index 00000000..961b906a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.h
@@ -0,0 +1,33 @@
1/*
2 * GM20B BUS
3 *
4 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVGPU_GM20B_BUS
26#define _NVGPU_GM20B_BUS
27
28struct gk20a;
29struct nvgpu_mem;
30
31int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst);
32
33#endif
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.c b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
new file mode 100644
index 00000000..61d3b6f5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.c
@@ -0,0 +1,1605 @@
1/*
2 * GM20B Clocks
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "clk_gm20b.h"
27
28#include <nvgpu/soc.h>
29#include <nvgpu/fuse.h>
30#include <nvgpu/bug.h>
31
32#include <nvgpu/hw/gm20b/hw_trim_gm20b.h>
33#include <nvgpu/hw/gm20b/hw_timer_gm20b.h>
34#include <nvgpu/hw/gm20b/hw_therm_gm20b.h>
35#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
36
37#define gk20a_dbg_clk(fmt, arg...) \
38 gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
39
40#define DFS_DET_RANGE 6 /* -2^6 ... 2^6-1 */
41#define SDM_DIN_RANGE 12 /* -2^12 ... 2^12-1 */
42#define DFS_TESTOUT_DET BIT(0)
43#define DFS_EXT_CAL_EN BIT(9)
44#define DFS_EXT_STROBE BIT(16)
45
46#define BOOT_GPU_UV_B1 1000000 /* gpu rail boot voltage 1.0V */
47#define BOOT_GPU_UV_C1 800000 /* gpu rail boot voltage 0.8V */
48#define ADC_SLOPE_UV 10000 /* default ADC detection slope 10mV */
49
50#define DVFS_SAFE_MARGIN 10 /* 10% */
51
52static struct pll_parms gpc_pll_params_b1 = {
53 128000, 2600000, /* freq */
54 1300000, 2600000, /* vco */
55 12000, 38400, /* u */
56 1, 255, /* M */
57 8, 255, /* N */
58 1, 31, /* PL */
59 -165230, 214007, /* DFS_COEFF */
60 0, 0, /* ADC char coeff - to be read from fuses */
61 0x7 << 3, /* vco control in NA mode */
62 500, /* Locking and ramping timeout */
63 40, /* Lock delay in NA mode */
64 5, /* IDDQ mode exit delay */
65};
66
67static struct pll_parms gpc_pll_params_c1 = {
68 76800, 2600000, /* freq */
69 1300000, 2600000, /* vco */
70 19200, 38400, /* u */
71 1, 255, /* M */
72 8, 255, /* N */
73 1, 31, /* PL */
74 -172550, 195374, /* DFS_COEFF */
75 0, 0, /* ADC char coeff - to be read from fuses */
76 (0x1 << 3) | 0x7, /* vco control in NA mode */
77 500, /* Locking and ramping timeout */
78 40, /* Lock delay in NA mode */
79 5, /* IDDQ mode exit delay */
80 0x3 << 10, /* DFS control settings */
81};
82
83static struct pll_parms gpc_pll_params;
84
85static void clk_setup_slide(struct gk20a *g, u32 clk_u);
86
87#define DUMP_REG(addr_func) \
88do { \
89 addr = trim_sys_##addr_func##_r(); \
90 data = gk20a_readl(g, addr); \
91 pr_info(#addr_func "[0x%x] = 0x%x\n", addr, data); \
92} while (0)
93
94static void dump_gpc_pll(struct gk20a *g, struct pll *gpll, u32 last_cfg)
95{
96 u32 addr, data;
97
98 pr_info("**** GPCPLL DUMP ****");
99 pr_info("gpcpll s/w M=%u N=%u P=%u\n", gpll->M, gpll->N, gpll->PL);
100 pr_info("gpcpll_cfg_last = 0x%x\n", last_cfg);
101 DUMP_REG(gpcpll_cfg);
102 DUMP_REG(gpcpll_coeff);
103 DUMP_REG(sel_vco);
104 pr_info("\n");
105}
106
107#define PLDIV_GLITCHLESS 1
108
109#if PLDIV_GLITCHLESS
110/*
111 * Post divider transition is glitchless only if there is a common "1" in
112 * the binary representation of the old and new settings.
113 */
114static u32 get_interim_pldiv(struct gk20a *g, u32 old_pl, u32 new_pl)
115{
116 u32 pl;
117
118 if ((g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) || (old_pl & new_pl))
119 return 0;
120
121 pl = old_pl | BIT(ffs(new_pl) - 1); /* pl never 0 */
122 new_pl |= BIT(ffs(old_pl) - 1);
123
124 return min(pl, new_pl);
125}
126#endif
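/*
 * Illustrative trace of get_interim_pldiv() (hypothetical PL values): going
 * from PL = 1 (0b00001) to PL = 4 (0b00100) has no common "1" bit, so the
 * function returns min(1 | BIT(ffs(4) - 1), 4 | BIT(ffs(1) - 1)) =
 * min(5, 5) = 5 (0b00101), which shares a bit with both the old and the new
 * setting and makes the two-step 1 -> 5 -> 4 transition glitchless.
 */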
127
128/* Calculate and update M/N/PL as well as pll->freq
129 ref_clk_f = clk_in_f;
130 u_f = ref_clk_f / M;
131 vco_f = u_f * N = ref_clk_f * N / M;
132 PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
133 gpcclk = gpc2clk / 2; */
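/*
 * Worked example of the formulas above (hypothetical coefficients, assuming
 * the 38400 kHz reference used in the parameter tables): with M = 1, N = 52,
 * PL = 1 (div 1), vco_f = 38400 * 52 / 1 = 1996800 kHz, so gpc2clk is
 * 1996800 kHz and gpcclk is 998400 kHz (~1 GHz), and the VCO stays within
 * the 1300000..2600000 kHz range.
 */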
134static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
135 struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
136{
137 u32 min_vco_f, max_vco_f;
138 u32 best_M, best_N;
139 u32 low_PL, high_PL, best_PL;
140 u32 m, n, n2;
141 u32 target_vco_f, vco_f;
142 u32 ref_clk_f, target_clk_f, u_f;
143 u32 delta, lwv, best_delta = ~0;
144 u32 pl;
145
146 BUG_ON(target_freq == NULL);
147
148 gk20a_dbg_fn("request target freq %d MHz", *target_freq);
149
150 ref_clk_f = pll->clk_in;
151 target_clk_f = *target_freq;
152 max_vco_f = pll_params->max_vco;
153 min_vco_f = pll_params->min_vco;
154 best_M = pll_params->max_M;
155 best_N = pll_params->min_N;
156 best_PL = pll_params->min_PL;
157
158 target_vco_f = target_clk_f + target_clk_f / 50;
159 if (max_vco_f < target_vco_f)
160 max_vco_f = target_vco_f;
161
162 /* Set PL search boundaries. */
163 high_PL = nvgpu_div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
164 high_PL = min(high_PL, pll_params->max_PL);
165 high_PL = max(high_PL, pll_params->min_PL);
166
167 low_PL = nvgpu_div_to_pl(min_vco_f / target_vco_f);
168 low_PL = min(low_PL, pll_params->max_PL);
169 low_PL = max(low_PL, pll_params->min_PL);
170
171 gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
172 low_PL, nvgpu_pl_to_div(low_PL), high_PL, nvgpu_pl_to_div(high_PL));
173
174 for (pl = low_PL; pl <= high_PL; pl++) {
175 target_vco_f = target_clk_f * nvgpu_pl_to_div(pl);
176
177 for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
178 u_f = ref_clk_f / m;
179
180 if (u_f < pll_params->min_u)
181 break;
182 if (u_f > pll_params->max_u)
183 continue;
184
185 n = (target_vco_f * m) / ref_clk_f;
186 n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;
187
188 if (n > pll_params->max_N)
189 break;
190
191 for (; n <= n2; n++) {
192 if (n < pll_params->min_N)
193 continue;
194 if (n > pll_params->max_N)
195 break;
196
197 vco_f = ref_clk_f * n / m;
198
199 if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
200 lwv = (vco_f + (nvgpu_pl_to_div(pl) / 2))
201 / nvgpu_pl_to_div(pl);
202 delta = abs(lwv - target_clk_f);
203
204 if (delta < best_delta) {
205 best_delta = delta;
206 best_M = m;
207 best_N = n;
208 best_PL = pl;
209
210 if (best_delta == 0 ||
211 /* 0.45% for non best fit */
212 (!best_fit && (vco_f / best_delta > 218))) {
213 goto found_match;
214 }
215
216 gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
217 delta, m, n, pl);
218 }
219 }
220 }
221 }
222 }
223
224found_match:
225 BUG_ON(best_delta == ~0U);
226
227 if (best_fit && best_delta != 0)
228 gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
229 target_clk_f);
230
231 pll->M = best_M;
232 pll->N = best_N;
233 pll->PL = best_PL;
234
235 /* save current frequency */
236 pll->freq = ref_clk_f * pll->N / (pll->M * nvgpu_pl_to_div(pll->PL));
237
238 *target_freq = pll->freq;
239
240 gk20a_dbg_clk("actual target freq %d kHz, M %d, N %d, PL %d(div%d)",
241 *target_freq, pll->M, pll->N, pll->PL, nvgpu_pl_to_div(pll->PL));
242
243 gk20a_dbg_fn("done");
244
245 return 0;
246}
247
248/* GPCPLL NA/DVFS mode methods */
249
250static inline int fuse_get_gpcpll_adc_rev(u32 val)
251{
252 return (val >> 30) & 0x3;
253}
254
255static inline int fuse_get_gpcpll_adc_slope_uv(u32 val)
256{
257 /* Integer part in mV * 1000 + fractional part in uV */
258 return ((val >> 24) & 0x3f) * 1000 + ((val >> 14) & 0x3ff);
259}
260
261static inline int fuse_get_gpcpll_adc_intercept_uv(u32 val)
262{
263 /* Integer part in mV * 1000 + fractional part in 100uV */
264 return ((val >> 4) & 0x3ff) * 1000 + ((val >> 0) & 0xf) * 100;
265}
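/*
 * Decoding sketch for the helpers above (hypothetical fuse value, not real
 * calibration data): if bits [29:24] = 10 and bits [23:14] = 500, the slope
 * is 10 * 1000 + 500 = 10500 uV; if bits [13:4] = 600 and bits [3:0] = 5,
 * the intercept is 600 * 1000 + 5 * 100 = 600500 uV.
 */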
266
267static int nvgpu_fuse_calib_gpcpll_get_adc(struct gk20a *g,
268 int *slope_uv, int *intercept_uv)
269{
270 u32 val;
271 int ret;
272
273 ret = nvgpu_tegra_fuse_read_reserved_calib(g, &val);
274 if (ret)
275 return ret;
276
277 if (!fuse_get_gpcpll_adc_rev(val))
278 return -EINVAL;
279
280 *slope_uv = fuse_get_gpcpll_adc_slope_uv(val);
281 *intercept_uv = fuse_get_gpcpll_adc_intercept_uv(val);
282 return 0;
283}
284
285#ifdef CONFIG_TEGRA_USE_NA_GPCPLL
286static bool nvgpu_fuse_can_use_na_gpcpll(struct gk20a *g)
287{
288 return nvgpu_tegra_get_gpu_speedo_id(g);
289}
290#endif
291
292/*
293 * Read ADC characteristic parameters from fuses.
294 * Determine calibration settings.
295 */
296static int clk_config_calibration_params(struct gk20a *g)
297{
298 int slope, offs;
299 struct pll_parms *p = &gpc_pll_params;
300
301 if (!nvgpu_fuse_calib_gpcpll_get_adc(g, &slope, &offs)) {
302 p->uvdet_slope = slope;
303 p->uvdet_offs = offs;
304 }
305
306 if (!p->uvdet_slope || !p->uvdet_offs) {
307 /*
308 * If ADC conversion slope/offset parameters are not fused
309 * (non-production config), report error, but allow to use
310 * boot internal calibration with default slope.
311 */
312 nvgpu_err(g, "ADC coeff are not fused");
313 return -EINVAL;
314 }
315 return 0;
316}
317
318/*
319 * Determine DFS_COEFF for the requested voltage. Always select external
320 * calibration override equal to the voltage, and set maximum detection
321 * limit "0" (to make sure that PLL output remains under F/V curve when
322 * voltage increases).
323 */
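/*
 * Rough sanity check of the coefficient math below (illustrative arithmetic
 * only, not a characterized operating point): with the B1 parameters above
 * (coeff_slope = -165230, coeff_offs = 214007) and mv = 1000,
 * coeff = DIV_ROUND_CLOSEST(1000 * -165230, 1000) + 214007 = 48777, and
 * DIV_ROUND_CLOSEST(48777, 1000) = 49, so dfs_coeff is about 49 before the
 * coeff_max clamp is applied.
 */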
324static void clk_config_dvfs_detection(int mv, struct na_dvfs *d)
325{
326 u32 coeff, coeff_max;
327 struct pll_parms *p = &gpc_pll_params;
328
329 coeff_max = trim_sys_gpcpll_dvfs0_dfs_coeff_v(
330 trim_sys_gpcpll_dvfs0_dfs_coeff_m());
331 coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
332 coeff = DIV_ROUND_CLOSEST(coeff, 1000);
333 coeff = min(coeff, coeff_max);
334 d->dfs_coeff = coeff;
335
336 d->dfs_ext_cal = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs,
337 p->uvdet_slope);
338 BUG_ON(abs(d->dfs_ext_cal) >= (1 << DFS_DET_RANGE));
339 d->uv_cal = p->uvdet_offs + d->dfs_ext_cal * p->uvdet_slope;
340 d->dfs_det_max = 0;
341}
342
343/*
344 * Solve equation for integer and fractional part of the effective NDIV:
345 *
346 * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) +
347 * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE
348 *
349 * The SDM_DIN LSB is finally shifted out, since it is not accessible by s/w.
350 */
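/*
 * Numeric sketch of the conversion below (hypothetical inputs): with
 * n_eff = 52 and a detection delta of zero (voltage equal to the calibration
 * level), n = 52 << 6 = 3328, so n_int = 52 and rem = 0; sdm_din then starts
 * as (0 << 7) - (1 << 12) = -4096, i.e. -0.5 in n_eff units, cancelling the
 * "+ 1/2" term, and the value left in the register field after the final
 * shift and mask is 0xf0.
 */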
351static void clk_config_dvfs_ndiv(int mv, u32 n_eff, struct na_dvfs *d)
352{
353 int n, det_delta;
354 u32 rem, rem_range;
355 struct pll_parms *p = &gpc_pll_params;
356
357 det_delta = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs,
358 p->uvdet_slope);
359 det_delta -= d->dfs_ext_cal;
360 det_delta = min(det_delta, d->dfs_det_max);
361 det_delta = det_delta * d->dfs_coeff;
362
363 n = (int)(n_eff << DFS_DET_RANGE) - det_delta;
364 BUG_ON((n < 0) || (n > (int)(p->max_N << DFS_DET_RANGE)));
365 d->n_int = ((u32)n) >> DFS_DET_RANGE;
366
367 rem = ((u32)n) & ((1 << DFS_DET_RANGE) - 1);
368 rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
369 d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE);
370 d->sdm_din = (d->sdm_din >> BITS_PER_BYTE) & 0xff;
371}
372
373/* Voltage dependent configuration */
374static void clk_config_dvfs(struct gk20a *g, struct pll *gpll)
375{
376 struct na_dvfs *d = &gpll->dvfs;
377
378 d->mv = g->ops.clk.predict_mv_at_hz_cur_tfloor(&g->clk,
379 rate_gpc2clk_to_gpu(gpll->freq));
380
381 clk_config_dvfs_detection(d->mv, d);
382 clk_config_dvfs_ndiv(d->mv, gpll->N, d);
383}
384
385/* Update DVFS detection settings in flight */
386static void clk_set_dfs_coeff(struct gk20a *g, u32 dfs_coeff)
387{
388 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
389 data |= DFS_EXT_STROBE;
390 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
391
392 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
393 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
394 trim_sys_gpcpll_dvfs0_dfs_coeff_f(dfs_coeff));
395 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
396
397 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
398 nvgpu_udelay(1);
399 data &= ~DFS_EXT_STROBE;
400 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
401}
402
403static void __maybe_unused clk_set_dfs_det_max(struct gk20a *g, u32 dfs_det_max)
404{
405 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
406 data |= DFS_EXT_STROBE;
407 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
408
409 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
410 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
411 trim_sys_gpcpll_dvfs0_dfs_det_max_f(dfs_det_max));
412 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
413
414 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
415 nvgpu_udelay(1);
416 data &= ~DFS_EXT_STROBE;
417 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
418}
419
420static void clk_set_dfs_ext_cal(struct gk20a *g, u32 dfs_det_cal)
421{
422 u32 data, ctrl;
423
424 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
425 data &= ~(BIT(DFS_DET_RANGE + 1) - 1);
426 data |= dfs_det_cal & (BIT(DFS_DET_RANGE + 1) - 1);
427 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
428
429 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
430 nvgpu_udelay(1);
431 ctrl = trim_sys_gpcpll_dvfs1_dfs_ctrl_v(data);
432 if (~ctrl & DFS_EXT_CAL_EN) {
433 data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(),
434 trim_sys_gpcpll_dvfs1_dfs_ctrl_f(
435 ctrl | DFS_EXT_CAL_EN | DFS_TESTOUT_DET));
436 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
437 }
438}
439
440static void clk_setup_dvfs_detection(struct gk20a *g, struct pll *gpll)
441{
442 struct na_dvfs *d = &gpll->dvfs;
443
444 u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
445 data |= DFS_EXT_STROBE;
446 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
447
448 data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
449 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
450 trim_sys_gpcpll_dvfs0_dfs_coeff_f(d->dfs_coeff));
451 data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
452 trim_sys_gpcpll_dvfs0_dfs_det_max_f(d->dfs_det_max));
453 gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);
454
455 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
456 nvgpu_udelay(1);
457 data &= ~DFS_EXT_STROBE;
458 gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
459
460 clk_set_dfs_ext_cal(g, d->dfs_ext_cal);
461}
462
463/* Enable NA/DVFS mode */
464static int clk_enable_pll_dvfs(struct gk20a *g)
465{
466 u32 data, cfg = 0;
467 int delay = gpc_pll_params.iddq_exit_delay; /* iddq & calib delay */
468 struct pll_parms *p = &gpc_pll_params;
469 bool calibrated = p->uvdet_slope && p->uvdet_offs;
470
471 /* Enable NA DVFS */
472 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
473 data |= trim_sys_gpcpll_dvfs1_en_dfs_m();
474 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
475
476 /* Set VCO_CTRL */
477 if (p->vco_ctrl) {
478 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
479 data = set_field(data, trim_sys_gpcpll_cfg3_vco_ctrl_m(),
480 trim_sys_gpcpll_cfg3_vco_ctrl_f(p->vco_ctrl));
481 gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
482 }
483
484 /* Set NA mode DFS control */
485 if (p->dfs_ctrl) {
486 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
487 data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(),
488 trim_sys_gpcpll_dvfs1_dfs_ctrl_f(p->dfs_ctrl));
489 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
490 }
491
492 /*
493 * If calibration parameters are known (either from fuses, or from
494 * internal calibration on boot) - use them. Internal calibration is
495 * started anyway; it will complete, but results will not be used.
496 */
497 if (calibrated) {
498 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
499 data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
500 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
501 }
502
503 /* Exit IDDQ mode */
504 data = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
505 data = set_field(data, trim_sys_gpcpll_cfg_iddq_m(),
506 trim_sys_gpcpll_cfg_iddq_power_on_v());
507 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), data);
508 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
509 nvgpu_udelay(delay);
510
511 /*
512 * Dynamic ramp setup based on update rate, which in DVFS mode on GM20b
513 * is always 38.4 MHz, the same as reference clock rate.
514 */
515 clk_setup_slide(g, g->clk.gpc_pll.clk_in);
516
517 if (calibrated)
518 return 0;
519
520 /*
521 * If calibration parameters are not fused, start internal calibration,
522 * wait for completion, and use results along with default slope to
523 * calculate ADC offset during boot.
524 */
525 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
526 data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
527 gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
528
529 /* C1 PLL must be enabled to read internal calibration results */
530 if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) {
531 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
532 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
533 trim_sys_gpcpll_cfg_enable_yes_f());
534 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
535 }
536
537 /* Wait for internal calibration done (spec < 2us). */
538 do {
539 data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
540 if (trim_sys_gpcpll_dvfs1_dfs_cal_done_v(data))
541 break;
542 nvgpu_udelay(1);
543 delay--;
544 } while (delay > 0);
545
546 /* Read calibration results */
547 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
548 data = trim_sys_gpcpll_cfg3_dfs_testout_v(data);
549
550 if (g->clk.gpc_pll.id == GM20B_GPC_PLL_C1) {
551 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
552 trim_sys_gpcpll_cfg_enable_no_f());
553 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
554 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
555 }
556
557 if (delay <= 0) {
558 nvgpu_err(g, "GPCPLL calibration timeout");
559 return -ETIMEDOUT;
560 }
561
562 p->uvdet_offs = g->clk.pll_poweron_uv - data * ADC_SLOPE_UV;
563 p->uvdet_slope = ADC_SLOPE_UV;
564 return 0;
565}
566
567/* GPCPLL slide methods */
568static void clk_setup_slide(struct gk20a *g, u32 clk_u)
569{
570 u32 data, step_a, step_b;
571
572 switch (clk_u) {
573 case 12000:
574 case 12800:
575 case 13000: /* only on FPGA */
576 step_a = 0x2B;
577 step_b = 0x0B;
578 break;
579 case 19200:
580 step_a = 0x12;
581 step_b = 0x08;
582 break;
583 case 38400:
584 step_a = 0x04;
585 step_b = 0x05;
586 break;
587 default:
588 nvgpu_err(g, "Unexpected reference rate %u kHz", clk_u);
589 BUG();
590 }
591
592 /* setup */
593 data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
594 data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
595 trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
596 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
597 data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
598 data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
599 trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
600 gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
601}
602
603static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll)
604{
605 u32 data, coeff;
606 u32 nold, sdm_old;
607 int ramp_timeout = gpc_pll_params.lock_timeout;
608
609 /* get old coefficients */
610 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
611 nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);
612
613 /* do nothing if NDIV is same */
614 if (gpll->mode == GPC_PLL_MODE_DVFS) {
615 /* in DVFS mode check both integer and fraction */
616 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
617 sdm_old = trim_sys_gpcpll_cfg2_sdm_din_v(coeff);
618 if ((gpll->dvfs.n_int == nold) &&
619 (gpll->dvfs.sdm_din == sdm_old))
620 return 0;
621 } else {
622 if (gpll->N == nold)
623 return 0;
624
625 /* dynamic ramp setup based on update rate */
626 clk_setup_slide(g, gpll->clk_in / gpll->M);
627 }
628
629 /* pll slowdown mode */
630 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
631 data = set_field(data,
632 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
633 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
634 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
635
636 /* new ndiv ready for ramp */
637 if (gpll->mode == GPC_PLL_MODE_DVFS) {
638 /* in DVFS mode SDM is updated via "new" field */
639 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
640 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_new_m(),
641 trim_sys_gpcpll_cfg2_sdm_din_new_f(gpll->dvfs.sdm_din));
642 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
643
644 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
645 coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
646 trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int));
647 nvgpu_udelay(1);
648 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
649 } else {
650 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
651 coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
652 trim_sys_gpcpll_coeff_ndiv_f(gpll->N));
653 nvgpu_udelay(1);
654 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
655 }
656
657 /* dynamic ramp to new ndiv */
658 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
659 data = set_field(data,
660 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
661 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
662 nvgpu_udelay(1);
663 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
664
665 do {
666 nvgpu_udelay(1);
667 ramp_timeout--;
668 data = gk20a_readl(
669 g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
670 if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
671 break;
672 } while (ramp_timeout > 0);
673
674 if ((gpll->mode == GPC_PLL_MODE_DVFS) && (ramp_timeout > 0)) {
675 /* in DVFS mode complete SDM update */
676 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
677 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
678 trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
679 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
680 }
681
682 /* exit slowdown mode */
683 data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
684 data = set_field(data,
685 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
686 trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
687 data = set_field(data,
688 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
689 trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
690 gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
691 gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
692
693 if (ramp_timeout <= 0) {
694 nvgpu_err(g, "gpcpll dynamic ramp timeout");
695 return -ETIMEDOUT;
696 }
697 return 0;
698}
699
700/* GPCPLL bypass methods */
701static int clk_change_pldiv_under_bypass(struct gk20a *g, struct pll *gpll)
702{
703 u32 data, coeff;
704
705 /* put PLL in bypass before programming it */
706 data = gk20a_readl(g, trim_sys_sel_vco_r());
707 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
708 trim_sys_sel_vco_gpc2clk_out_bypass_f());
709 gk20a_writel(g, trim_sys_sel_vco_r(), data);
710
711 /* change PLDIV */
712 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
713 nvgpu_udelay(1);
714 coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
715 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL));
716 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
717
718 /* put PLL back on vco */
719 data = gk20a_readl(g, trim_sys_sel_vco_r());
720 nvgpu_udelay(1);
721 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
722 trim_sys_sel_vco_gpc2clk_out_vco_f());
723 gk20a_writel(g, trim_sys_sel_vco_r(), data);
724
725 return 0;
726}
727
728static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll)
729{
730 u32 data, cfg, coeff, timeout;
731
732 /* put PLL in bypass before programming it */
733 data = gk20a_readl(g, trim_sys_sel_vco_r());
734 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
735 trim_sys_sel_vco_gpc2clk_out_bypass_f());
736 gk20a_writel(g, trim_sys_sel_vco_r(), data);
737
738 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
739 nvgpu_udelay(1);
740 if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
741 /* get out from IDDQ (1st power up) */
742 cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
743 trim_sys_gpcpll_cfg_iddq_power_on_v());
744 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
745 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
746 nvgpu_udelay(gpc_pll_params.iddq_exit_delay);
747 } else {
748 /* clear SYNC_MODE before disabling PLL */
749 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
750 trim_sys_gpcpll_cfg_sync_mode_disable_f());
751 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
752 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
753
754 /* disable running PLL before changing coefficients */
755 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
756 trim_sys_gpcpll_cfg_enable_no_f());
757 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
758 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
759 }
760
761 /* change coefficients */
762 if (gpll->mode == GPC_PLL_MODE_DVFS) {
763 clk_setup_dvfs_detection(g, gpll);
764
765 coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
766 coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
767 trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
768 gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
769
770 coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
771 trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int) |
772 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
773 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
774 } else {
775 coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
776 trim_sys_gpcpll_coeff_ndiv_f(gpll->N) |
777 trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
778 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
779 }
780
781 /* enable PLL after changing coefficients */
782 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
783 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
784 trim_sys_gpcpll_cfg_enable_yes_f());
785 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
786
787 /* just delay in DVFS mode (lock cannot be used) */
788 if (gpll->mode == GPC_PLL_MODE_DVFS) {
789 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
790 nvgpu_udelay(gpc_pll_params.na_lock_delay);
791 gk20a_dbg_clk("NA config_pll under bypass: %u (%u) kHz %d mV",
792 gpll->freq, gpll->freq / 2,
793 (trim_sys_gpcpll_cfg3_dfs_testout_v(
794 gk20a_readl(g, trim_sys_gpcpll_cfg3_r()))
795 * gpc_pll_params.uvdet_slope
796 + gpc_pll_params.uvdet_offs) / 1000);
797 goto pll_locked;
798 }
799
800 /* lock pll */
801 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
802	if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()) {
803 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
804 trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
805 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
806 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
807 }
808
809 /* wait pll lock */
810 timeout = gpc_pll_params.lock_timeout + 1;
811 do {
812 nvgpu_udelay(1);
813 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
814 if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
815 goto pll_locked;
816 } while (--timeout > 0);
817
818 /* PLL is messed up. What can we do here? */
819 dump_gpc_pll(g, gpll, cfg);
820 BUG();
821 return -EBUSY;
822
823pll_locked:
824 gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
825 trim_sys_gpcpll_cfg_r(), cfg);
826
827 /* set SYNC_MODE for glitchless switch out of bypass */
828 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
829 trim_sys_gpcpll_cfg_sync_mode_enable_f());
830 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
831 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
832
833 /* put PLL back on vco */
834 data = gk20a_readl(g, trim_sys_sel_vco_r());
835 data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
836 trim_sys_sel_vco_gpc2clk_out_vco_f());
837 gk20a_writel(g, trim_sys_sel_vco_r(), data);
838
839 return 0;
840}
841
842/*
843 * Change GPCPLL frequency:
844 * - in legacy (non-DVFS) mode
845 * - in DVFS mode at constant DVFS detection settings, matching current/lower
846 * voltage; the same procedure can be used in this case, since maximum DVFS
847 * detection limit makes sure that PLL output remains under F/V curve when
848 * voltage increases arbitrarily.
849 */
850static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new,
851 int allow_slide)
852{
853 u32 cfg, coeff, data;
854 bool can_slide, pldiv_only;
855 struct pll gpll;
856
857 gk20a_dbg_fn("");
858
859 if (!nvgpu_platform_is_silicon(g))
860 return 0;
861
862 /* get old coefficients */
863 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
864 gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
865 gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff);
866 gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff);
867 gpll.clk_in = gpll_new->clk_in;
868
869 /* combine target dvfs with old coefficients */
870 gpll.dvfs = gpll_new->dvfs;
871 gpll.mode = gpll_new->mode;
872
873 /* do NDIV slide if there is no change in M and PL */
874 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
875 can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);
876
877 if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL))
878 return clk_slide_gpc_pll(g, gpll_new);
879
880 /* slide down to NDIV_LO */
881 if (can_slide) {
882 int ret;
883 gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
884 gpll.clk_in);
885 if (gpll.mode == GPC_PLL_MODE_DVFS)
886 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
887 ret = clk_slide_gpc_pll(g, &gpll);
888 if (ret)
889 return ret;
890 }
891 pldiv_only = can_slide && (gpll_new->M == gpll.M);
892
893 /*
894 * Split FO-to-bypass jump in halfs by setting out divider 1:2.
895 * (needed even if PLDIV_GLITCHLESS is set, since 1:1 <=> 1:2 direct
896 * transition is not really glitch-less - see get_interim_pldiv
897 * function header).
898 */
899 if ((gpll_new->PL < 2) || (gpll.PL < 2)) {
900 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
901 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
902 trim_sys_gpc2clk_out_vcodiv_f(2));
903 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
904 /* Intentional 2nd write to assure linear divider operation */
905 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
906 gk20a_readl(g, trim_sys_gpc2clk_out_r());
907 nvgpu_udelay(2);
908 }
909
910#if PLDIV_GLITCHLESS
911 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
912 if (pldiv_only) {
913 /* Insert interim PLDIV state if necessary */
914 u32 interim_pl = get_interim_pldiv(g, gpll_new->PL, gpll.PL);
915 if (interim_pl) {
916 coeff = set_field(coeff,
917 trim_sys_gpcpll_coeff_pldiv_m(),
918 trim_sys_gpcpll_coeff_pldiv_f(interim_pl));
919 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
920 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
921 }
922 goto set_pldiv; /* path A: no need to bypass */
923 }
924
925 /* path B: bypass if either M changes or PLL is disabled */
926#endif
927 /*
928 * Program and lock pll under bypass. On exit PLL is out of bypass,
929 * enabled, and locked. VCO is at vco_min if sliding is allowed.
930 * Otherwise it is at VCO target (and therefore last slide call below
931 * is effectively NOP). PL is set to target. Output divider is engaged
932 * at 1:2 if either entry, or exit PL setting is 1:1.
933 */
934 gpll = *gpll_new;
935 if (allow_slide) {
936 gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco,
937 gpll_new->clk_in);
938 if (gpll.mode == GPC_PLL_MODE_DVFS)
939 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
940 }
941 if (pldiv_only)
942 clk_change_pldiv_under_bypass(g, &gpll);
943 else
944 clk_lock_gpc_pll_under_bypass(g, &gpll);
945
946#if PLDIV_GLITCHLESS
947 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
948
949set_pldiv:
950 /* coeff must be current from either path A or B */
951 if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) {
952 coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
953 trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL));
954 gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
955 }
956#endif
957 /* restore out divider 1:1 */
958 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
959 if ((data & trim_sys_gpc2clk_out_vcodiv_m()) !=
960 trim_sys_gpc2clk_out_vcodiv_by1_f()) {
961 data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
962 trim_sys_gpc2clk_out_vcodiv_by1_f());
963 nvgpu_udelay(2);
964 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
965 /* Intentional 2nd write to assure linear divider operation */
966 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
967 gk20a_readl(g, trim_sys_gpc2clk_out_r());
968 }
969
970 /* slide up to target NDIV */
971 return clk_slide_gpc_pll(g, gpll_new);
972}
973
974/* Find GPCPLL config safe at DVFS coefficient = 0, matching target frequency */
975static void clk_config_pll_safe_dvfs(struct gk20a *g, struct pll *gpll)
976{
977 u32 nsafe, nmin;
978
979 if (gpll->freq > g->clk.dvfs_safe_max_freq)
980 gpll->freq = gpll->freq * (100 - DVFS_SAFE_MARGIN) / 100;
981
982 nmin = DIV_ROUND_UP(gpll->M * gpc_pll_params.min_vco, gpll->clk_in);
983 nsafe = gpll->M * gpll->freq / gpll->clk_in;
984
985 /*
986 * If safe frequency is above VCOmin, it can be used in safe PLL config
987 * as is. Since safe frequency is below both old and new frequencies,
988 * in this case all three configurations have the same post divider 1:1, and
989 * direct old=>safe=>new n-sliding will be used for transitions.
990 *
991 * Otherwise, if safe frequency is below VCO min, post-divider in safe
992 * configuration (and possibly in old and/or new configurations) is
993 * above 1:1, and each of the old=>safe and safe=>new transitions includes
994 * sliding to/from VCOmin, as well as divider changes. To avoid extra
995 * dynamic ramps from VCOmin during old=>safe transition and to VCOmin
996 * during safe=>new transition, select nmin as safe NDIV, and set safe
997 * post divider to assure PLL output is below safe frequency
998 */
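	/*
	 * Example of the fallback below (illustrative numbers only): with
	 * clk_in = 38400 kHz, M = 1 and min_vco = 1300000 kHz, nmin = 34;
	 * a safe gpc2clk target of 600000 kHz gives nsafe = 15 < nmin, so
	 * PL = DIV_ROUND_UP(34 * 38400, 600000) = 3 and the safe output
	 * becomes 38400 * 34 / 3 = 435200 kHz, below the safe limit.
	 */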
999 if (nsafe < nmin) {
1000 gpll->PL = DIV_ROUND_UP(nmin * gpll->clk_in,
1001 gpll->M * gpll->freq);
1002 nsafe = nmin;
1003 }
1004 gpll->N = nsafe;
1005 clk_config_dvfs_ndiv(gpll->dvfs.mv, gpll->N, &gpll->dvfs);
1006
1007 gk20a_dbg_clk("safe freq %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d",
1008 gpll->freq, gpll->M, gpll->N, gpll->PL, nvgpu_pl_to_div(gpll->PL),
1009 gpll->dvfs.mv, gpll->dvfs.uv_cal / 1000, gpll->dvfs.dfs_coeff);
1010}
1011
1012/* Change GPCPLL frequency and DVFS detection settings in DVFS mode */
1013static int clk_program_na_gpc_pll(struct gk20a *g, struct pll *gpll_new,
1014 int allow_slide)
1015{
1016 int ret;
1017 struct pll gpll_safe;
1018 struct pll *gpll_old = &g->clk.gpc_pll_last;
1019
1020 BUG_ON(gpll_new->M != 1); /* the only MDIV in NA mode */
1021 clk_config_dvfs(g, gpll_new);
1022
1023 /*
1024 * In cases below no intermediate steps in PLL DVFS configuration are
1025 * necessary because either
1026 * - PLL DVFS will be configured under bypass directly to target, or
1027 * - voltage is not changing, so DVFS detection settings are the same
1028 */
1029 if (!allow_slide || !gpll_new->enabled ||
1030 (gpll_old->dvfs.mv == gpll_new->dvfs.mv))
1031 return clk_program_gpc_pll(g, gpll_new, allow_slide);
1032
1033 /*
1034 * Interim step for changing DVFS detection settings: low enough
1035 * frequency to be safe at DVFS coeff = 0.
1036 *
1037 * 1. If voltage is increasing:
1038 * - safe frequency target matches the lowest - old - frequency
1039 * - DVFS settings are still old
1040 * - Voltage already increased to new level by tegra DVFS, but maximum
1041 * detection limit assures PLL output remains under F/V curve
1042 *
1043 * 2. If voltage is decreasing:
1044 * - safe frequency target matches the lowest - new - frequency
1045 * - DVFS settings are still old
1046 * - Voltage is also old, it will be lowered by tegra DVFS afterwards
1047 *
1048 * Interim step can be skipped if old frequency is below safe minimum,
1049 * i.e., it is low enough to be safe at any voltage in operating range
1050 * with zero DVFS coefficient.
1051 */
1052 if (gpll_old->freq > g->clk.dvfs_safe_max_freq) {
1053 if (gpll_old->dvfs.mv < gpll_new->dvfs.mv) {
1054 gpll_safe = *gpll_old;
1055 gpll_safe.dvfs.mv = gpll_new->dvfs.mv;
1056 } else {
1057 gpll_safe = *gpll_new;
1058 gpll_safe.dvfs = gpll_old->dvfs;
1059 }
1060 clk_config_pll_safe_dvfs(g, &gpll_safe);
1061
1062 ret = clk_program_gpc_pll(g, &gpll_safe, 1);
1063 if (ret) {
1064 nvgpu_err(g, "Safe dvfs program fail");
1065 return ret;
1066 }
1067 }
1068
1069 /*
1070 * DVFS detection settings transition:
1071 * - Set DVFS coefficient zero (safe, since already at frequency safe
1072 * at DVFS coeff = 0 for the lowest of the old/new end-points)
1073 * - Set calibration level to new voltage (safe, since DVFS coeff = 0)
1074 * - Set DVFS coefficient to match new voltage (safe, since already at
1075 * frequency safe at DVFS coeff = 0 for the lowest of the old/new
1076 * end-points).
1077 */
1078 clk_set_dfs_coeff(g, 0);
1079 clk_set_dfs_ext_cal(g, gpll_new->dvfs.dfs_ext_cal);
1080 clk_set_dfs_coeff(g, gpll_new->dvfs.dfs_coeff);
1081
1082 gk20a_dbg_clk("config_pll %d kHz, M %d, N %d, PL %d(div%d), mV(cal) %d(%d), DC %d",
1083 gpll_new->freq, gpll_new->M, gpll_new->N, gpll_new->PL,
1084 nvgpu_pl_to_div(gpll_new->PL),
1085 max(gpll_new->dvfs.mv, gpll_old->dvfs.mv),
1086 gpll_new->dvfs.uv_cal / 1000, gpll_new->dvfs.dfs_coeff);
1087
1088 /* Finally set target rate (with DVFS detection settings already new) */
1089 return clk_program_gpc_pll(g, gpll_new, 1);
1090}
1091
1092static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
1093{
1094 u32 cfg, coeff;
1095 struct clk_gk20a *clk = &g->clk;
1096 struct pll gpll = clk->gpc_pll;
1097
1098 /* slide to VCO min */
1099 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1100 if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
1101 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
1102 gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
1103 gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
1104 gpll.clk_in);
1105 if (gpll.mode == GPC_PLL_MODE_DVFS)
1106 clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
1107 clk_slide_gpc_pll(g, &gpll);
1108 }
1109
1110 /* put PLL in bypass before disabling it */
1111 cfg = gk20a_readl(g, trim_sys_sel_vco_r());
1112 cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
1113 trim_sys_sel_vco_gpc2clk_out_bypass_f());
1114 gk20a_writel(g, trim_sys_sel_vco_r(), cfg);
1115
1116 /* clear SYNC_MODE before disabling PLL */
1117 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1118 cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
1119 trim_sys_gpcpll_cfg_sync_mode_disable_f());
1120 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
1121
1122 /* disable PLL */
1123 cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1124 cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
1125 trim_sys_gpcpll_cfg_enable_no_f());
1126 gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
1127 gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1128
1129 clk->gpc_pll.enabled = false;
1130 clk->gpc_pll_last.enabled = false;
1131 return 0;
1132}
1133
1134struct pll_parms *gm20b_get_gpc_pll_parms(void)
1135{
1136 return &gpc_pll_params;
1137}
1138
1139int gm20b_init_clk_setup_sw(struct gk20a *g)
1140{
1141 struct clk_gk20a *clk = &g->clk;
1142 unsigned long safe_rate;
1143 int err;
1144
1145 gk20a_dbg_fn("");
1146
1147 err = nvgpu_mutex_init(&clk->clk_mutex);
1148 if (err)
1149 return err;
1150
1151 if (clk->sw_ready) {
1152 gk20a_dbg_fn("skip init");
1153 return 0;
1154 }
1155
1156 if (clk->gpc_pll.id == GM20B_GPC_PLL_C1) {
1157 gpc_pll_params = gpc_pll_params_c1;
1158 if (!clk->pll_poweron_uv)
1159 clk->pll_poweron_uv = BOOT_GPU_UV_C1;
1160 } else {
1161 gpc_pll_params = gpc_pll_params_b1;
1162 if (!clk->pll_poweron_uv)
1163 clk->pll_poweron_uv = BOOT_GPU_UV_B1;
1164 }
1165
1166 clk->gpc_pll.clk_in = g->ops.clk.get_ref_clock_rate(g) / KHZ;
1167 if (clk->gpc_pll.clk_in == 0) {
1168 nvgpu_err(g, "GPCPLL reference clock is zero");
1169 err = -EINVAL;
1170 goto fail;
1171 }
1172
1173 safe_rate = g->ops.clk.get_fmax_at_vmin_safe(clk);
1174 safe_rate = safe_rate * (100 - DVFS_SAFE_MARGIN) / 100;
1175 clk->dvfs_safe_max_freq = rate_gpu_to_gpc2clk(safe_rate);
1176 clk->gpc_pll.PL = (clk->dvfs_safe_max_freq == 0) ? 0 :
1177 DIV_ROUND_UP(gpc_pll_params.min_vco, clk->dvfs_safe_max_freq);
1178
1179 /* Initial freq: low enough to be safe at Vmin (default 1/3 VCO min) */
1180 clk->gpc_pll.M = 1;
1181 clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
1182 clk->gpc_pll.clk_in);
1183 clk->gpc_pll.PL = max(clk->gpc_pll.PL, 3U);
1184 clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
1185 clk->gpc_pll.freq /= nvgpu_pl_to_div(clk->gpc_pll.PL);
1186
1187 /*
1188 * All production parts should have ADC fuses burnt. Therefore, check
1189 * ADC fuses always, regardless of whether NA mode is selected; and if
1190 * NA mode is indeed selected, and part can support it, switch to NA
1191 * mode even when ADC calibration is not fused; less accurate s/w
1192 * self-calibration will be used for those parts.
1193 */
1194 clk_config_calibration_params(g);
1195#ifdef CONFIG_TEGRA_USE_NA_GPCPLL
1196 if (nvgpu_fuse_can_use_na_gpcpll(g)) {
1197 /* NA mode is supported only at max update rate 38.4 MHz */
1198 BUG_ON(clk->gpc_pll.clk_in != gpc_pll_params.max_u);
1199 clk->gpc_pll.mode = GPC_PLL_MODE_DVFS;
1200 gpc_pll_params.min_u = gpc_pll_params.max_u;
1201 }
1202#endif
1203
1204 clk->sw_ready = true;
1205
1206 gk20a_dbg_fn("done");
1207 nvgpu_info(g,
1208 "GPCPLL initial settings:%s M=%u, N=%u, P=%u (id = %u)",
1209 clk->gpc_pll.mode == GPC_PLL_MODE_DVFS ? " NA mode," : "",
1210 clk->gpc_pll.M, clk->gpc_pll.N, clk->gpc_pll.PL,
1211 clk->gpc_pll.id);
1212 return 0;
1213
1214fail:
1215 nvgpu_mutex_destroy(&clk->clk_mutex);
1216 return err;
1217}
1218
1219
1220static int set_pll_freq(struct gk20a *g, int allow_slide);
1221static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq);
1222
1223int gm20b_clk_prepare(struct clk_gk20a *clk)
1224{
1225 int ret = 0;
1226
1227 nvgpu_mutex_acquire(&clk->clk_mutex);
1228 if (!clk->gpc_pll.enabled && clk->clk_hw_on)
1229 ret = set_pll_freq(clk->g, 1);
1230 nvgpu_mutex_release(&clk->clk_mutex);
1231 return ret;
1232}
1233
1234void gm20b_clk_unprepare(struct clk_gk20a *clk)
1235{
1236 nvgpu_mutex_acquire(&clk->clk_mutex);
1237 if (clk->gpc_pll.enabled && clk->clk_hw_on)
1238 clk_disable_gpcpll(clk->g, 1);
1239 nvgpu_mutex_release(&clk->clk_mutex);
1240}
1241
1242int gm20b_clk_is_prepared(struct clk_gk20a *clk)
1243{
1244 return clk->gpc_pll.enabled && clk->clk_hw_on;
1245}
1246
1247unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate)
1248{
1249 return rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1250}
1251
1252int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate,
1253 unsigned long parent_rate)
1254{
1255 u32 old_freq;
1256 int ret = -ENODATA;
1257
1258 nvgpu_mutex_acquire(&clk->clk_mutex);
1259 old_freq = clk->gpc_pll.freq;
1260 ret = set_pll_target(clk->g, rate_gpu_to_gpc2clk(rate), old_freq);
1261 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
1262 ret = set_pll_freq(clk->g, 1);
1263 nvgpu_mutex_release(&clk->clk_mutex);
1264
1265 return ret;
1266}
1267
1268long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate,
1269 unsigned long *parent_rate)
1270{
1271 u32 freq;
1272 struct pll tmp_pll;
1273 unsigned long maxrate;
1274 struct gk20a *g = clk->g;
1275
1276 maxrate = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK);
1277 if (rate > maxrate)
1278 rate = maxrate;
1279
1280 nvgpu_mutex_acquire(&clk->clk_mutex);
1281 freq = rate_gpu_to_gpc2clk(rate);
1282 if (freq > gpc_pll_params.max_freq)
1283 freq = gpc_pll_params.max_freq;
1284 else if (freq < gpc_pll_params.min_freq)
1285 freq = gpc_pll_params.min_freq;
1286
1287 tmp_pll = clk->gpc_pll;
1288 clk_config_pll(clk, &tmp_pll, &gpc_pll_params, &freq, true);
1289 nvgpu_mutex_release(&clk->clk_mutex);
1290
1291 return rate_gpc2clk_to_gpu(tmp_pll.freq);
1292}
1293
1294static int gm20b_init_clk_setup_hw(struct gk20a *g)
1295{
1296 u32 data;
1297
1298 gk20a_dbg_fn("");
1299
1300 /* LDIV: Div4 mode (required); both bypass and vco ratios 1:1 */
1301 data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
1302 data = set_field(data,
1303 trim_sys_gpc2clk_out_sdiv14_m() |
1304 trim_sys_gpc2clk_out_vcodiv_m() |
1305 trim_sys_gpc2clk_out_bypdiv_m(),
1306 trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
1307 trim_sys_gpc2clk_out_vcodiv_by1_f() |
1308 trim_sys_gpc2clk_out_bypdiv_f(0));
1309 gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
1310
1311 /*
1312 * Clear global bypass control; PLL is still under bypass, since SEL_VCO
1313 * is cleared by default.
1314 */
1315 data = gk20a_readl(g, trim_sys_bypassctrl_r());
1316 data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
1317 trim_sys_bypassctrl_gpcpll_vco_f());
1318 gk20a_writel(g, trim_sys_bypassctrl_r(), data);
1319
1320 /* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */
1321 data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_r());
1322 if (!fuse_ctrl_opt_ram_svop_pdp_data_v(data)) {
1323 data = set_field(data, fuse_ctrl_opt_ram_svop_pdp_data_m(),
1324 fuse_ctrl_opt_ram_svop_pdp_data_f(0x2));
1325 gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_r(), data);
1326 data = gk20a_readl(g, fuse_ctrl_opt_ram_svop_pdp_override_r());
1327 data = set_field(data,
1328 fuse_ctrl_opt_ram_svop_pdp_override_data_m(),
1329 fuse_ctrl_opt_ram_svop_pdp_override_data_yes_f());
1330 gk20a_writel(g, fuse_ctrl_opt_ram_svop_pdp_override_r(), data);
1331 }
1332
1333 /* Disable idle slow down */
1334 data = gk20a_readl(g, therm_clk_slowdown_r(0));
1335 data = set_field(data, therm_clk_slowdown_idle_factor_m(),
1336 therm_clk_slowdown_idle_factor_disabled_f());
1337 gk20a_writel(g, therm_clk_slowdown_r(0), data);
1338 gk20a_readl(g, therm_clk_slowdown_r(0));
1339
1340 if (g->clk.gpc_pll.mode == GPC_PLL_MODE_DVFS)
1341		return clk_enable_pll_dvfs(g);
1342
1343 return 0;
1344}
1345
1346static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
1347{
1348 struct clk_gk20a *clk = &g->clk;
1349
1350 if (freq > gpc_pll_params.max_freq)
1351 freq = gpc_pll_params.max_freq;
1352 else if (freq < gpc_pll_params.min_freq)
1353 freq = gpc_pll_params.min_freq;
1354
1355 if (freq != old_freq) {
1356 /* gpc_pll.freq is changed to new value here */
1357 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
1358 &freq, true)) {
1359 nvgpu_err(g, "failed to set pll target for %d", freq);
1360 return -EINVAL;
1361 }
1362 }
1363 return 0;
1364}
1365
1366static int set_pll_freq(struct gk20a *g, int allow_slide)
1367{
1368 struct clk_gk20a *clk = &g->clk;
1369 int err = 0;
1370
1371 gk20a_dbg_fn("last freq: %dMHz, target freq %dMHz",
1372 clk->gpc_pll_last.freq, clk->gpc_pll.freq);
1373
1374 /* If programming with dynamic sliding failed, re-try under bypass */
1375 if (clk->gpc_pll.mode == GPC_PLL_MODE_DVFS) {
1376 err = clk_program_na_gpc_pll(g, &clk->gpc_pll, allow_slide);
1377 if (err && allow_slide)
1378 err = clk_program_na_gpc_pll(g, &clk->gpc_pll, 0);
1379 } else {
1380 err = clk_program_gpc_pll(g, &clk->gpc_pll, allow_slide);
1381 if (err && allow_slide)
1382 err = clk_program_gpc_pll(g, &clk->gpc_pll, 0);
1383 }
1384
1385 if (!err) {
1386 clk->gpc_pll.enabled = true;
1387 clk->gpc_pll_last = clk->gpc_pll;
1388 return 0;
1389 }
1390
1391 /*
1392 * Just report error but not restore PLL since dvfs could already change
1393 * voltage even when programming failed.
1394 */
1395 nvgpu_err(g, "failed to set pll to %d", clk->gpc_pll.freq);
1396 return err;
1397}
1398
1399int gm20b_init_clk_support(struct gk20a *g)
1400{
1401 struct clk_gk20a *clk = &g->clk;
1402	int err;
1403
1404 gk20a_dbg_fn("");
1405
1406 nvgpu_mutex_acquire(&clk->clk_mutex);
1407 clk->clk_hw_on = true;
1408
1409 err = gm20b_init_clk_setup_hw(g);
1410 nvgpu_mutex_release(&clk->clk_mutex);
1411 if (err)
1412 return err;
1413
1414 /* FIXME: this effectively prevents host level clock gating */
1415 err = g->ops.clk.prepare_enable(&g->clk);
1416 if (err)
1417 return err;
1418
1419 /* The prev call may not enable PLL if gbus is unbalanced - force it */
1420 nvgpu_mutex_acquire(&clk->clk_mutex);
1421 if (!clk->gpc_pll.enabled)
1422 err = set_pll_freq(g, 1);
1423 nvgpu_mutex_release(&clk->clk_mutex);
1424 if (err)
1425 return err;
1426
1427 if (!clk->debugfs_set && g->ops.clk.init_debugfs) {
1428 err = g->ops.clk.init_debugfs(g);
1429 if (err)
1430 return err;
1431 clk->debugfs_set = true;
1432 }
1433
1434 return err;
1435}
1436
1437int gm20b_suspend_clk_support(struct gk20a *g)
1438{
1439 int ret = 0;
1440
1441 g->ops.clk.disable_unprepare(&g->clk);
1442
1443 /* The prev call may not disable PLL if gbus is unbalanced - force it */
1444 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1445 if (g->clk.gpc_pll.enabled)
1446 ret = clk_disable_gpcpll(g, 1);
1447 g->clk.clk_hw_on = false;
1448 nvgpu_mutex_release(&g->clk.clk_mutex);
1449
1450 nvgpu_mutex_destroy(&g->clk.clk_mutex);
1451
1452 return ret;
1453}
1454
1455int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val)
1456{
1457 struct gk20a *g = clk->g;
1458 struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
1459 u32 det_out;
1460 int err;
1461
1462 if (clk->gpc_pll.mode != GPC_PLL_MODE_DVFS)
1463 return -ENOSYS;
1464
1465 err = gk20a_busy(g);
1466 if (err)
1467 return err;
1468
1469 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1470
1471 det_out = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
1472 det_out = trim_sys_gpcpll_cfg3_dfs_testout_v(det_out);
1473 *val = div64_u64((u64)det_out * gpc_pll_params->uvdet_slope +
1474 gpc_pll_params->uvdet_offs, 1000ULL);
1475
1476 nvgpu_mutex_release(&g->clk.clk_mutex);
1477
1478 gk20a_idle(g);
1479 return 0;
1480}
1481
1482int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val)
1483{
1484 struct gk20a *g = clk->g;
1485 u32 clk_slowdown, clk_slowdown_save;
1486 int err;
1487
1488	u32 ncycle = 800; /* count GPCCLK over ncycle cycles of clk_in */
1489 u64 freq = clk->gpc_pll.clk_in;
1490 u32 count1, count2;
1491
1492 err = gk20a_busy(g);
1493 if (err)
1494 return err;
1495
1496 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1497
1498 /* Disable clock slowdown during measurements */
1499 clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
1500 clk_slowdown = set_field(clk_slowdown_save,
1501 therm_clk_slowdown_idle_factor_m(),
1502 therm_clk_slowdown_idle_factor_disabled_f());
1503 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
1504 gk20a_readl(g, therm_clk_slowdown_r(0));
1505
1506 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
1507 trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
1508 gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
1509 trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
1510 trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
1511 trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
1512 /* start */
1513
1514	/* It should take less than 25us to finish 800 cycles of the 38.4 MHz
1515	 * reference clock, but a delay longer than 100us is required here.
1516 */
1517 gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
1518 nvgpu_udelay(200);
1519
1520 count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
1521 nvgpu_udelay(100);
1522 count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
1523 freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
1524 do_div(freq, ncycle);
1525 *val = freq;
1526
1527 /* Restore clock slowdown */
1528 gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
1529 nvgpu_mutex_release(&g->clk.clk_mutex);
1530
1531 gk20a_idle(g);
1532
1533 if (count1 != count2)
1534 return -EBUSY;
1535
1536 return 0;
1537}
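/*
 * Back-of-the-envelope check of the counter math above (hypothetical counter
 * reading): with clk_in = 38400 kHz, ncycle = 800 and count2 = 20833,
 * freq = 38400 * 20833 / 800 = 999984 kHz, i.e. the counter reports a GPCCLK
 * of roughly 1 GHz.
 */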
1538
1539int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val)
1540{
1541 if (((reg < trim_sys_gpcpll_cfg_r()) ||
1542 (reg > trim_sys_gpcpll_dvfs2_r())) &&
1543 (reg != trim_sys_sel_vco_r()) &&
1544 (reg != trim_sys_gpc2clk_out_r()) &&
1545 (reg != trim_sys_bypassctrl_r()))
1546 return -EPERM;
1547
1548 if (reg == trim_sys_gpcpll_dvfs2_r())
1549 reg = trim_gpc_bcast_gpcpll_dvfs2_r();
1550
1551 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1552 if (!g->clk.clk_hw_on) {
1553 nvgpu_mutex_release(&g->clk.clk_mutex);
1554 return -EINVAL;
1555 }
1556 gk20a_writel(g, reg, val);
1557 nvgpu_mutex_release(&g->clk.clk_mutex);
1558
1559 return 0;
1560}
1561
1562int gm20b_clk_get_pll_debug_data(struct gk20a *g,
1563 struct nvgpu_clk_pll_debug_data *d)
1564{
1565 u32 reg;
1566
1567 nvgpu_mutex_acquire(&g->clk.clk_mutex);
1568 if (!g->clk.clk_hw_on) {
1569 nvgpu_mutex_release(&g->clk.clk_mutex);
1570 return -EINVAL;
1571 }
1572
1573 d->trim_sys_bypassctrl_reg = trim_sys_bypassctrl_r();
1574 d->trim_sys_bypassctrl_val = gk20a_readl(g, trim_sys_bypassctrl_r());
1575 d->trim_sys_sel_vco_reg = trim_sys_sel_vco_r();
1576 d->trim_sys_sel_vco_val = gk20a_readl(g, trim_sys_sel_vco_r());
1577 d->trim_sys_gpc2clk_out_reg = trim_sys_gpc2clk_out_r();
1578 d->trim_sys_gpc2clk_out_val = gk20a_readl(g, trim_sys_gpc2clk_out_r());
1579 d->trim_sys_gpcpll_cfg_reg = trim_sys_gpcpll_cfg_r();
1580 d->trim_sys_gpcpll_dvfs2_reg = trim_gpc_bcast_gpcpll_dvfs2_r();
1581
1582 reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
1583 d->trim_sys_gpcpll_cfg_val = reg;
1584 d->trim_sys_gpcpll_cfg_enabled = trim_sys_gpcpll_cfg_enable_v(reg);
1585 d->trim_sys_gpcpll_cfg_locked = trim_sys_gpcpll_cfg_pll_lock_v(reg);
1586 d->trim_sys_gpcpll_cfg_sync_on = trim_sys_gpcpll_cfg_sync_mode_v(reg);
1587
1588 reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
1589 d->trim_sys_gpcpll_coeff_val = reg;
1590 d->trim_sys_gpcpll_coeff_mdiv = trim_sys_gpcpll_coeff_mdiv_v(reg);
1591 d->trim_sys_gpcpll_coeff_ndiv = trim_sys_gpcpll_coeff_ndiv_v(reg);
1592 d->trim_sys_gpcpll_coeff_pldiv = trim_sys_gpcpll_coeff_pldiv_v(reg);
1593
1594 reg = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
1595 d->trim_sys_gpcpll_dvfs0_val = reg;
1596 d->trim_sys_gpcpll_dvfs0_dfs_coeff =
1597 trim_sys_gpcpll_dvfs0_dfs_coeff_v(reg);
1598 d->trim_sys_gpcpll_dvfs0_dfs_det_max =
1599 trim_sys_gpcpll_dvfs0_dfs_det_max_v(reg);
1600 d->trim_sys_gpcpll_dvfs0_dfs_dc_offset =
1601 trim_sys_gpcpll_dvfs0_dfs_dc_offset_v(reg);
1602
1603 nvgpu_mutex_release(&g->clk.clk_mutex);
1604 return 0;
1605}
diff --git a/drivers/gpu/nvgpu/gm20b/clk_gm20b.h b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
new file mode 100644
index 00000000..e814ac70
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/clk_gm20b.h
@@ -0,0 +1,95 @@
1/*
2 * GM20B Clocks
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef _NVHOST_CLK_GM20B_H_
25#define _NVHOST_CLK_GM20B_H_
26
27#include <nvgpu/lock.h>
28
29struct gk20a;
30struct clk_gk20a;
31
32struct nvgpu_clk_pll_debug_data {
33 u32 trim_sys_sel_vco_reg;
34 u32 trim_sys_sel_vco_val;
35
36 u32 trim_sys_gpc2clk_out_reg;
37 u32 trim_sys_gpc2clk_out_val;
38
39 u32 trim_sys_bypassctrl_reg;
40 u32 trim_sys_bypassctrl_val;
41
42 u32 trim_sys_gpcpll_cfg_reg;
43 u32 trim_sys_gpcpll_dvfs2_reg;
44
45 u32 trim_sys_gpcpll_cfg_val;
46 bool trim_sys_gpcpll_cfg_enabled;
47 bool trim_sys_gpcpll_cfg_locked;
48 bool trim_sys_gpcpll_cfg_sync_on;
49
50 u32 trim_sys_gpcpll_coeff_val;
51 u32 trim_sys_gpcpll_coeff_mdiv;
52 u32 trim_sys_gpcpll_coeff_ndiv;
53 u32 trim_sys_gpcpll_coeff_pldiv;
54
55 u32 trim_sys_gpcpll_dvfs0_val;
56 u32 trim_sys_gpcpll_dvfs0_dfs_coeff;
57 u32 trim_sys_gpcpll_dvfs0_dfs_det_max;
58 u32 trim_sys_gpcpll_dvfs0_dfs_dc_offset;
59};
60
61int gm20b_init_clk_setup_sw(struct gk20a *g);
62
63int gm20b_clk_prepare(struct clk_gk20a *clk);
64void gm20b_clk_unprepare(struct clk_gk20a *clk);
65int gm20b_clk_is_prepared(struct clk_gk20a *clk);
66unsigned long gm20b_recalc_rate(struct clk_gk20a *clk, unsigned long parent_rate);
67int gm20b_gpcclk_set_rate(struct clk_gk20a *clk, unsigned long rate,
68 unsigned long parent_rate);
69long gm20b_round_rate(struct clk_gk20a *clk, unsigned long rate,
70 unsigned long *parent_rate);
71struct pll_parms *gm20b_get_gpc_pll_parms(void);
72#ifdef CONFIG_DEBUG_FS
73int gm20b_clk_init_debugfs(struct gk20a *g);
74#endif
75
76int gm20b_clk_pll_reg_write(struct gk20a *g, u32 reg, u32 val);
77int gm20b_init_clk_support(struct gk20a *g);
78int gm20b_suspend_clk_support(struct gk20a *g);
79int gm20b_clk_get_voltage(struct clk_gk20a *clk, u64 *val);
80int gm20b_clk_get_gpcclk_clock_counter(struct clk_gk20a *clk, u64 *val);
81int gm20b_clk_get_pll_debug_data(struct gk20a *g,
82 struct nvgpu_clk_pll_debug_data *d);
83
84/* 1:1 match between post divider settings and divisor value */
85static inline u32 nvgpu_pl_to_div(u32 pl)
86{
87 return pl;
88}
89
90static inline u32 nvgpu_div_to_pl(u32 div)
91{
92 return div;
93}
94
95#endif /* _NVHOST_CLK_GM20B_H_ */
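The two helpers at the end of this header state that on GM20B the PL field of the GPCPLL coefficient register is the post-divider value itself, with no lookup table. Assuming the conventional relation gpc2clk = ref / M * N / PL-divisor (an assumption for this sketch, not quoted from this file), a frequency estimate from the debug coefficients looks like the following; the function name is illustrative.

/* Sketch only: estimate GPC2CLK in Hz from the PLL coefficients, using the
 * 1:1 PL-to-divisor mapping declared above. Integer truncation is ignored. */
static inline unsigned long example_gpc2clk_hz(unsigned long ref_hz,
		u32 mdiv, u32 ndiv, u32 pl)
{
	u32 div = nvgpu_pl_to_div(pl);	/* identity on GM20B */

	if (mdiv == 0 || div == 0)
		return 0;

	return ref_hz / mdiv * ndiv / div;
}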
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
new file mode 100644
index 00000000..1f8cc326
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -0,0 +1,195 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/fb_gk20a.h"
27#include "gm20b/fb_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
30#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
31#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
32#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
33
34#define VPR_INFO_FETCH_WAIT (5)
35#define WPR_INFO_ADDR_ALIGNMENT 0x0000000c
36
37void fb_gm20b_init_fs_state(struct gk20a *g)
38{
39 gk20a_dbg_info("initialize gm20b fb");
40
41 gk20a_writel(g, fb_fbhub_num_active_ltcs_r(),
42 g->ltc_count);
43}
44
45void gm20b_fb_set_mmu_page_size(struct gk20a *g)
46{
47 /* set large page size in fb */
48 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
49 fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
50 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
51}
52
53bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g)
54{
55 	/* enable use of the full comp tag line in fb */
56 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
57 fb_mmu_ctrl |= fb_mmu_ctrl_use_full_comp_tag_line_true_f();
58 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
59
60 return true;
61}
62
63unsigned int gm20b_fb_compression_page_size(struct gk20a *g)
64{
65 return SZ_128K;
66}
67
68unsigned int gm20b_fb_compressible_page_size(struct gk20a *g)
69{
70 return SZ_64K;
71}
72
73void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
74{
75 u32 val;
76
77 /* print vpr and wpr info */
78 val = gk20a_readl(g, fb_mmu_vpr_info_r());
79 val &= ~0x3;
80 val |= fb_mmu_vpr_info_index_addr_lo_v();
81 gk20a_writel(g, fb_mmu_vpr_info_r(), val);
82 nvgpu_err(g, "VPR: %08x %08x %08x %08x",
83 gk20a_readl(g, fb_mmu_vpr_info_r()),
84 gk20a_readl(g, fb_mmu_vpr_info_r()),
85 gk20a_readl(g, fb_mmu_vpr_info_r()),
86 gk20a_readl(g, fb_mmu_vpr_info_r()));
87
88 val = gk20a_readl(g, fb_mmu_wpr_info_r());
89 val &= ~0xf;
90 val |= (fb_mmu_wpr_info_index_allow_read_v());
91 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
92 nvgpu_err(g, "WPR: %08x %08x %08x %08x %08x %08x",
93 gk20a_readl(g, fb_mmu_wpr_info_r()),
94 gk20a_readl(g, fb_mmu_wpr_info_r()),
95 gk20a_readl(g, fb_mmu_wpr_info_r()),
96 gk20a_readl(g, fb_mmu_wpr_info_r()),
97 gk20a_readl(g, fb_mmu_wpr_info_r()),
98 gk20a_readl(g, fb_mmu_wpr_info_r()));
99
100}
101
102static int gm20b_fb_vpr_info_fetch_wait(struct gk20a *g,
103 unsigned int msec)
104{
105 struct nvgpu_timeout timeout;
106
107 nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER);
108
109 do {
110 u32 val;
111
112 val = gk20a_readl(g, fb_mmu_vpr_info_r());
113 if (fb_mmu_vpr_info_fetch_v(val) ==
114 fb_mmu_vpr_info_fetch_false_v())
115 return 0;
116
117 } while (!nvgpu_timeout_expired(&timeout));
118
119 return -ETIMEDOUT;
120}
121
122int gm20b_fb_vpr_info_fetch(struct gk20a *g)
123{
124 if (gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
125 return -ETIME;
126 }
127
128 gk20a_writel(g, fb_mmu_vpr_info_r(),
129 fb_mmu_vpr_info_fetch_true_v());
130
131 return gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
132}
133
134void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
135{
136 u32 val = 0;
137 u64 wpr_start = 0;
138 u64 wpr_end = 0;
139
140 val = gk20a_readl(g, fb_mmu_wpr_info_r());
141 val &= ~0xF;
142 val |= fb_mmu_wpr_info_index_wpr1_addr_lo_v();
143 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
144
145 val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4;
146 wpr_start = hi32_lo32_to_u64(
147 (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)),
148 (val << WPR_INFO_ADDR_ALIGNMENT));
149
150 val = gk20a_readl(g, fb_mmu_wpr_info_r());
151 val &= ~0xF;
152 val |= fb_mmu_wpr_info_index_wpr1_addr_hi_v();
153 gk20a_writel(g, fb_mmu_wpr_info_r(), val);
154
155 val = gk20a_readl(g, fb_mmu_wpr_info_r()) >> 0x4;
156 wpr_end = hi32_lo32_to_u64(
157 (val >> (32 - WPR_INFO_ADDR_ALIGNMENT)),
158 (val << WPR_INFO_ADDR_ALIGNMENT));
159
160 inf->wpr_base = wpr_start;
161 inf->nonwpr_base = 0;
162 inf->size = (wpr_end - wpr_start);
163}
164
165bool gm20b_fb_debug_mode_enabled(struct gk20a *g)
166{
167 u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
168 return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) ==
169 gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
170}
171
172void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable)
173{
174 u32 reg_val, fb_debug_ctrl, gpc_debug_ctrl;
175
176 if (enable) {
177 fb_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
178 gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f();
179 g->mmu_debug_ctrl = true;
180 } else {
181 fb_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
182 gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f();
183 g->mmu_debug_ctrl = false;
184 }
185
186 reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
187 reg_val = set_field(reg_val,
188 fb_mmu_debug_ctrl_debug_m(), fb_debug_ctrl);
189 gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
190
191 reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
192 reg_val = set_field(reg_val,
193 gr_gpcs_pri_mmu_debug_ctrl_debug_m(), gpc_debug_ctrl);
194 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val);
195}
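In gm20b_fb_read_wpr_info() above, each read of fb_mmu_wpr_info_r() returns the selected WPR address in the upper 28 bits (the low 4 bits carry the index), with the address stored in 4 KB units; hence the shift right by 4 followed by the hi32_lo32_to_u64() reconstruction using WPR_INFO_ADDR_ALIGNMENT (12). The same unpacking, written as a standalone sketch with no register access (the function name is illustrative):

/* "val" is the WPR_INFO register value already shifted right by 4, i.e. the
 * carveout address in 4 KB units. The result is the byte address, equivalent
 * to (u64)val << WPR_INFO_ADDR_ALIGNMENT. */
static inline u64 example_wpr_field_to_bytes(u32 val)
{
	u32 hi = val >> (32 - WPR_INFO_ADDR_ALIGNMENT);
	u32 lo = val << WPR_INFO_ADDR_ALIGNMENT;	/* truncated to 32 bits */

	return ((u64)hi << 32) | lo;
}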
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h
new file mode 100644
index 00000000..32d36f57
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.h
@@ -0,0 +1,40 @@
1/*
2 * GM20B FB
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_FB
26#define _NVHOST_GM20B_FB
27struct gk20a;
28
29void fb_gm20b_init_fs_state(struct gk20a *g);
30void gm20b_fb_set_mmu_page_size(struct gk20a *g);
31bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g);
32unsigned int gm20b_fb_compression_page_size(struct gk20a *g);
33unsigned int gm20b_fb_compressible_page_size(struct gk20a *g);
34void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g);
35void gm20b_fb_read_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
36int gm20b_fb_vpr_info_fetch(struct gk20a *g);
37bool gm20b_fb_debug_mode_enabled(struct gk20a *g);
38void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable);
39
40#endif
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
new file mode 100644
index 00000000..0762e8bd
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -0,0 +1,223 @@
1/*
2 * GM20B Fifo
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/fifo_gk20a.h"
27
28#include "fifo_gm20b.h"
29
30#include <nvgpu/timers.h>
31#include <nvgpu/log.h>
32#include <nvgpu/atomic.h>
33#include <nvgpu/barrier.h>
34#include <nvgpu/mm.h>
35
36#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
38#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
39#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
40#include <nvgpu/hw/gm20b/hw_pbdma_gm20b.h>
41
42void channel_gm20b_bind(struct channel_gk20a *c)
43{
44 struct gk20a *g = c->g;
45
46 u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block)
47 >> ram_in_base_shift_v();
48
49 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
50 c->chid, inst_ptr);
51
52
53 gk20a_writel(g, ccsr_channel_inst_r(c->chid),
54 ccsr_channel_inst_ptr_f(inst_ptr) |
55 nvgpu_aperture_mask(g, &c->inst_block,
56 ccsr_channel_inst_target_sys_mem_ncoh_f(),
57 ccsr_channel_inst_target_vid_mem_f()) |
58 ccsr_channel_inst_bind_true_f());
59
60 gk20a_writel(g, ccsr_channel_r(c->chid),
61 (gk20a_readl(g, ccsr_channel_r(c->chid)) &
62 ~ccsr_channel_enable_set_f(~0)) |
63 ccsr_channel_enable_set_true_f());
64 nvgpu_smp_wmb();
65 nvgpu_atomic_set(&c->bound, true);
66}
67
68static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
69{
70 u32 fault_id = ~0;
71 struct fifo_engine_info_gk20a *engine_info;
72
73 engine_info = gk20a_fifo_get_engine_info(g, engine_id);
74
75 if (engine_info) {
76 fault_id = engine_info->fault_id;
77 } else {
78 nvgpu_err(g, "engine_id is not in active list/invalid %d", engine_id);
79 }
80 return fault_id;
81}
82
83void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
84 unsigned long engine_ids)
85{
86 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
87 unsigned long engine_id;
88 int ret = -EBUSY;
89 struct nvgpu_timeout timeout;
90
91 /* trigger faults for all bad engines */
92 for_each_set_bit(engine_id, &engine_ids, 32) {
93 if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {
94 nvgpu_err(g, "faulting unknown engine %ld", engine_id);
95 } else {
96 u32 mmu_id = gm20b_engine_id_to_mmu_id(g,
97 engine_id);
98 if (mmu_id != (u32)~0)
99 gk20a_writel(g, fifo_trigger_mmu_fault_r(mmu_id),
100 fifo_trigger_mmu_fault_enable_f(1));
101 }
102 }
103
104 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
105 NVGPU_TIMER_CPU_TIMER);
106
107 /* Wait for MMU fault to trigger */
108 do {
109 if (gk20a_readl(g, fifo_intr_0_r()) &
110 fifo_intr_0_mmu_fault_pending_f()) {
111 ret = 0;
112 break;
113 }
114
115 nvgpu_usleep_range(delay, delay * 2);
116 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
117 } while (!nvgpu_timeout_expired(&timeout));
118
119 if (ret)
120 nvgpu_err(g, "mmu fault timeout");
121
122 /* release mmu fault trigger */
123 for_each_set_bit(engine_id, &engine_ids, 32)
124 gk20a_writel(g, fifo_trigger_mmu_fault_r(engine_id), 0);
125}
126
127u32 gm20b_fifo_get_num_fifos(struct gk20a *g)
128{
129 return ccsr_channel__size_1_v();
130}
131
132void gm20b_device_info_data_parse(struct gk20a *g,
133 u32 table_entry, u32 *inst_id,
134 u32 *pri_base, u32 *fault_id)
135{
136 if (top_device_info_data_type_v(table_entry) ==
137 top_device_info_data_type_enum2_v()) {
138 if (pri_base) {
139 *pri_base =
140 (top_device_info_data_pri_base_v(table_entry)
141 << top_device_info_data_pri_base_align_v());
142 }
143 if (fault_id && (top_device_info_data_fault_id_v(table_entry) ==
144 top_device_info_data_fault_id_valid_v())) {
145 *fault_id =
146 top_device_info_data_fault_id_enum_v(table_entry);
147 }
148 } else
149 nvgpu_err(g, "unknown device_info_data %d",
150 top_device_info_data_type_v(table_entry));
151}
152
153void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f)
154{
155 	/*
156 	 * These are all errors which indicate that something has gone
157 	 * seriously wrong in the device.
158 	 */
159 f->intr.pbdma.device_fatal_0 =
160 pbdma_intr_0_memreq_pending_f() |
161 pbdma_intr_0_memack_timeout_pending_f() |
162 pbdma_intr_0_memack_extra_pending_f() |
163 pbdma_intr_0_memdat_timeout_pending_f() |
164 pbdma_intr_0_memdat_extra_pending_f() |
165 pbdma_intr_0_memflush_pending_f() |
166 pbdma_intr_0_memop_pending_f() |
167 pbdma_intr_0_lbconnect_pending_f() |
168 pbdma_intr_0_lback_timeout_pending_f() |
169 pbdma_intr_0_lback_extra_pending_f() |
170 pbdma_intr_0_lbdat_timeout_pending_f() |
171 pbdma_intr_0_lbdat_extra_pending_f() |
172 pbdma_intr_0_pri_pending_f();
173
174 	/*
175 	 * These are data parsing or framing errors, or others that can be
176 	 * recovered from with intervention... or by just resetting the
177 	 * channel.
178 	 */
179 f->intr.pbdma.channel_fatal_0 =
180 pbdma_intr_0_gpfifo_pending_f() |
181 pbdma_intr_0_gpptr_pending_f() |
182 pbdma_intr_0_gpentry_pending_f() |
183 pbdma_intr_0_gpcrc_pending_f() |
184 pbdma_intr_0_pbptr_pending_f() |
185 pbdma_intr_0_pbentry_pending_f() |
186 pbdma_intr_0_pbcrc_pending_f() |
187 pbdma_intr_0_method_pending_f() |
188 pbdma_intr_0_methodcrc_pending_f() |
189 pbdma_intr_0_pbseg_pending_f() |
190 pbdma_intr_0_signature_pending_f();
191
192 /* Can be used for sw-methods, or represents a recoverable timeout. */
193 f->intr.pbdma.restartable_0 =
194 pbdma_intr_0_device_pending_f();
195}
196
197static void gm20b_fifo_set_ctx_reload(struct channel_gk20a *ch)
198{
199 struct gk20a *g = ch->g;
200 u32 channel = gk20a_readl(g, ccsr_channel_r(ch->chid));
201
202 gk20a_writel(g, ccsr_channel_r(ch->chid),
203 channel | ccsr_channel_force_ctx_reload_true_f());
204}
205
206void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch)
207{
208 struct gk20a *g = ch->g;
209 struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
210 struct channel_gk20a *temp_ch;
211
212 /* If CTX_RELOAD is set on a channel, move it to some other channel */
213 if (gk20a_fifo_channel_status_is_ctx_reload(ch->g, ch->chid)) {
214 nvgpu_rwsem_down_read(&tsg->ch_list_lock);
215 nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, channel_gk20a, ch_entry) {
216 if (temp_ch->chid != ch->chid) {
217 gm20b_fifo_set_ctx_reload(temp_ch);
218 break;
219 }
220 }
221 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
222 }
223}
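gm20b_fifo_trigger_mmu_fault() above waits for the fault interrupt with a bounded poll: nvgpu_timeout caps the total wait while the sleep interval doubles each pass up to GR_IDLE_CHECK_MAX. The same idiom, separated out as a hedged sketch (the helper name and the "done" predicate are illustrative, not driver API):

static int example_poll_with_backoff(struct gk20a *g,
		bool (*done)(struct gk20a *g), u32 timeout_ms)
{
	struct nvgpu_timeout timeout;
	unsigned long delay = GR_IDLE_CHECK_DEFAULT;

	nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);

	do {
		if (done(g))
			return 0;

		nvgpu_usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (!nvgpu_timeout_expired(&timeout));

	return -ETIMEDOUT;
}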
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h
new file mode 100644
index 00000000..8d487358
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.h
@@ -0,0 +1,39 @@
1/*
2 * GM20B Fifo
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_FIFO
26#define _NVHOST_GM20B_FIFO
27struct gk20a;
28
29void channel_gm20b_bind(struct channel_gk20a *c);
30void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
31 unsigned long engine_ids);
32u32 gm20b_fifo_get_num_fifos(struct gk20a *g);
33void gm20b_device_info_data_parse(struct gk20a *g,
34 u32 table_entry, u32 *inst_id,
35 u32 *pri_base, u32 *fault_id);
36void gm20b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f);
37void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch);
38
39#endif
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c
new file mode 100644
index 00000000..0ebb2d0d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.c
@@ -0,0 +1,731 @@
1/*
2 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * This file is autogenerated. Do not edit.
23 */
24
25#ifndef __gm20b_gating_reglist_h__
26#define __gm20b_gating_reglist_h__
27
28#include "gm20b_gating_reglist.h"
29#include <nvgpu/enabled.h>
30
31struct gating_desc {
32 u32 addr;
33 u32 prod;
34 u32 disable;
35};
36/* slcg bus */
37static const struct gating_desc gm20b_slcg_bus[] = {
38 {.addr = 0x00001c04, .prod = 0x00000000, .disable = 0x000003fe},
39};
40
41/* slcg ce2 */
42static const struct gating_desc gm20b_slcg_ce2[] = {
43 {.addr = 0x00106f28, .prod = 0x00000000, .disable = 0x000007fe},
44};
45
46/* slcg chiplet */
47static const struct gating_desc gm20b_slcg_chiplet[] = {
48 {.addr = 0x0010c07c, .prod = 0x00000000, .disable = 0x00000007},
49 {.addr = 0x0010e07c, .prod = 0x00000000, .disable = 0x00000007},
50 {.addr = 0x0010d07c, .prod = 0x00000000, .disable = 0x00000007},
51 {.addr = 0x0010e17c, .prod = 0x00000000, .disable = 0x00000007},
52};
53
54/* slcg fb */
55static const struct gating_desc gm20b_slcg_fb[] = {
56 {.addr = 0x00100d14, .prod = 0x00000000, .disable = 0xfffffffe},
57 {.addr = 0x00100c9c, .prod = 0x00000000, .disable = 0x000001fe},
58};
59
60/* slcg fifo */
61static const struct gating_desc gm20b_slcg_fifo[] = {
62 {.addr = 0x000026ac, .prod = 0x00000100, .disable = 0x0001fffe},
63};
64
65/* slcg gr */
66static const struct gating_desc gm20b_slcg_gr[] = {
67 {.addr = 0x004041f4, .prod = 0x00000002, .disable = 0x03fffffe},
68 {.addr = 0x0040917c, .prod = 0x00020008, .disable = 0x0003fffe},
69 {.addr = 0x00409894, .prod = 0x00000040, .disable = 0x0003fffe},
70 {.addr = 0x004078c4, .prod = 0x00000000, .disable = 0x000001fe},
71 {.addr = 0x00406004, .prod = 0x00000000, .disable = 0x0001fffe},
72 {.addr = 0x00405864, .prod = 0x00000000, .disable = 0x000001fe},
73 {.addr = 0x00405910, .prod = 0xfffffff0, .disable = 0xfffffffe},
74 {.addr = 0x00408044, .prod = 0x00000000, .disable = 0x000007fe},
75 {.addr = 0x00407004, .prod = 0x00000000, .disable = 0x0000007e},
76 {.addr = 0x0041a17c, .prod = 0x00020008, .disable = 0x0003fffe},
77 {.addr = 0x0041a894, .prod = 0x00000040, .disable = 0x0003fffe},
78 {.addr = 0x00418504, .prod = 0x00000000, .disable = 0x0007fffe},
79 {.addr = 0x0041860c, .prod = 0x00000000, .disable = 0x000001fe},
80 {.addr = 0x0041868c, .prod = 0x00000000, .disable = 0x0000001e},
81 {.addr = 0x0041871c, .prod = 0x00000000, .disable = 0x0000003e},
82 {.addr = 0x00418388, .prod = 0x00000000, .disable = 0x00000001},
83 {.addr = 0x0041882c, .prod = 0x00000000, .disable = 0x0001fffe},
84 {.addr = 0x00418bc0, .prod = 0x00000000, .disable = 0x000001fe},
85 {.addr = 0x00418974, .prod = 0x00000000, .disable = 0x0001fffe},
86 {.addr = 0x00418c74, .prod = 0xffffffc0, .disable = 0xfffffffe},
87 {.addr = 0x00418cf4, .prod = 0xfffffffc, .disable = 0xfffffffe},
88 {.addr = 0x00418d74, .prod = 0xffffffe0, .disable = 0xfffffffe},
89 {.addr = 0x00418f10, .prod = 0xffffffe0, .disable = 0xfffffffe},
90 {.addr = 0x00418e10, .prod = 0xfffffffe, .disable = 0xfffffffe},
91 {.addr = 0x00419024, .prod = 0x000001fe, .disable = 0x000001fe},
92 {.addr = 0x0041889c, .prod = 0x00000000, .disable = 0x000001fe},
93 {.addr = 0x00419d64, .prod = 0x00000000, .disable = 0x000001ff},
94 {.addr = 0x00419a44, .prod = 0x00000000, .disable = 0x0000000e},
95 {.addr = 0x00419a4c, .prod = 0x00000000, .disable = 0x000001fe},
96 {.addr = 0x00419a54, .prod = 0x00000000, .disable = 0x0000003e},
97 {.addr = 0x00419a5c, .prod = 0x00000000, .disable = 0x0000000e},
98 {.addr = 0x00419a64, .prod = 0x00000000, .disable = 0x000001fe},
99 {.addr = 0x00419a6c, .prod = 0x00000000, .disable = 0x0000000e},
100 {.addr = 0x00419a74, .prod = 0x00000000, .disable = 0x0000000e},
101 {.addr = 0x00419a7c, .prod = 0x00000000, .disable = 0x0000003e},
102 {.addr = 0x00419a84, .prod = 0x00000000, .disable = 0x0000000e},
103 {.addr = 0x0041986c, .prod = 0x00000104, .disable = 0x00fffffe},
104 {.addr = 0x00419cd8, .prod = 0x00000000, .disable = 0x001ffffe},
105 {.addr = 0x00419ce0, .prod = 0x00000000, .disable = 0x001ffffe},
106 {.addr = 0x00419c74, .prod = 0x0000001e, .disable = 0x0000001e},
107 {.addr = 0x00419fd4, .prod = 0x00000000, .disable = 0x0003fffe},
108 {.addr = 0x00419fdc, .prod = 0xffedff00, .disable = 0xfffffffe},
109 {.addr = 0x00419fe4, .prod = 0x00001b00, .disable = 0x00001ffe},
110 {.addr = 0x00419ff4, .prod = 0x00000000, .disable = 0x00003ffe},
111 {.addr = 0x00419ffc, .prod = 0x00000000, .disable = 0x0001fffe},
112 {.addr = 0x0041be2c, .prod = 0x04115fc0, .disable = 0xfffffffe},
113 {.addr = 0x0041bfec, .prod = 0xfffffff0, .disable = 0xfffffffe},
114 {.addr = 0x0041bed4, .prod = 0xfffffff6, .disable = 0xfffffffe},
115 {.addr = 0x00408814, .prod = 0x00000000, .disable = 0x0001fffe},
116 {.addr = 0x0040881c, .prod = 0x00000000, .disable = 0x0001fffe},
117 {.addr = 0x00408a84, .prod = 0x00000000, .disable = 0x0001fffe},
118 {.addr = 0x00408a8c, .prod = 0x00000000, .disable = 0x0001fffe},
119 {.addr = 0x00408a94, .prod = 0x00000000, .disable = 0x0001fffe},
120 {.addr = 0x00408a9c, .prod = 0x00000000, .disable = 0x0001fffe},
121 {.addr = 0x00408aa4, .prod = 0x00000000, .disable = 0x0001fffe},
122 {.addr = 0x00408aac, .prod = 0x00000000, .disable = 0x0001fffe},
123 {.addr = 0x004089ac, .prod = 0x00000000, .disable = 0x0001fffe},
124 {.addr = 0x00408a24, .prod = 0x00000000, .disable = 0x000001ff},
125};
126
127/* slcg ltc */
128static const struct gating_desc gm20b_slcg_ltc[] = {
129 {.addr = 0x0017e050, .prod = 0x00000000, .disable = 0xfffffffe},
130 {.addr = 0x0017e35c, .prod = 0x00000000, .disable = 0xfffffffe},
131};
132
133/* slcg perf */
134static const struct gating_desc gm20b_slcg_perf[] = {
135 {.addr = 0x001be018, .prod = 0x000001ff, .disable = 0x00000000},
136 {.addr = 0x001bc018, .prod = 0x000001ff, .disable = 0x00000000},
137 {.addr = 0x001b8018, .prod = 0x000001ff, .disable = 0x00000000},
138 {.addr = 0x001b4124, .prod = 0x00000001, .disable = 0x00000000},
139};
140
141/* slcg PriRing */
142static const struct gating_desc gm20b_slcg_priring[] = {
143 {.addr = 0x001200a8, .prod = 0x00000000, .disable = 0x00000001},
144};
145
146/* slcg pwr_csb */
147static const struct gating_desc gm20b_slcg_pwr_csb[] = {
148 {.addr = 0x0000017c, .prod = 0x00020008, .disable = 0x0003fffe},
149 {.addr = 0x00000e74, .prod = 0x00000000, .disable = 0x0000000f},
150 {.addr = 0x00000a74, .prod = 0x00000000, .disable = 0x00007ffe},
151 {.addr = 0x000016b8, .prod = 0x00000000, .disable = 0x0000000f},
152};
153
154/* slcg pmu */
155static const struct gating_desc gm20b_slcg_pmu[] = {
156 {.addr = 0x0010a17c, .prod = 0x00020008, .disable = 0x0003fffe},
157 {.addr = 0x0010aa74, .prod = 0x00000000, .disable = 0x00007ffe},
158 {.addr = 0x0010ae74, .prod = 0x00000000, .disable = 0x0000000f},
159};
160
161/* therm gr */
162static const struct gating_desc gm20b_slcg_therm[] = {
163 {.addr = 0x000206b8, .prod = 0x00000000, .disable = 0x0000000f},
164};
165
166/* slcg Xbar */
167static const struct gating_desc gm20b_slcg_xbar[] = {
168 {.addr = 0x0013cbe4, .prod = 0x00000000, .disable = 0x1ffffffe},
169 {.addr = 0x0013cc04, .prod = 0x00000000, .disable = 0x1ffffffe},
170};
171
172/* blcg bus */
173static const struct gating_desc gm20b_blcg_bus[] = {
174 {.addr = 0x00001c00, .prod = 0x00000042, .disable = 0x00000000},
175};
176
177/* blcg ctxsw prog */
178static const struct gating_desc gm20b_blcg_ctxsw_prog[] = {
179};
180
181/* blcg fb */
182static const struct gating_desc gm20b_blcg_fb[] = {
183 {.addr = 0x00100d10, .prod = 0x0000c242, .disable = 0x00000000},
184 {.addr = 0x00100d30, .prod = 0x0000c242, .disable = 0x00000000},
185 {.addr = 0x00100d3c, .prod = 0x00000242, .disable = 0x00000000},
186 {.addr = 0x00100d48, .prod = 0x0000c242, .disable = 0x00000000},
187 {.addr = 0x00100c98, .prod = 0x00000242, .disable = 0x00000000},
188};
189
190/* blcg fifo */
191static const struct gating_desc gm20b_blcg_fifo[] = {
192 {.addr = 0x000026a4, .prod = 0x0000c242, .disable = 0x00000000},
193};
194
195/* blcg gr */
196static const struct gating_desc gm20b_blcg_gr[] = {
197 {.addr = 0x004041f0, .prod = 0x00004046, .disable = 0x00000000},
198 {.addr = 0x00409890, .prod = 0x0000007f, .disable = 0x00000000},
199 {.addr = 0x004098b0, .prod = 0x0000007f, .disable = 0x00000000},
200 {.addr = 0x004078c0, .prod = 0x00000042, .disable = 0x00000000},
201 {.addr = 0x00406000, .prod = 0x00004044, .disable = 0x00000000},
202 {.addr = 0x00405860, .prod = 0x00004042, .disable = 0x00000000},
203 {.addr = 0x0040590c, .prod = 0x00004044, .disable = 0x00000000},
204 {.addr = 0x00408040, .prod = 0x00004044, .disable = 0x00000000},
205 {.addr = 0x00407000, .prod = 0x00004041, .disable = 0x00000000},
206 {.addr = 0x00405bf0, .prod = 0x00004044, .disable = 0x00000000},
207 {.addr = 0x0041a890, .prod = 0x0000007f, .disable = 0x00000000},
208 {.addr = 0x0041a8b0, .prod = 0x0000007f, .disable = 0x00000000},
209 {.addr = 0x00418500, .prod = 0x00004044, .disable = 0x00000000},
210 {.addr = 0x00418608, .prod = 0x00004042, .disable = 0x00000000},
211 {.addr = 0x00418688, .prod = 0x00004042, .disable = 0x00000000},
212 {.addr = 0x00418718, .prod = 0x00000042, .disable = 0x00000000},
213 {.addr = 0x00418828, .prod = 0x00000044, .disable = 0x00000000},
214 {.addr = 0x00418bbc, .prod = 0x00004042, .disable = 0x00000000},
215 {.addr = 0x00418970, .prod = 0x00004042, .disable = 0x00000000},
216 {.addr = 0x00418c70, .prod = 0x00004044, .disable = 0x00000000},
217 {.addr = 0x00418cf0, .prod = 0x00004044, .disable = 0x00000000},
218 {.addr = 0x00418d70, .prod = 0x00004044, .disable = 0x00000000},
219 {.addr = 0x00418f0c, .prod = 0x00004044, .disable = 0x00000000},
220 {.addr = 0x00418e0c, .prod = 0x00004044, .disable = 0x00000000},
221 {.addr = 0x00419020, .prod = 0x00004042, .disable = 0x00000000},
222 {.addr = 0x00419038, .prod = 0x00000042, .disable = 0x00000000},
223 {.addr = 0x00418898, .prod = 0x00000042, .disable = 0x00000000},
224 {.addr = 0x00419a40, .prod = 0x00000042, .disable = 0x00000000},
225 {.addr = 0x00419a48, .prod = 0x00004042, .disable = 0x00000000},
226 {.addr = 0x00419a50, .prod = 0x00004042, .disable = 0x00000000},
227 {.addr = 0x00419a58, .prod = 0x00004042, .disable = 0x00000000},
228 {.addr = 0x00419a60, .prod = 0x00004042, .disable = 0x00000000},
229 {.addr = 0x00419a68, .prod = 0x00004042, .disable = 0x00000000},
230 {.addr = 0x00419a70, .prod = 0x00004042, .disable = 0x00000000},
231 {.addr = 0x00419a78, .prod = 0x00004042, .disable = 0x00000000},
232 {.addr = 0x00419a80, .prod = 0x00004042, .disable = 0x00000000},
233 {.addr = 0x00419868, .prod = 0x00000042, .disable = 0x00000000},
234 {.addr = 0x00419cd4, .prod = 0x00000002, .disable = 0x00000000},
235 {.addr = 0x00419cdc, .prod = 0x00000002, .disable = 0x00000000},
236 {.addr = 0x00419c70, .prod = 0x00004044, .disable = 0x00000000},
237 {.addr = 0x00419fd0, .prod = 0x00004044, .disable = 0x00000000},
238 {.addr = 0x00419fd8, .prod = 0x00004046, .disable = 0x00000000},
239 {.addr = 0x00419fe0, .prod = 0x00004044, .disable = 0x00000000},
240 {.addr = 0x00419fe8, .prod = 0x00000042, .disable = 0x00000000},
241 {.addr = 0x00419ff0, .prod = 0x00004045, .disable = 0x00000000},
242 {.addr = 0x00419ff8, .prod = 0x00000002, .disable = 0x00000000},
243 {.addr = 0x00419f90, .prod = 0x00000002, .disable = 0x00000000},
244 {.addr = 0x0041be28, .prod = 0x00000042, .disable = 0x00000000},
245 {.addr = 0x0041bfe8, .prod = 0x00004044, .disable = 0x00000000},
246 {.addr = 0x0041bed0, .prod = 0x00004044, .disable = 0x00000000},
247 {.addr = 0x00408810, .prod = 0x00004042, .disable = 0x00000000},
248 {.addr = 0x00408818, .prod = 0x00004042, .disable = 0x00000000},
249 {.addr = 0x00408a80, .prod = 0x00004042, .disable = 0x00000000},
250 {.addr = 0x00408a88, .prod = 0x00004042, .disable = 0x00000000},
251 {.addr = 0x00408a90, .prod = 0x00004042, .disable = 0x00000000},
252 {.addr = 0x00408a98, .prod = 0x00004042, .disable = 0x00000000},
253 {.addr = 0x00408aa0, .prod = 0x00004042, .disable = 0x00000000},
254 {.addr = 0x00408aa8, .prod = 0x00004042, .disable = 0x00000000},
255 {.addr = 0x004089a8, .prod = 0x00004042, .disable = 0x00000000},
256 {.addr = 0x004089b0, .prod = 0x00000042, .disable = 0x00000000},
257 {.addr = 0x004089b8, .prod = 0x00004042, .disable = 0x00000000},
258};
259
260/* blcg ltc */
261static const struct gating_desc gm20b_blcg_ltc[] = {
262 {.addr = 0x0017e030, .prod = 0x00000044, .disable = 0x00000000},
263 {.addr = 0x0017e040, .prod = 0x00000044, .disable = 0x00000000},
264 {.addr = 0x0017e3e0, .prod = 0x00000044, .disable = 0x00000000},
265 {.addr = 0x0017e3c8, .prod = 0x00000044, .disable = 0x00000000},
266};
267
268/* blcg pwr_csb */
269static const struct gating_desc gm20b_blcg_pwr_csb[] = {
270 {.addr = 0x00000a70, .prod = 0x00000045, .disable = 0x00000000},
271};
272
273/* blcg pmu */
274static const struct gating_desc gm20b_blcg_pmu[] = {
275 {.addr = 0x0010aa70, .prod = 0x00000045, .disable = 0x00000000},
276};
277
278/* blcg Xbar */
279static const struct gating_desc gm20b_blcg_xbar[] = {
280 {.addr = 0x0013cbe0, .prod = 0x00000042, .disable = 0x00000000},
281 {.addr = 0x0013cc00, .prod = 0x00000042, .disable = 0x00000000},
282};
283
284/* pg gr */
285static const struct gating_desc gm20b_pg_gr[] = {
286};
287
288/* inline functions */
289void gm20b_slcg_bus_load_gating_prod(struct gk20a *g,
290 bool prod)
291{
292 u32 i;
293 u32 size = sizeof(gm20b_slcg_bus) / sizeof(struct gating_desc);
294
295 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
296 return;
297
298 for (i = 0; i < size; i++) {
299 if (prod)
300 gk20a_writel(g, gm20b_slcg_bus[i].addr,
301 gm20b_slcg_bus[i].prod);
302 else
303 gk20a_writel(g, gm20b_slcg_bus[i].addr,
304 gm20b_slcg_bus[i].disable);
305 }
306}
307
308void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g,
309 bool prod)
310{
311 u32 i;
312 u32 size = sizeof(gm20b_slcg_ce2) / sizeof(struct gating_desc);
313
314 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
315 return;
316
317 for (i = 0; i < size; i++) {
318 if (prod)
319 gk20a_writel(g, gm20b_slcg_ce2[i].addr,
320 gm20b_slcg_ce2[i].prod);
321 else
322 gk20a_writel(g, gm20b_slcg_ce2[i].addr,
323 gm20b_slcg_ce2[i].disable);
324 }
325}
326
327void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g,
328 bool prod)
329{
330 u32 i;
331 u32 size = sizeof(gm20b_slcg_chiplet) / sizeof(struct gating_desc);
332
333 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
334 return;
335
336 for (i = 0; i < size; i++) {
337 if (prod)
338 gk20a_writel(g, gm20b_slcg_chiplet[i].addr,
339 gm20b_slcg_chiplet[i].prod);
340 else
341 gk20a_writel(g, gm20b_slcg_chiplet[i].addr,
342 gm20b_slcg_chiplet[i].disable);
343 }
344}
345
346void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
347 bool prod)
348{
349}
350
351void gm20b_slcg_fb_load_gating_prod(struct gk20a *g,
352 bool prod)
353{
354 u32 i;
355 u32 size = sizeof(gm20b_slcg_fb) / sizeof(struct gating_desc);
356
357 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
358 return;
359
360 for (i = 0; i < size; i++) {
361 if (prod)
362 gk20a_writel(g, gm20b_slcg_fb[i].addr,
363 gm20b_slcg_fb[i].prod);
364 else
365 gk20a_writel(g, gm20b_slcg_fb[i].addr,
366 gm20b_slcg_fb[i].disable);
367 }
368}
369
370void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g,
371 bool prod)
372{
373 u32 i;
374 u32 size = sizeof(gm20b_slcg_fifo) / sizeof(struct gating_desc);
375
376 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
377 return;
378
379 for (i = 0; i < size; i++) {
380 if (prod)
381 gk20a_writel(g, gm20b_slcg_fifo[i].addr,
382 gm20b_slcg_fifo[i].prod);
383 else
384 gk20a_writel(g, gm20b_slcg_fifo[i].addr,
385 gm20b_slcg_fifo[i].disable);
386 }
387}
388
389void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g,
390 bool prod)
391{
392 u32 i;
393 u32 size = sizeof(gm20b_slcg_gr) / sizeof(struct gating_desc);
394
395 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
396 return;
397
398 for (i = 0; i < size; i++) {
399 if (prod)
400 gk20a_writel(g, gm20b_slcg_gr[i].addr,
401 gm20b_slcg_gr[i].prod);
402 else
403 gk20a_writel(g, gm20b_slcg_gr[i].addr,
404 gm20b_slcg_gr[i].disable);
405 }
406}
407
408void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g,
409 bool prod)
410{
411 u32 i;
412 u32 size = sizeof(gm20b_slcg_ltc) / sizeof(struct gating_desc);
413
414 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
415 return;
416
417 for (i = 0; i < size; i++) {
418 if (prod)
419 gk20a_writel(g, gm20b_slcg_ltc[i].addr,
420 gm20b_slcg_ltc[i].prod);
421 else
422 gk20a_writel(g, gm20b_slcg_ltc[i].addr,
423 gm20b_slcg_ltc[i].disable);
424 }
425}
426
427void gm20b_slcg_perf_load_gating_prod(struct gk20a *g,
428 bool prod)
429{
430 u32 i;
431 u32 size = sizeof(gm20b_slcg_perf) / sizeof(struct gating_desc);
432
433 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
434 return;
435
436 for (i = 0; i < size; i++) {
437 if (prod)
438 gk20a_writel(g, gm20b_slcg_perf[i].addr,
439 gm20b_slcg_perf[i].prod);
440 else
441 gk20a_writel(g, gm20b_slcg_perf[i].addr,
442 gm20b_slcg_perf[i].disable);
443 }
444}
445
446void gm20b_slcg_priring_load_gating_prod(struct gk20a *g,
447 bool prod)
448{
449 u32 i;
450 u32 size = sizeof(gm20b_slcg_priring) / sizeof(struct gating_desc);
451
452 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
453 return;
454
455 for (i = 0; i < size; i++) {
456 if (prod)
457 gk20a_writel(g, gm20b_slcg_priring[i].addr,
458 gm20b_slcg_priring[i].prod);
459 else
460 gk20a_writel(g, gm20b_slcg_priring[i].addr,
461 gm20b_slcg_priring[i].disable);
462 }
463}
464
465void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
466 bool prod)
467{
468 u32 i;
469 u32 size = sizeof(gm20b_slcg_pwr_csb) / sizeof(struct gating_desc);
470
471 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
472 return;
473
474 for (i = 0; i < size; i++) {
475 if (prod)
476 gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr,
477 gm20b_slcg_pwr_csb[i].prod);
478 else
479 gk20a_writel(g, gm20b_slcg_pwr_csb[i].addr,
480 gm20b_slcg_pwr_csb[i].disable);
481 }
482}
483
484void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g,
485 bool prod)
486{
487 u32 i;
488 u32 size = sizeof(gm20b_slcg_pmu) / sizeof(struct gating_desc);
489
490 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
491 return;
492
493 for (i = 0; i < size; i++) {
494 if (prod)
495 gk20a_writel(g, gm20b_slcg_pmu[i].addr,
496 gm20b_slcg_pmu[i].prod);
497 else
498 gk20a_writel(g, gm20b_slcg_pmu[i].addr,
499 gm20b_slcg_pmu[i].disable);
500 }
501}
502
503void gm20b_slcg_therm_load_gating_prod(struct gk20a *g,
504 bool prod)
505{
506 u32 i;
507 u32 size = sizeof(gm20b_slcg_therm) / sizeof(struct gating_desc);
508
509 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
510 return;
511
512 for (i = 0; i < size; i++) {
513 if (prod)
514 gk20a_writel(g, gm20b_slcg_therm[i].addr,
515 gm20b_slcg_therm[i].prod);
516 else
517 gk20a_writel(g, gm20b_slcg_therm[i].addr,
518 gm20b_slcg_therm[i].disable);
519 }
520}
521
522void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g,
523 bool prod)
524{
525 u32 i;
526 u32 size = sizeof(gm20b_slcg_xbar) / sizeof(struct gating_desc);
527
528 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_SLCG))
529 return;
530
531 for (i = 0; i < size; i++) {
532 if (prod)
533 gk20a_writel(g, gm20b_slcg_xbar[i].addr,
534 gm20b_slcg_xbar[i].prod);
535 else
536 gk20a_writel(g, gm20b_slcg_xbar[i].addr,
537 gm20b_slcg_xbar[i].disable);
538 }
539}
540
541void gm20b_blcg_bus_load_gating_prod(struct gk20a *g,
542 bool prod)
543{
544 u32 i;
545 u32 size = sizeof(gm20b_blcg_bus) / sizeof(struct gating_desc);
546
547 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
548 return;
549
550 for (i = 0; i < size; i++) {
551 if (prod)
552 gk20a_writel(g, gm20b_blcg_bus[i].addr,
553 gm20b_blcg_bus[i].prod);
554 else
555 gk20a_writel(g, gm20b_blcg_bus[i].addr,
556 gm20b_blcg_bus[i].disable);
557 }
558}
559
560void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
561 bool prod)
562{
563 u32 i;
564 u32 size = sizeof(gm20b_blcg_ctxsw_prog) / sizeof(struct gating_desc);
565
566 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
567 return;
568
569 for (i = 0; i < size; i++) {
570 if (prod)
571 gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr,
572 gm20b_blcg_ctxsw_prog[i].prod);
573 else
574 gk20a_writel(g, gm20b_blcg_ctxsw_prog[i].addr,
575 gm20b_blcg_ctxsw_prog[i].disable);
576 }
577}
578
579void gm20b_blcg_fb_load_gating_prod(struct gk20a *g,
580 bool prod)
581{
582 u32 i;
583 u32 size = sizeof(gm20b_blcg_fb) / sizeof(struct gating_desc);
584
585 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
586 return;
587
588 for (i = 0; i < size; i++) {
589 if (prod)
590 gk20a_writel(g, gm20b_blcg_fb[i].addr,
591 gm20b_blcg_fb[i].prod);
592 else
593 gk20a_writel(g, gm20b_blcg_fb[i].addr,
594 gm20b_blcg_fb[i].disable);
595 }
596}
597
598void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g,
599 bool prod)
600{
601 u32 i;
602 u32 size = sizeof(gm20b_blcg_fifo) / sizeof(struct gating_desc);
603
604 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
605 return;
606
607 for (i = 0; i < size; i++) {
608 if (prod)
609 gk20a_writel(g, gm20b_blcg_fifo[i].addr,
610 gm20b_blcg_fifo[i].prod);
611 else
612 gk20a_writel(g, gm20b_blcg_fifo[i].addr,
613 gm20b_blcg_fifo[i].disable);
614 }
615}
616
617void gm20b_blcg_gr_load_gating_prod(struct gk20a *g,
618 bool prod)
619{
620 u32 i;
621 u32 size = sizeof(gm20b_blcg_gr) / sizeof(struct gating_desc);
622
623 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
624 return;
625
626 for (i = 0; i < size; i++) {
627 if (prod)
628 gk20a_writel(g, gm20b_blcg_gr[i].addr,
629 gm20b_blcg_gr[i].prod);
630 else
631 gk20a_writel(g, gm20b_blcg_gr[i].addr,
632 gm20b_blcg_gr[i].disable);
633 }
634}
635
636void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g,
637 bool prod)
638{
639 u32 i;
640 u32 size = sizeof(gm20b_blcg_ltc) / sizeof(struct gating_desc);
641
642 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
643 return;
644
645 for (i = 0; i < size; i++) {
646 if (prod)
647 gk20a_writel(g, gm20b_blcg_ltc[i].addr,
648 gm20b_blcg_ltc[i].prod);
649 else
650 gk20a_writel(g, gm20b_blcg_ltc[i].addr,
651 gm20b_blcg_ltc[i].disable);
652 }
653}
654
655void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
656 bool prod)
657{
658 u32 i;
659 u32 size = sizeof(gm20b_blcg_pwr_csb) / sizeof(struct gating_desc);
660
661 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
662 return;
663
664 for (i = 0; i < size; i++) {
665 if (prod)
666 gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr,
667 gm20b_blcg_pwr_csb[i].prod);
668 else
669 gk20a_writel(g, gm20b_blcg_pwr_csb[i].addr,
670 gm20b_blcg_pwr_csb[i].disable);
671 }
672}
673
674void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g,
675 bool prod)
676{
677 u32 i;
678 u32 size = sizeof(gm20b_blcg_pmu) / sizeof(struct gating_desc);
679
680 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
681 return;
682
683 for (i = 0; i < size; i++) {
684 if (prod)
685 gk20a_writel(g, gm20b_blcg_pmu[i].addr,
686 gm20b_blcg_pmu[i].prod);
687 else
688 gk20a_writel(g, gm20b_blcg_pmu[i].addr,
689 gm20b_blcg_pmu[i].disable);
690 }
691}
692
693void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g,
694 bool prod)
695{
696 u32 i;
697 u32 size = sizeof(gm20b_blcg_xbar) / sizeof(struct gating_desc);
698
699 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
700 return;
701
702 for (i = 0; i < size; i++) {
703 if (prod)
704 gk20a_writel(g, gm20b_blcg_xbar[i].addr,
705 gm20b_blcg_xbar[i].prod);
706 else
707 gk20a_writel(g, gm20b_blcg_xbar[i].addr,
708 gm20b_blcg_xbar[i].disable);
709 }
710}
711
712void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g,
713 bool prod)
714{
715 u32 i;
716 u32 size = sizeof(gm20b_pg_gr) / sizeof(struct gating_desc);
717
718 if (!nvgpu_is_enabled(g, NVGPU_GPU_CAN_BLCG))
719 return;
720
721 for (i = 0; i < size; i++) {
722 if (prod)
723 gk20a_writel(g, gm20b_pg_gr[i].addr,
724 gm20b_pg_gr[i].prod);
725 else
726 gk20a_writel(g, gm20b_pg_gr[i].addr,
727 gm20b_pg_gr[i].disable);
728 }
729}
730
731#endif /* __gm20b_gating_reglist_h__ */
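Every loader in this autogenerated file follows the same table-driven pattern: check the SLCG/BLCG capability flag, then walk a static gating_desc array and write either the prod or the disable value to each address. A consolidated sketch of that pattern (a hypothetical helper; the generated file intentionally keeps one loop per unit so it can be regenerated mechanically):

static void example_load_gating_table(struct gk20a *g,
		const struct gating_desc *descs, u32 count, bool prod)
{
	u32 i;

	for (i = 0; i < count; i++)
		gk20a_writel(g, descs[i].addr,
			prod ? descs[i].prod : descs[i].disable);
}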
diff --git a/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h
new file mode 100644
index 00000000..557f5689
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gm20b_gating_reglist.h
@@ -0,0 +1,100 @@
1/*
2 * drivers/video/tegra/host/gm20b/gm20b_gating_reglist.h
3 *
4 * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * This file is autogenerated. Do not edit.
25 */
26
27#include "gk20a/gk20a.h"
28
29void gm20b_slcg_bus_load_gating_prod(struct gk20a *g,
30 bool prod);
31
32void gm20b_slcg_ce2_load_gating_prod(struct gk20a *g,
33 bool prod);
34
35void gm20b_slcg_chiplet_load_gating_prod(struct gk20a *g,
36 bool prod);
37
38void gm20b_slcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
39 bool prod);
40
41void gm20b_slcg_fb_load_gating_prod(struct gk20a *g,
42 bool prod);
43
44void gm20b_slcg_fifo_load_gating_prod(struct gk20a *g,
45 bool prod);
46
47void gr_gm20b_slcg_gr_load_gating_prod(struct gk20a *g,
48 bool prod);
49
50void ltc_gm20b_slcg_ltc_load_gating_prod(struct gk20a *g,
51 bool prod);
52
53void gm20b_slcg_perf_load_gating_prod(struct gk20a *g,
54 bool prod);
55
56void gm20b_slcg_priring_load_gating_prod(struct gk20a *g,
57 bool prod);
58
59void gm20b_slcg_pwr_csb_load_gating_prod(struct gk20a *g,
60 bool prod);
61
62void gm20b_slcg_pmu_load_gating_prod(struct gk20a *g,
63 bool prod);
64
65void gm20b_slcg_therm_load_gating_prod(struct gk20a *g,
66 bool prod);
67
68void gm20b_slcg_xbar_load_gating_prod(struct gk20a *g,
69 bool prod);
70
71void gm20b_blcg_bus_load_gating_prod(struct gk20a *g,
72 bool prod);
73
74void gm20b_blcg_ctxsw_firmware_load_gating_prod(struct gk20a *g,
75 bool prod);
76
77void gm20b_blcg_fb_load_gating_prod(struct gk20a *g,
78 bool prod);
79
80void gm20b_blcg_fifo_load_gating_prod(struct gk20a *g,
81 bool prod);
82
83void gm20b_blcg_gr_load_gating_prod(struct gk20a *g,
84 bool prod);
85
86void gm20b_blcg_ltc_load_gating_prod(struct gk20a *g,
87 bool prod);
88
89void gm20b_blcg_pwr_csb_load_gating_prod(struct gk20a *g,
90 bool prod);
91
92void gm20b_blcg_pmu_load_gating_prod(struct gk20a *g,
93 bool prod);
94
95void gm20b_blcg_xbar_load_gating_prod(struct gk20a *g,
96 bool prod);
97
98void gr_gm20b_pg_gr_load_gating_prod(struct gk20a *g,
99 bool prod);
100
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
new file mode 100644
index 00000000..cd7433b3
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.c
@@ -0,0 +1,72 @@
1/*
2 * drivers/video/tegra/host/gm20b/gr_ctx_gm20b.c
3 *
4 * GM20B Graphics Context
5 *
6 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27#include "gk20a/gk20a.h"
28#include "gr_ctx_gm20b.h"
29
30int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name)
31{
32 switch (index) {
33#ifdef GM20B_NETLIST_IMAGE_FW_NAME
34 case NETLIST_FINAL:
35 sprintf(name, GM20B_NETLIST_IMAGE_FW_NAME);
36 return 0;
37#endif
38#ifdef GK20A_NETLIST_IMAGE_A
39 case NETLIST_SLOT_A:
40 sprintf(name, GK20A_NETLIST_IMAGE_A);
41 return 0;
42#endif
43#ifdef GK20A_NETLIST_IMAGE_B
44 case NETLIST_SLOT_B:
45 sprintf(name, GK20A_NETLIST_IMAGE_B);
46 return 0;
47#endif
48#ifdef GK20A_NETLIST_IMAGE_C
49 case NETLIST_SLOT_C:
50 sprintf(name, GK20A_NETLIST_IMAGE_C);
51 return 0;
52#endif
53#ifdef GK20A_NETLIST_IMAGE_D
54 case NETLIST_SLOT_D:
55 sprintf(name, GK20A_NETLIST_IMAGE_D);
56 return 0;
57#endif
58 default:
59 return -1;
60 }
61
62 return -1;
63}
64
65bool gr_gm20b_is_firmware_defined(void)
66{
67#ifdef GM20B_NETLIST_IMAGE_FW_NAME
68 return true;
69#else
70 return false;
71#endif
72}
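gr_gm20b_get_netlist_name() above fills in the ctxsw firmware file name for a given netlist slot and returns -1 for slots that are not compiled in, so a caller can probe slots in order of preference. A hedged usage sketch follows (the function name, slot order, and -ENOENT choice are illustrative assumptions; the NETLIST_* constants come from gr_ctx_gk20a.h as used above):

static int example_pick_netlist(struct gk20a *g, char *name)
{
	const int slots[] = { NETLIST_FINAL, NETLIST_SLOT_A, NETLIST_SLOT_B,
			      NETLIST_SLOT_C, NETLIST_SLOT_D };
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(slots); i++) {
		if (gr_gm20b_get_netlist_name(g, slots[i], name) == 0)
			return 0;	/* "name" now holds a firmware file name */
	}

	return -ENOENT;
}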
diff --git a/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h
new file mode 100644
index 00000000..8712b353
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_ctx_gm20b.h
@@ -0,0 +1,36 @@
1/*
2 * GM20B Graphics Context
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef __GR_CTX_GM20B_H__
25#define __GR_CTX_GM20B_H__
26
27#include "gk20a/gr_ctx_gk20a.h"
28
29/* production netlist, one and only one from below */
30/*#undef GM20B_NETLIST_IMAGE_FW_NAME*/
31#define GM20B_NETLIST_IMAGE_FW_NAME GK20A_NETLIST_IMAGE_B
32
33int gr_gm20b_get_netlist_name(struct gk20a *g, int index, char *name);
34bool gr_gm20b_is_firmware_defined(void);
35
36#endif /*__GR_CTX_GM20B_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
new file mode 100644
index 00000000..ef46c1ee
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -0,0 +1,1527 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <uapi/linux/nvgpu.h>
26
27#include <nvgpu/kmem.h>
28#include <nvgpu/log.h>
29#include <nvgpu/enabled.h>
30#include <nvgpu/debug.h>
31#include <nvgpu/fuse.h>
32
33#include "gk20a/gk20a.h"
34#include "gk20a/gr_gk20a.h"
35#include "common/linux/os_linux.h"
36
37#include "gr_gm20b.h"
38#include "pmu_gm20b.h"
39
40#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
41#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
42#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
43#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
44#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
45#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
46#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
47
48void gr_gm20b_init_gpc_mmu(struct gk20a *g)
49{
50 u32 temp;
51
52 gk20a_dbg_info("initialize gpc mmu");
53
54 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
55 /* Bypass MMU check for non-secure boot. For
56 		 * secure boot, this register write has no effect. */
57 gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
58 }
59 temp = gk20a_readl(g, fb_mmu_ctrl_r());
60 temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
61 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
62 gr_gpcs_pri_mmu_ctrl_use_full_comp_tag_line_m() |
63 gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
64 gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
65 gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
66 gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
67 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
68 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
69 gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
70 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
71 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
72 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
73
74 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
75 gk20a_readl(g, fb_mmu_debug_ctrl_r()));
76 gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
77 gk20a_readl(g, fb_mmu_debug_wr_r()));
78 gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
79 gk20a_readl(g, fb_mmu_debug_rd_r()));
80
81 gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
82 gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
83}
84
85void gr_gm20b_bundle_cb_defaults(struct gk20a *g)
86{
87 struct gr_gk20a *gr = &g->gr;
88
89 gr->bundle_cb_default_size =
90 gr_scc_bundle_cb_size_div_256b__prod_v();
91 gr->min_gpm_fifo_depth =
92 gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
93 gr->bundle_cb_token_limit =
94 gr_pd_ab_dist_cfg2_token_limit_init_v();
95}
96
97void gr_gm20b_cb_size_default(struct gk20a *g)
98{
99 struct gr_gk20a *gr = &g->gr;
100
101 if (!gr->attrib_cb_default_size)
102 gr->attrib_cb_default_size =
103 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
104 gr->alpha_cb_default_size =
105 gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
106}
107
108int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
109{
110 struct gr_gk20a *gr = &g->gr;
111 int size;
112
113 gr->attrib_cb_size = gr->attrib_cb_default_size
114 + (gr->attrib_cb_default_size >> 1);
115 gr->alpha_cb_size = gr->alpha_cb_default_size
116 + (gr->alpha_cb_default_size >> 1);
117
118 size = gr->attrib_cb_size *
119 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v() *
120 gr->max_tpc_count;
121
122 size += gr->alpha_cb_size *
123 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() *
124 gr->max_tpc_count;
125
126 return size;
127}
128
129void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
130 struct channel_ctx_gk20a *ch_ctx,
131 u64 addr, bool patch)
132{
133 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
134 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
135 gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
136
137 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
138 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
139 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
140
141 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
142 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
143 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
144}
145
146void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
147 struct channel_ctx_gk20a *ch_ctx,
148 u64 addr, u64 size, bool patch)
149{
150 u32 data;
151
152 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
153 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
154
155 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
156 gr_scc_bundle_cb_size_div_256b_f(size) |
157 gr_scc_bundle_cb_size_valid_true_f(), patch);
158
159 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
160 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
161
162 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
163 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
164 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
165
166 /* data for state_limit */
167 data = (g->gr.bundle_cb_default_size *
168 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
169 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
170
171 data = min_t(u32, data, g->gr.min_gpm_fifo_depth);
172
173 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
174 g->gr.bundle_cb_token_limit, data);
175
176 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
177 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
178 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
179
180}
181
182int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
183 struct channel_gk20a *c, bool patch)
184{
185 struct gr_gk20a *gr = &g->gr;
186 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
187 u32 attrib_offset_in_chunk = 0;
188 u32 alpha_offset_in_chunk = 0;
189 u32 pd_ab_max_output;
190 u32 gpc_index, ppc_index;
191 u32 cbm_cfg_size1, cbm_cfg_size2;
192 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
193 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
194 u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
195 GPU_LIT_NUM_PES_PER_GPC);
196
197 gk20a_dbg_fn("");
198
199 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
200 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
201 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
202 patch);
203
204 pd_ab_max_output = (gr->alpha_cb_default_size *
205 gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
206 gr_pd_ab_dist_cfg1_max_output_granularity_v();
207
208 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
209 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
210 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
211
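	/* the alpha CB region follows the per-TPC beta (attrib) CBs within the context buffer chunk */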
212 alpha_offset_in_chunk = attrib_offset_in_chunk +
213 gr->tpc_count * gr->attrib_cb_size;
214
215 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
216 u32 temp = gpc_stride * gpc_index;
217 u32 temp2 = num_pes_per_gpc * gpc_index;
218 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
219 ppc_index++) {
220 cbm_cfg_size1 = gr->attrib_cb_default_size *
221 gr->pes_tpc_count[ppc_index][gpc_index];
222 cbm_cfg_size2 = gr->alpha_cb_default_size *
223 gr->pes_tpc_count[ppc_index][gpc_index];
224
225 gr_gk20a_ctx_patch_write(g, ch_ctx,
226 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
227 ppc_in_gpc_stride * ppc_index,
228 cbm_cfg_size1, patch);
229
230 gr_gk20a_ctx_patch_write(g, ch_ctx,
231 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
232 ppc_in_gpc_stride * ppc_index,
233 attrib_offset_in_chunk, patch);
234
235 attrib_offset_in_chunk += gr->attrib_cb_size *
236 gr->pes_tpc_count[ppc_index][gpc_index];
237
238 gr_gk20a_ctx_patch_write(g, ch_ctx,
239 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
240 ppc_in_gpc_stride * ppc_index,
241 cbm_cfg_size2, patch);
242
243 gr_gk20a_ctx_patch_write(g, ch_ctx,
244 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
245 ppc_in_gpc_stride * ppc_index,
246 alpha_offset_in_chunk, patch);
247
248 alpha_offset_in_chunk += gr->alpha_cb_size *
249 gr->pes_tpc_count[ppc_index][gpc_index];
250
251 gr_gk20a_ctx_patch_write(g, ch_ctx,
252 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
253 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size1) |
254 gr_gpcs_swdx_tc_beta_cb_size_div3_f(cbm_cfg_size1/3),
255 patch);
256 }
257 }
258
259 return 0;
260}
261
262void gr_gm20b_commit_global_pagepool(struct gk20a *g,
263 struct channel_ctx_gk20a *ch_ctx,
264 u64 addr, u32 size, bool patch)
265{
266 gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch);
267
268 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_rm_pagepool_r(),
269 gr_gpcs_swdx_rm_pagepool_total_pages_f(size) |
270 gr_gpcs_swdx_rm_pagepool_valid_true_f(), patch);
271
272}
273
274void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data)
275{
276 u32 val;
277
278 gk20a_dbg_fn("");
279
280 val = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r());
281 val = set_field(val, gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(),
282 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(data));
283 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), val);
284
285 gk20a_dbg_fn("done");
286}
287
288int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
289 u32 class_num, u32 offset, u32 data)
290{
291 gk20a_dbg_fn("");
292
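	/* sw method offsets arrive in 32-bit words; shift to a byte offset to compare with the class method defines */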
293 if (class_num == MAXWELL_COMPUTE_B) {
294 switch (offset << 2) {
295 case NVB1C0_SET_SHADER_EXCEPTIONS:
296 gk20a_gr_set_shader_exceptions(g, data);
297 break;
298 case NVB1C0_SET_RD_COALESCE:
299 gr_gm20b_set_rd_coalesce(g, data);
300 break;
301 default:
302 goto fail;
303 }
304 }
305
306 if (class_num == MAXWELL_B) {
307 switch (offset << 2) {
308 case NVB197_SET_SHADER_EXCEPTIONS:
309 gk20a_gr_set_shader_exceptions(g, data);
310 break;
311 case NVB197_SET_CIRCULAR_BUFFER_SIZE:
312 g->ops.gr.set_circular_buffer_size(g, data);
313 break;
314 case NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
315 g->ops.gr.set_alpha_circular_buffer_size(g, data);
316 break;
317 case NVB197_SET_RD_COALESCE:
318 gr_gm20b_set_rd_coalesce(g, data);
319 break;
320 default:
321 goto fail;
322 }
323 }
324 return 0;
325
326fail:
327 return -EINVAL;
328}
329
330void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
331{
332 struct gr_gk20a *gr = &g->gr;
333 u32 gpc_index, ppc_index, stride, val;
334 u32 pd_ab_max_output;
335 u32 alpha_cb_size = data * 4;
336 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
337 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
338
339 gk20a_dbg_fn("");
340 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
341 return; */
342
343 if (alpha_cb_size > gr->alpha_cb_size)
344 alpha_cb_size = gr->alpha_cb_size;
345
346 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
347 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
348 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
349 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
350
351 pd_ab_max_output = alpha_cb_size *
352 gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
353 gr_pd_ab_dist_cfg1_max_output_granularity_v();
354
355 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
356 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
357 gr_pd_ab_dist_cfg1_max_batches_init_f());
358
359 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
360 stride = gpc_stride * gpc_index;
361
362 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
363 ppc_index++) {
364
365 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
366 stride +
367 ppc_in_gpc_stride * ppc_index);
368
369 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
370 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
371 gr->pes_tpc_count[ppc_index][gpc_index]));
372
373 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
374 stride +
375 ppc_in_gpc_stride * ppc_index, val);
376 }
377 }
378}
379
380void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
381{
382 struct gr_gk20a *gr = &g->gr;
383 u32 gpc_index, ppc_index, stride, val;
384 u32 cb_size = data * 4;
385 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
386 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
387
388 gk20a_dbg_fn("");
389
390 if (cb_size > gr->attrib_cb_size)
391 cb_size = gr->attrib_cb_size;
392
393 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
394 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
395 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
396 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
397
398 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
399 stride = gpc_stride * gpc_index;
400
401 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
402 ppc_index++) {
403
404 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
405 stride +
406 ppc_in_gpc_stride * ppc_index);
407
408 val = set_field(val,
409 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
410 gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
411 gr->pes_tpc_count[ppc_index][gpc_index]));
412
413 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
414 stride +
415 ppc_in_gpc_stride * ppc_index, val);
416
417 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
418 ppc_index + gpc_index));
419
420 val = set_field(val,
421 gr_gpcs_swdx_tc_beta_cb_size_v_m(),
422 gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
423 gr->gpc_ppc_count[gpc_index]));
424 val = set_field(val,
425 gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
426 gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
427 gr->gpc_ppc_count[gpc_index])/3));
428
429 gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
430 ppc_index + gpc_index), val);
431 }
432 }
433}
434
435void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g)
436{
437 /* setup sm warp esr report masks */
438 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
439 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() |
440 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() |
441 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() |
442 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() |
443 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() |
444 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() |
445 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() |
446 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() |
447 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() |
448 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() |
449 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() |
450 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() |
451 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() |
452 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() |
453 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() |
454 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
455 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() |
456 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
457 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_mmu_fault_report_f() |
458 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_overflow_report_f() |
459 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() |
460 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f());
461
462 /* setup sm global esr report mask */
463 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
464 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() |
465 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f());
466}
467
468bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
469{
470 bool valid = false;
471
472 switch (class_num) {
473 case MAXWELL_COMPUTE_B:
474 case MAXWELL_B:
475 case FERMI_TWOD_A:
476 case KEPLER_DMA_COPY_A:
477 case MAXWELL_DMA_COPY_A:
478 valid = true;
479 break;
480
481 default:
482 break;
483 }
484
485 return valid;
486}
487
488bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
489{
490 if (class_num == MAXWELL_B)
491 return true;
492 else
493 return false;
494}
495
496bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
497{
498 if (class_num == MAXWELL_COMPUTE_B)
499 return true;
500 else
501 return false;
502}
503
504
505/* Following are the blocks of registers that the ucode
506 * stores in the extended region. */
507/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
508static const u32 _num_sm_dsm_perf_regs;
509/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/
510static const u32 _num_sm_dsm_perf_ctrl_regs = 2;
511static u32 *_sm_dsm_perf_regs;
512static u32 _sm_dsm_perf_ctrl_regs[2];
513
514void gr_gm20b_init_sm_dsm_reg_info(void)
515{
516 if (_sm_dsm_perf_ctrl_regs[0] != 0)
517 return;
518
519 _sm_dsm_perf_ctrl_regs[0] =
520 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r();
521 _sm_dsm_perf_ctrl_regs[1] =
522 gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r();
523}
524
525void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
526 u32 *num_sm_dsm_perf_regs,
527 u32 **sm_dsm_perf_regs,
528 u32 *perf_register_stride)
529{
530 *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
531 *sm_dsm_perf_regs = _sm_dsm_perf_regs;
532 *perf_register_stride = 0;
533}
534
535void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
536 u32 *num_sm_dsm_perf_ctrl_regs,
537 u32 **sm_dsm_perf_ctrl_regs,
538 u32 *ctrl_register_stride)
539{
540 *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
541 *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
542
543 *ctrl_register_stride =
544 ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
545}
546
547u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
548{
549 u32 val;
550 struct gr_gk20a *gr = &g->gr;
551
552	/* NV_FUSE_STATUS_OPT_TPC_GPC has a bit set per disabled TPC; invert it to get the enabled-TPC mask */
553 val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index));
554
555 return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
556}
557
558void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
559{
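	/* route fuse writes through the SW override path before programming the per-TPC disables below */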
560 nvgpu_tegra_fuse_write_bypass(g, 0x1);
561 nvgpu_tegra_fuse_write_access_sw(g, 0x0);
562
563 if (g->gr.gpc_tpc_mask[gpc_index] == 0x1) {
564 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
565 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1);
566 } else if (g->gr.gpc_tpc_mask[gpc_index] == 0x2) {
567 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1);
568 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
569 } else {
570 nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0);
571 nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0);
572 }
573}
574
575void gr_gm20b_load_tpc_mask(struct gk20a *g)
576{
577 u32 pes_tpc_mask = 0, fuse_tpc_mask;
578 u32 gpc, pes;
579 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
580
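	/* accumulate the per-PES TPC masks into one flat mask, one bit field per GPC */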
581 for (gpc = 0; gpc < g->gr.gpc_count; gpc++)
582 for (pes = 0; pes < g->gr.pe_count_per_gpc; pes++) {
583 pes_tpc_mask |= g->gr.pes_tpc_mask[pes][gpc] <<
584 num_tpc_per_gpc * gpc;
585 }
586
587 fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
588 if (g->tpc_fs_mask_user && g->tpc_fs_mask_user != fuse_tpc_mask &&
589 fuse_tpc_mask == (0x1U << g->gr.max_tpc_count) - 1U) {
590 u32 val = g->tpc_fs_mask_user;
591 val &= (0x1U << g->gr.max_tpc_count) - 1U;
592		/* disabling an arbitrary TPC can cause channel timeouts, so keep only the requested number of TPCs, packed from TPC0 */
593 val = (0x1U << hweight32(val)) - 1U;
594 gk20a_writel(g, gr_fe_tpc_fs_r(), val);
595 } else {
596 gk20a_writel(g, gr_fe_tpc_fs_r(), pes_tpc_mask);
597 }
598}
599
600void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
601 u32 gpc, u32 tpc, u32 smid)
602{
603 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
604 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
605 u32 gpc_offset = gpc_stride * gpc;
606 u32 tpc_offset = tpc_in_gpc_stride * tpc;
607
608 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
609 gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
610 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
611 gr_gpc0_gpm_pd_sm_id_id_f(smid));
612 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
613 gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
614}
615
616int gr_gm20b_load_smid_config(struct gk20a *g)
617{
618 u32 *tpc_sm_id;
619 u32 i, j;
620 u32 tpc_index, gpc_index;
621
622 tpc_sm_id = nvgpu_kcalloc(g, gr_cwd_sm_id__size_1_v(), sizeof(u32));
623 if (!tpc_sm_id)
624 return -ENOMEM;
625
626	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. */
627 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
628 u32 reg = 0;
629 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
630 gr_cwd_gpc_tpc_id_tpc0_s();
631
632 for (j = 0; j < 4; j++) {
633 u32 sm_id = (i * 4) + j;
634 u32 bits;
635
636 if (sm_id >= g->gr.tpc_count)
637 break;
638
639 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
640 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
641
642 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
643 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
644 reg |= bits << (j * bit_stride);
645
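			/* also record this SM id in this GPC's entry of the CWD SM_ID table, written out below */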
646 tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride;
647 }
648 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
649 }
650
651 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
652 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
653
654 nvgpu_kfree(g, tpc_sm_id);
655
656 return 0;
657}
658
659int gr_gm20b_init_fs_state(struct gk20a *g)
660{
661 int err = 0;
662
663 gk20a_dbg_fn("");
664
665 err = gr_gk20a_init_fs_state(g);
666 if (err)
667 return err;
668
669 g->ops.gr.load_tpc_mask(g);
670
671 gk20a_writel(g, gr_bes_zrop_settings_r(),
672 gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count));
673 gk20a_writel(g, gr_bes_crop_settings_r(),
674 gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count));
675
676 gk20a_writel(g, gr_bes_crop_debug3_r(),
677 gk20a_readl(g, gr_be0_crop_debug3_r()) |
678 gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
679
680 g->ops.gr.load_smid_config(g);
681
682 return err;
683}
684
685int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
686 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
687{
688 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
689 gr_fecs_dmactl_require_ctx_f(0));
690
691 /* Copy falcon bootloader into dmem */
692 gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
693 gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);
694
695	/* start the falcon immediately if PRIV security is disabled */
696 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
697 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
698 gr_fecs_cpuctl_startcpu_f(0x01));
699 }
700
701 return 0;
702}
703
704static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
705{
706 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
707 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
708 return (addr >= tpc_in_gpc_shared_base) &&
709 (addr < (tpc_in_gpc_shared_base +
710 tpc_in_gpc_stride));
711}
712
713bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
714{
715 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
716 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
717 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
718 return ((addr >= tpc_in_gpc_base) &&
719 (addr < tpc_in_gpc_base +
720 (num_tpc_per_gpc * tpc_in_gpc_stride)))
721 || gr_gm20b_is_tpc_addr_shared(g, addr);
722}
723
724u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
725{
726 u32 i, start;
727 u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
728 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
729 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
730
731 for (i = 0; i < num_tpcs; i++) {
732 start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
733 if ((addr >= start) &&
734 (addr < (start + tpc_in_gpc_stride)))
735 return i;
736 }
737 return 0;
738}
739
740#ifdef CONFIG_TEGRA_ACR
741static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
742{
743 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
744 u64 addr_base = ucode_info->surface_desc.gpu_va;
745
746 gr_gk20a_load_falcon_bind_instblk(g);
747
748 g->ops.gr.falcon_load_ucode(g, addr_base,
749 &g->ctxsw_ucode_info.gpccs,
750 gr_gpcs_gpccs_falcon_hwcfg_r() -
751 gr_fecs_falcon_hwcfg_r());
752}
753
754int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
755{
756 u32 err, flags;
757 u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
758 gr_fecs_falcon_hwcfg_r();
759 u8 falcon_id_mask = 0;
760
761 gk20a_dbg_fn("");
762
763 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
764 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
765 gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
766 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
767 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
768 }
769
770 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
771 g->pmu_lsf_loaded_falcon_id = 0;
772 if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
773		/* this must be a recovery path, so bootstrap both FECS and GPCCS */
774 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
775 gr_gm20b_load_gpccs_with_bootloader(g);
776 err = g->ops.pmu.load_lsfalcon_ucode(g,
777 (1 << LSF_FALCON_ID_FECS));
778 } else {
779 /* bind WPR VA inst block */
780 gr_gk20a_load_falcon_bind_instblk(g);
781 err = g->ops.pmu.load_lsfalcon_ucode(g,
782 (1 << LSF_FALCON_ID_FECS) |
783 (1 << LSF_FALCON_ID_GPCCS));
784 }
785 if (err) {
786 nvgpu_err(g, "Unable to recover GR falcon");
787 return err;
788 }
789
790 } else {
791		/* cold boot or railgate exit */
792 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
793 if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
794 gr_gm20b_load_gpccs_with_bootloader(g);
795 } else {
796 /* bind WPR VA inst block */
797 gr_gk20a_load_falcon_bind_instblk(g);
798 if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_FECS))
799 falcon_id_mask |= (1 << LSF_FALCON_ID_FECS);
800 if (g->ops.pmu.is_lazy_bootstrap(LSF_FALCON_ID_GPCCS))
801 falcon_id_mask |= (1 << LSF_FALCON_ID_GPCCS);
802
803 err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask);
804
805 if (err) {
806 nvgpu_err(g, "Unable to boot GPCCS");
807 return err;
808 }
809 }
810 }
811
812	/* start gpccs */
813 if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
814 gk20a_writel(g, reg_offset +
815 gr_fecs_cpuctl_alias_r(),
816 gr_gpccs_cpuctl_startcpu_f(1));
817 } else {
818 gk20a_writel(g, gr_gpccs_dmactl_r(),
819 gr_gpccs_dmactl_require_ctx_f(0));
820 gk20a_writel(g, gr_gpccs_cpuctl_r(),
821 gr_gpccs_cpuctl_startcpu_f(1));
822 }
823 /* start fecs */
824 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
825 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
826 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
827 gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
828 gr_fecs_cpuctl_startcpu_f(1));
829 gk20a_dbg_fn("done");
830
831 return 0;
832}
833#else
834
835int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
836{
837 return -EPERM;
838}
839
840#endif
841
842void gr_gm20b_detect_sm_arch(struct gk20a *g)
843{
844 u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
845
846 g->params.sm_arch_spa_version =
847 gr_gpc0_tpc0_sm_arch_spa_version_v(v);
848 g->params.sm_arch_sm_version =
849 gr_gpc0_tpc0_sm_arch_sm_version_v(v);
850 g->params.sm_arch_warp_count =
851 gr_gpc0_tpc0_sm_arch_warp_count_v(v);
852}
853
854u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
855{
856 return gr_scc_pagepool_total_pages_hwmax_value_v();
857}
858
859int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
860 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
861 u32 class,
862 u32 flags)
863{
864 int err;
865
866 gk20a_dbg_fn("");
867
868 err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
869 if (err)
870 return err;
871
872 if (class == MAXWELL_COMPUTE_B)
873 (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
874
875 gk20a_dbg_fn("done");
876
877 return 0;
878}
879
880void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
881 struct channel_ctx_gk20a *ch_ctx,
882 struct nvgpu_mem *mem)
883{
884 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
885 u32 cta_preempt_option =
886 ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
887
888 gk20a_dbg_fn("");
889
890 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
891 gk20a_dbg_info("CTA: %x", cta_preempt_option);
892 nvgpu_mem_wr(g, mem,
893 ctxsw_prog_main_image_preemption_options_o(),
894 cta_preempt_option);
895 }
896
897 gk20a_dbg_fn("done");
898}
899
900int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
901 struct gk20a_debug_output *o)
902{
903 struct gr_gk20a *gr = &g->gr;
904 u32 gr_engine_id;
905
906 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
907
908 gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
909 gk20a_readl(g, gr_status_r()));
910 gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
911 gk20a_readl(g, gr_status_1_r()));
912 gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n",
913 gk20a_readl(g, gr_status_2_r()));
914 gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
915 gk20a_readl(g, gr_engine_status_r()));
916 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
917 gk20a_readl(g, gr_gpfifo_status_r()));
918 gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
919 gk20a_readl(g, gr_gpfifo_ctl_r()));
920 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
921 gk20a_readl(g, gr_fecs_host_int_status_r()));
922 gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
923 gk20a_readl(g, gr_exception_r()));
924 gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
925 gk20a_readl(g, gr_fecs_intr_r()));
926 gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
927 gk20a_readl(g, fifo_engine_status_r(gr_engine_id)));
928 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
929 gk20a_readl(g, gr_activity_0_r()));
930 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
931 gk20a_readl(g, gr_activity_1_r()));
932 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n",
933 gk20a_readl(g, gr_activity_2_r()));
934 gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
935 gk20a_readl(g, gr_activity_4_r()));
936 gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
937 gk20a_readl(g, gr_pri_sked_activity_r()));
938 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
939 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
940 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
941 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
942 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
943 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
944 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
945 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
946 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
947 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
948 if (gr->gpc_tpc_count[0] == 2)
949 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
950 gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
951 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
952 gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
953 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
954 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
955 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
956 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
957 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
958 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
959 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
960 gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
961 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
962 gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
963 if (gr->gpc_tpc_count[0] == 2)
964 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
965 gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
966 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
967 gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
968 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
969 gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
970 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
971 gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
972 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
973 gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
974 gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
975 gk20a_readl(g, gr_pri_ds_mpipe_status_r()));
976 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n",
977 gk20a_readl(g, gr_pri_fe_go_idle_on_status_r()));
978 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
979 gk20a_readl(g, gr_fe_go_idle_timeout_r()));
980 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n",
981 gk20a_readl(g, gr_pri_fe_go_idle_check_r()));
982 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
983 gk20a_readl(g, gr_pri_fe_go_idle_info_r()));
984 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
985 gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
986 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
987 gk20a_readl(g, gr_cwd_fs_r()));
988 gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS: 0x%x\n",
989 gk20a_readl(g, gr_fe_tpc_fs_r()));
990 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID(0): 0x%x\n",
991 gk20a_readl(g, gr_cwd_gpc_tpc_id_r(0)));
992 gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
993 gk20a_readl(g, gr_cwd_sm_id_r(0)));
994 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
995 gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r()));
996 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
997 gk20a_readl(g, gr_fecs_ctxsw_status_1_r()));
998 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
999 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
1000 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
1001 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
1002 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
1003 gk20a_readl(g, gr_fecs_ctxsw_idlestate_r()));
1004 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
1005 gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
1006 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
1007 gk20a_readl(g, gr_fecs_current_ctx_r()));
1008 gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
1009 gk20a_readl(g, gr_fecs_new_ctx_r()));
1010 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n",
1011 gk20a_readl(g, gr_pri_be0_crop_status1_r()));
1012 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n",
1013 gk20a_readl(g, gr_pri_bes_crop_status1_r()));
1014 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n",
1015 gk20a_readl(g, gr_pri_be0_zrop_status_r()));
1016 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n",
1017 gk20a_readl(g, gr_pri_be0_zrop_status2_r()));
1018 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n",
1019 gk20a_readl(g, gr_pri_bes_zrop_status_r()));
1020 gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n",
1021 gk20a_readl(g, gr_pri_bes_zrop_status2_r()));
1022 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n",
1023 gk20a_readl(g, gr_pri_be0_becs_be_exception_r()));
1024 gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n",
1025 gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r()));
1026 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
1027 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
1028 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
1029 gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
1030 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
1031 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
1032 gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
1033 gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));
1034
1035 return 0;
1036}
1037
1038int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1039 bool enable)
1040{
1041 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1042 struct nvgpu_mem *mem;
1043 u32 v;
1044
1045 gk20a_dbg_fn("");
1046
1047 if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
1048 return -EINVAL;
1049
1050 mem = &ch_ctx->gr_ctx->mem;
1051
1052 if (nvgpu_mem_begin(c->g, mem))
1053 return -ENOMEM;
1054
1055 v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
1056 v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
1057 v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
1058 nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
1059
1060 nvgpu_mem_end(c->g, mem);
1061
1062 gk20a_dbg_fn("done");
1063
1064 return 0;
1065}
1066
1067u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
1068{
1069 u32 fbp_en_mask, opt_fbio;
1070 u32 tmp, max_fbps_count;
1071
1072 tmp = gk20a_readl(g, top_num_fbps_r());
1073 max_fbps_count = top_num_fbps_value_v(tmp);
1074
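	/* the fuse marks disabled FBP I/O; XOR against the all-FBPs mask to get the enabled set */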
1075 opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
1076 fbp_en_mask =
1077 ((1 << max_fbps_count) - 1) ^
1078 fuse_status_opt_fbio_data_v(opt_fbio);
1079 return fbp_en_mask;
1080}
1081
1082u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
1083{
1084 u32 ltc_per_fbp, reg;
1085 reg = gk20a_readl(g, top_ltc_per_fbp_r());
1086 ltc_per_fbp = top_ltc_per_fbp_value_v(reg);
1087 return ltc_per_fbp;
1088}
1089
1090u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
1091{
1092 u32 lts_per_ltc, reg;
1093 reg = gk20a_readl(g, top_slices_per_ltc_r());
1094 lts_per_ltc = top_slices_per_ltc_value_v(reg);
1095 return lts_per_ltc;
1096}
1097
1098u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1099{
1100 struct gr_gk20a *gr = &g->gr;
1101 u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
1102 u32 rop_l2_all_en;
1103
1104 tmp = gk20a_readl(g, top_num_fbps_r());
1105 max_fbps_count = top_num_fbps_value_v(tmp);
1106 max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
1107 rop_l2_all_en = (1 << max_ltc_per_fbp) - 1;
1108
1109 /* mask of Rop_L2 for each FBP */
1110 for (i = 0; i < max_fbps_count; i++) {
1111 tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i));
1112 gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
1113 }
1114
1115 return gr->fbp_rop_l2_en_mask;
1116}
1117
1118u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
1119{
1120 u32 tmp, max_fbps_count;
1121 tmp = gk20a_readl(g, top_num_fbps_r());
1122 max_fbps_count = top_num_fbps_value_v(tmp);
1123 return max_fbps_count;
1124}
1125
1126void gr_gm20b_init_cyclestats(struct gk20a *g)
1127{
1128#if defined(CONFIG_GK20A_CYCLE_STATS)
1129 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, true);
1130 __nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, true);
1131 g->gr.max_css_buffer_size = 0xffffffff;
1132#else
1133 (void)g;
1134#endif
1135}
1136
1137void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
1138{
1139 u32 cde_v;
1140
1141 cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
1142 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
1143 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
1144}
1145
1146void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
1147{
1148	/* Read the valid, paused and trapped warp masks for each SM */
1149	/* (Maxwell register layout) */
1150 struct gr_gk20a *gr = &g->gr;
1151 u32 gpc, tpc, sm_id;
1152 u32 tpc_offset, gpc_offset, reg_offset;
1153 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
1154 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1155 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1156
1157 /* for maxwell & kepler */
1158 u32 numSmPerTpc = 1;
1159 u32 numWarpPerTpc = g->params.sm_arch_warp_count * numSmPerTpc;
1160
1161 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1162 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1163 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1164
1165 tpc_offset = tpc_in_gpc_stride * tpc;
1166 gpc_offset = gpc_stride * gpc;
1167 reg_offset = tpc_offset + gpc_offset;
1168
1169 /* 64 bit read */
1170 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
1171 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
1172
1173 /* 64 bit read */
1174 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
1175 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
1176
1177 /* 64 bit read */
1178 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
1179 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
1180
1181 w_state[sm_id].valid_warps[0] = warps_valid;
1182 w_state[sm_id].trapped_warps[0] = warps_trapped;
1183 w_state[sm_id].paused_warps[0] = warps_paused;
1184
1185
1186 if (numWarpPerTpc > 64) {
1187 /* 64 bit read */
1188 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32;
1189 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset);
1190
1191 /* 64 bit read */
1192 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32;
1193 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset);
1194
1195 /* 64 bit read */
1196 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32;
1197 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset);
1198
1199 w_state[sm_id].valid_warps[1] = warps_valid;
1200 w_state[sm_id].trapped_warps[1] = warps_trapped;
1201 w_state[sm_id].paused_warps[1] = warps_paused;
1202 }
1203 }
1204
1205
1206	/* For debug purposes only */
1207 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1208 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
1209 sm_id, w_state[sm_id].valid_warps[0]);
1210 gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
1211 sm_id, w_state[sm_id].valid_warps[1]);
1212
1213 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
1214 sm_id, w_state[sm_id].trapped_warps[0]);
1215 gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
1216 sm_id, w_state[sm_id].trapped_warps[1]);
1217
1218 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
1219 sm_id, w_state[sm_id].paused_warps[0]);
1220 gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
1221 sm_id, w_state[sm_id].paused_warps[1]);
1222 }
1223}
1224
1225void gr_gm20b_get_access_map(struct gk20a *g,
1226 u32 **whitelist, int *num_entries)
1227{
1228 static u32 wl_addr_gm20b[] = {
1229 /* this list must be sorted (low to high) */
1230 0x404468, /* gr_pri_mme_max_instructions */
1231 0x418300, /* gr_pri_gpcs_rasterarb_line_class */
1232 0x418800, /* gr_pri_gpcs_setup_debug */
1233 0x418e00, /* gr_pri_gpcs_swdx_config */
1234 0x418e40, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1235 0x418e44, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1236 0x418e48, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1237 0x418e4c, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1238 0x418e50, /* gr_pri_gpcs_swdx_tc_bundle_ctrl */
1239 0x418e58, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1240 0x418e5c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1241 0x418e60, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1242 0x418e64, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1243 0x418e68, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1244 0x418e6c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1245 0x418e70, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1246 0x418e74, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1247 0x418e78, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1248 0x418e7c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1249 0x418e80, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1250 0x418e84, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1251 0x418e88, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1252 0x418e8c, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1253 0x418e90, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1254 0x418e94, /* gr_pri_gpcs_swdx_tc_bundle_addr */
1255 0x419864, /* gr_pri_gpcs_tpcs_pe_l2_evict_policy */
1256 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
1257 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
1258 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */
1259 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */
1260 };
1261
1262 *whitelist = wl_addr_gm20b;
1263 *num_entries = ARRAY_SIZE(wl_addr_gm20b);
1264}
1265
1266int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1267{
1268 int sm_id;
1269 struct gr_gk20a *gr = &g->gr;
1270 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1271 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1272 GPU_LIT_TPC_IN_GPC_STRIDE);
1273 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1274
1275 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1276
1277 sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
1278 gr_gpc0_tpc0_sm_cfg_r() + offset));
1279
1280 gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
1281 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1282 gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
1283 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
1284 gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
1285 gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
1286 gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
1287 gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
1288 gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
1289 gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
1290
1291 nvgpu_mutex_release(&g->dbg_sessions_lock);
1292
1293 return 0;
1294}
1295
1296int gm20b_gr_update_sm_error_state(struct gk20a *g,
1297 struct channel_gk20a *ch, u32 sm_id,
1298 struct nvgpu_gr_sm_error_state *sm_error_state)
1299{
1300 u32 gpc, tpc, offset;
1301 struct gr_gk20a *gr = &g->gr;
1302 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
1303 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1304 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1305 GPU_LIT_TPC_IN_GPC_STRIDE);
1306 int err = 0;
1307
1308 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1309
1310 gr->sm_error_states[sm_id].hww_global_esr =
1311 sm_error_state->hww_global_esr;
1312 gr->sm_error_states[sm_id].hww_warp_esr =
1313 sm_error_state->hww_warp_esr;
1314 gr->sm_error_states[sm_id].hww_warp_esr_pc =
1315 sm_error_state->hww_warp_esr_pc;
1316 gr->sm_error_states[sm_id].hww_global_esr_report_mask =
1317 sm_error_state->hww_global_esr_report_mask;
1318 gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
1319 sm_error_state->hww_warp_esr_report_mask;
1320
1321 err = gr_gk20a_disable_ctxsw(g);
1322 if (err) {
1323 nvgpu_err(g, "unable to stop gr ctxsw");
1324 goto fail;
1325 }
1326
1327 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1328 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1329
1330 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1331
1332 if (gk20a_is_channel_ctx_resident(ch)) {
1333 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1334 gr->sm_error_states[sm_id].hww_global_esr);
1335 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1336 gr->sm_error_states[sm_id].hww_warp_esr);
1337 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
1338 gr->sm_error_states[sm_id].hww_warp_esr_pc);
1339 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1340 gr->sm_error_states[sm_id].hww_global_esr_report_mask);
1341 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1342 gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
1343 } else {
1344 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
1345 if (err)
1346 goto enable_ctxsw;
1347
1348 gr_gk20a_ctx_patch_write(g, ch_ctx,
1349 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1350 gr->sm_error_states[sm_id].hww_global_esr_report_mask,
1351 true);
1352 gr_gk20a_ctx_patch_write(g, ch_ctx,
1353 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1354 gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
1355 true);
1356
1357 gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
1358 }
1359
1360enable_ctxsw:
1361 err = gr_gk20a_enable_ctxsw(g);
1362
1363fail:
1364 nvgpu_mutex_release(&g->dbg_sessions_lock);
1365 return err;
1366}
1367
1368int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1369 struct channel_gk20a *ch, u32 sm_id)
1370{
1371 u32 gpc, tpc, offset;
1372 u32 val;
1373 struct gr_gk20a *gr = &g->gr;
1374 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1375 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1376 GPU_LIT_TPC_IN_GPC_STRIDE);
1377 int err = 0;
1378
1379 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1380
1381 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
1382
1383 err = gr_gk20a_disable_ctxsw(g);
1384 if (err) {
1385 nvgpu_err(g, "unable to stop gr ctxsw");
1386 goto fail;
1387 }
1388
1389 if (gk20a_is_channel_ctx_resident(ch)) {
1390 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1391 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1392
1393 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1394
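		/* write the latched global ESR value back to clear it, then zero the warp ESR */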
1395 val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1396 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1397 val);
1398 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1399 0);
1400 }
1401
1402 err = gr_gk20a_enable_ctxsw(g);
1403
1404fail:
1405 nvgpu_mutex_release(&g->dbg_sessions_lock);
1406 return err;
1407}
1408
1409int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
1410 struct nvgpu_preemption_modes_rec *preemption_modes_rec)
1411{
1412 preemption_modes_rec->graphics_preemption_mode_flags =
1413 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
1414 preemption_modes_rec->compute_preemption_mode_flags = (
1415 NVGPU_PREEMPTION_MODE_COMPUTE_WFI |
1416 NVGPU_PREEMPTION_MODE_COMPUTE_CTA);
1417
1418 preemption_modes_rec->default_graphics_preempt_mode =
1419 NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
1420 preemption_modes_rec->default_compute_preempt_mode =
1421 NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
1422
1423 return 0;
1424}
1425
1426bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1427{
1428 u32 ltc_shared_base = ltc_ltcs_ltss_v();
1429 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1430
1431 return (addr >= ltc_shared_base) &&
1432 (addr < (ltc_shared_base + lts_stride));
1433}
1434
1435bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
1436{
1437 u32 lts_shared_base = ltc_ltc0_ltss_v();
1438 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1439 u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
1440 u32 base_offset = lts_shared_base & addr_mask;
1441 u32 end_offset = base_offset + lts_stride;
1442
1443 return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) &&
1444 ((addr & addr_mask) >= base_offset) &&
1445 ((addr & addr_mask) < end_offset);
1446}
1447
1448static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
1449 u32 *priv_addr_table,
1450 u32 *priv_addr_table_index)
1451{
1452 u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
1453 u32 index = *priv_addr_table_index;
1454 u32 lts_num;
1455 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1456 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1457
1458 for (lts_num = 0; lts_num < num_ltc_slices; lts_num++)
1459 priv_addr_table[index++] = ltc_ltc0_lts0_v() +
1460 ltc_num * ltc_stride +
1461 lts_num * lts_stride +
1462 (addr & (lts_stride - 1));
1463
1464 *priv_addr_table_index = index;
1465}
1466
1467void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1468 u32 *priv_addr_table,
1469 u32 *priv_addr_table_index)
1470{
1471 u32 num_ltc = g->ltc_count;
1472 u32 i, start, ltc_num = 0;
1473 u32 pltcg_base = ltc_pltcg_base_v();
1474 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1475
1476 for (i = 0; i < num_ltc; i++) {
1477 start = pltcg_base + i * ltc_stride;
1478 if ((addr >= start) && (addr < (start + ltc_stride))) {
1479 ltc_num = i;
1480 break;
1481 }
1482 }
1483 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
1484 priv_addr_table_index);
1485}
1486
1487void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1488 u32 *priv_addr_table,
1489 u32 *priv_addr_table_index)
1490{
1491 u32 num_ltc = g->ltc_count;
1492 u32 ltc_num;
1493
1494 for (ltc_num = 0; ltc_num < num_ltc; ltc_num++)
1495 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num,
1496 priv_addr_table, priv_addr_table_index);
1497}
1498
1499void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
1500 u32 global_esr)
1501{
1502 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
1503
1504 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1505 global_esr);
1506
1507 /* clear the warp hww */
1508 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, 0);
1509}
1510
1511/*
1512 * Disable both surface and LG coalesce.
1513 */
1514void gm20a_gr_disable_rd_coalesce(struct gk20a *g)
1515{
1516 u32 dbg2_reg;
1517
1518 dbg2_reg = gk20a_readl(g, gr_gpcs_tpcs_tex_m_dbg2_r());
1519 dbg2_reg = set_field(dbg2_reg,
1520 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_m(),
1521 gr_gpcs_tpcs_tex_m_dbg2_lg_rd_coalesce_en_f(0));
1522 dbg2_reg = set_field(dbg2_reg,
1523 gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(),
1524 gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_f(0));
1525
1526 gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg);
1527}
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
new file mode 100644
index 00000000..18e6b032
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -0,0 +1,137 @@
1/*
2 * GM20B GPC MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_GR_MMU_H
26#define _NVHOST_GM20B_GR_MMU_H
27
28struct gk20a;
29struct nvgpu_warpstate;
30
31enum {
32 MAXWELL_B = 0xB197,
33 MAXWELL_COMPUTE_B = 0xB1C0,
34	KEPLER_INLINE_TO_MEMORY_B = 0xA140,
35	MAXWELL_DMA_COPY_A = 0xB0B5,
36	MAXWELL_CHANNEL_GPFIFO_A = 0xB06F,
37};
38
39#define NVB197_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
40#define NVB197_SET_CIRCULAR_BUFFER_SIZE 0x1280
41#define NVB197_SET_SHADER_EXCEPTIONS 0x1528
42#define NVB197_SET_RD_COALESCE 0x102c
43#define NVB1C0_SET_SHADER_EXCEPTIONS 0x1528
44#define NVB1C0_SET_RD_COALESCE 0x0228
45
46#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
47
48void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
49 struct channel_ctx_gk20a *ch_ctx,
50 u64 addr, bool patch);
51int gr_gm20b_init_fs_state(struct gk20a *g);
52int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
53void gr_gm20b_set_rd_coalesce(struct gk20a *g, u32 data);
54void gm20a_gr_disable_rd_coalesce(struct gk20a *g);
55void gr_gm20b_init_gpc_mmu(struct gk20a *g);
56void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
57void gr_gm20b_cb_size_default(struct gk20a *g);
58int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
59void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
60 struct channel_ctx_gk20a *ch_ctx,
61 u64 addr, u64 size, bool patch);
62int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
63 struct channel_gk20a *c, bool patch);
64void gr_gm20b_commit_global_pagepool(struct gk20a *g,
65 struct channel_ctx_gk20a *ch_ctx,
66 u64 addr, u32 size, bool patch);
67int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
68 u32 class_num, u32 offset, u32 data);
69void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
70void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data);
71void gr_gm20b_set_hww_esr_report_mask(struct gk20a *g);
72bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num);
73bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num);
74bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num);
75void gr_gm20b_init_sm_dsm_reg_info(void);
76void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g,
77 u32 *num_sm_dsm_perf_regs,
78 u32 **sm_dsm_perf_regs,
79 u32 *perf_register_stride);
80void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
81 u32 *num_sm_dsm_perf_ctrl_regs,
82 u32 **sm_dsm_perf_ctrl_regs,
83 u32 *ctrl_register_stride);
84u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
85void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
86void gr_gm20b_load_tpc_mask(struct gk20a *g);
87void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
88 u32 gpc, u32 tpc, u32 smid);
89int gr_gm20b_load_smid_config(struct gk20a *g);
90int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
91 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
92bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
93u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
94int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
95int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
96void gr_gm20b_detect_sm_arch(struct gk20a *g);
97u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
98int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
99 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm,
100 u32 class,
101 u32 flags);
102void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
103 struct channel_ctx_gk20a *ch_ctx,
104 struct nvgpu_mem *mem);
105int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
106 struct gk20a_debug_output *o);
107int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
108 bool enable);
109u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g);
110u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g);
111u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g);
112u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
113u32 gr_gm20b_get_max_fbps_count(struct gk20a *g);
114void gr_gm20b_init_cyclestats(struct gk20a *g);
115void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem);
116void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state);
117void gr_gm20b_get_access_map(struct gk20a *g,
118 u32 **whitelist, int *num_entries);
119int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc);
120int gm20b_gr_update_sm_error_state(struct gk20a *g,
121 struct channel_gk20a *ch, u32 sm_id,
122 struct nvgpu_gr_sm_error_state *sm_error_state);
123int gm20b_gr_clear_sm_error_state(struct gk20a *g,
124 struct channel_gk20a *ch, u32 sm_id);
125int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
126 struct nvgpu_preemption_modes_rec *preemption_modes_rec);
127bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr);
128bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr);
129void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
130 u32 *priv_addr_table,
131 u32 *priv_addr_table_index);
132void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
133 u32 *priv_addr_table,
134 u32 *priv_addr_table_index);
135void gm20b_gr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
136 u32 global_esr);
137#endif
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
new file mode 100644
index 00000000..227b6b6c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -0,0 +1,708 @@
1/*
2 * GM20B Graphics
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/ce2_gk20a.h"
27#include "gk20a/dbg_gpu_gk20a.h"
28#include "gk20a/fb_gk20a.h"
29#include "gk20a/fifo_gk20a.h"
30#include "gk20a/therm_gk20a.h"
31#include "gk20a/mm_gk20a.h"
32#include "gk20a/css_gr_gk20a.h"
33#include "gk20a/mc_gk20a.h"
34#include "gk20a/bus_gk20a.h"
35#include "gk20a/flcn_gk20a.h"
36#include "gk20a/priv_ring_gk20a.h"
37#include "gk20a/regops_gk20a.h"
38#include "gk20a/pmu_gk20a.h"
39#include "gk20a/gr_gk20a.h"
40#include "gk20a/tsg_gk20a.h"
41
42#include "ltc_gm20b.h"
43#include "gr_gm20b.h"
45#include "fb_gm20b.h"
46#include "gm20b_gating_reglist.h"
47#include "fifo_gm20b.h"
48#include "gr_ctx_gm20b.h"
49#include "mm_gm20b.h"
50#include "pmu_gm20b.h"
51#include "clk_gm20b.h"
52#include "regops_gm20b.h"
53#include "therm_gm20b.h"
54#include "bus_gm20b.h"
55#include "hal_gm20b.h"
56#include "acr_gm20b.h"
57
58#include <nvgpu/debug.h>
59#include <nvgpu/bug.h>
60#include <nvgpu/enabled.h>
61#include <nvgpu/bus.h>
62
63#include <nvgpu/hw/gm20b/hw_proj_gm20b.h>
64#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
65#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
66#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
67#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
68#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
69#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
70
71#define PRIV_SECURITY_DISABLE 0x01
72
73int gm20b_get_litter_value(struct gk20a *g, int value)
74{
75 int ret = EINVAL;
76 switch (value) {
77 case GPU_LIT_NUM_GPCS:
78 ret = proj_scal_litter_num_gpcs_v();
79 break;
80 case GPU_LIT_NUM_PES_PER_GPC:
81 ret = proj_scal_litter_num_pes_per_gpc_v();
82 break;
83 case GPU_LIT_NUM_ZCULL_BANKS:
84 ret = proj_scal_litter_num_zcull_banks_v();
85 break;
86 case GPU_LIT_NUM_TPC_PER_GPC:
87 ret = proj_scal_litter_num_tpc_per_gpc_v();
88 break;
89 case GPU_LIT_NUM_SM_PER_TPC:
90 ret = proj_scal_litter_num_sm_per_tpc_v();
91 break;
92 case GPU_LIT_NUM_FBPS:
93 ret = proj_scal_litter_num_fbps_v();
94 break;
95 case GPU_LIT_GPC_BASE:
96 ret = proj_gpc_base_v();
97 break;
98 case GPU_LIT_GPC_STRIDE:
99 ret = proj_gpc_stride_v();
100 break;
101 case GPU_LIT_GPC_SHARED_BASE:
102 ret = proj_gpc_shared_base_v();
103 break;
104 case GPU_LIT_TPC_IN_GPC_BASE:
105 ret = proj_tpc_in_gpc_base_v();
106 break;
107 case GPU_LIT_TPC_IN_GPC_STRIDE:
108 ret = proj_tpc_in_gpc_stride_v();
109 break;
110 case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
111 ret = proj_tpc_in_gpc_shared_base_v();
112 break;
113 case GPU_LIT_PPC_IN_GPC_BASE:
114 ret = proj_ppc_in_gpc_base_v();
115 break;
116 case GPU_LIT_PPC_IN_GPC_STRIDE:
117 ret = proj_ppc_in_gpc_stride_v();
118 break;
119 case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
120 ret = proj_ppc_in_gpc_shared_base_v();
121 break;
122 case GPU_LIT_ROP_BASE:
123 ret = proj_rop_base_v();
124 break;
125 case GPU_LIT_ROP_STRIDE:
126 ret = proj_rop_stride_v();
127 break;
128 case GPU_LIT_ROP_SHARED_BASE:
129 ret = proj_rop_shared_base_v();
130 break;
131 case GPU_LIT_HOST_NUM_ENGINES:
132 ret = proj_host_num_engines_v();
133 break;
134 case GPU_LIT_HOST_NUM_PBDMA:
135 ret = proj_host_num_pbdma_v();
136 break;
137 case GPU_LIT_LTC_STRIDE:
138 ret = proj_ltc_stride_v();
139 break;
140 case GPU_LIT_LTS_STRIDE:
141 ret = proj_lts_stride_v();
142 break;
143 /* Even though GM20B doesn't have an FBPA unit, the HW reports one,
144 * and the microcode as a result leaves space in the context buffer
145 * for one, so make sure SW accounts for this also.
146 */
147 case GPU_LIT_NUM_FBPAS:
148 ret = proj_scal_litter_num_fbpas_v();
149 break;
150 /* Hardcode FBPA values other than NUM_FBPAS to 0. */
151 case GPU_LIT_FBPA_STRIDE:
152 case GPU_LIT_FBPA_BASE:
153 case GPU_LIT_FBPA_SHARED_BASE:
154 ret = 0;
155 break;
156 case GPU_LIT_TWOD_CLASS:
157 ret = FERMI_TWOD_A;
158 break;
159 case GPU_LIT_THREED_CLASS:
160 ret = MAXWELL_B;
161 break;
162 case GPU_LIT_COMPUTE_CLASS:
163 ret = MAXWELL_COMPUTE_B;
164 break;
165 case GPU_LIT_GPFIFO_CLASS:
166 ret = MAXWELL_CHANNEL_GPFIFO_A;
167 break;
168 case GPU_LIT_I2M_CLASS:
169 ret = KEPLER_INLINE_TO_MEMORY_B;
170 break;
171 case GPU_LIT_DMA_COPY_CLASS:
172 ret = MAXWELL_DMA_COPY_A;
173 break;
174 default:
175 nvgpu_err(g, "Missing definition %d", value);
176 BUG();
177 break;
178 }
179
180 return ret;
181}
182
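The litter values above are normally reached through the common nvgpu_get_litter_value() wrapper (installed as gops->get_litter_value below) rather than by calling the chip function directly. A minimal caller sketch; the helper name is illustrative, while the wrapper and GPU_LIT_* constants are the ones used elsewhere in this driver:

/* Sketch: query per-chip topology constants through the HAL wrapper.
 * nvgpu_get_litter_value() dispatches to gm20b_get_litter_value() once
 * gm20b_init_hal() has installed g->ops.get_litter_value.
 */
static void example_dump_topology(struct gk20a *g)
{
	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
	u32 tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);

	gk20a_dbg_info("gpcs=%u tpc/gpc=%u gpc_stride=0x%x",
		       num_gpcs, tpc_per_gpc, gpc_stride);
}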
183static const struct gpu_ops gm20b_ops = {
184 .ltc = {
185 .determine_L2_size_bytes = gm20b_determine_L2_size_bytes,
186 .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry,
187 .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry,
188 .init_cbc = gm20b_ltc_init_cbc,
189 .init_fs_state = gm20b_ltc_init_fs_state,
190 .init_comptags = gm20b_ltc_init_comptags,
191 .cbc_ctrl = gm20b_ltc_cbc_ctrl,
192 .isr = gm20b_ltc_isr,
193 .cbc_fix_config = gm20b_ltc_cbc_fix_config,
194 .flush = gm20b_flush_ltc,
195 .set_enabled = gm20b_ltc_set_enabled,
196 },
197 .ce2 = {
198 .isr_stall = gk20a_ce2_isr,
199 .isr_nonstall = gk20a_ce2_nonstall_isr,
200 },
201 .gr = {
202 .get_patch_slots = gr_gk20a_get_patch_slots,
203 .init_gpc_mmu = gr_gm20b_init_gpc_mmu,
204 .bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
205 .cb_size_default = gr_gm20b_cb_size_default,
206 .calc_global_ctx_buffer_size =
207 gr_gm20b_calc_global_ctx_buffer_size,
208 .commit_global_attrib_cb = gr_gm20b_commit_global_attrib_cb,
209 .commit_global_bundle_cb = gr_gm20b_commit_global_bundle_cb,
210 .commit_global_cb_manager = gr_gm20b_commit_global_cb_manager,
211 .commit_global_pagepool = gr_gm20b_commit_global_pagepool,
212 .handle_sw_method = gr_gm20b_handle_sw_method,
213 .set_alpha_circular_buffer_size =
214 gr_gm20b_set_alpha_circular_buffer_size,
215 .set_circular_buffer_size = gr_gm20b_set_circular_buffer_size,
216 .enable_hww_exceptions = gr_gk20a_enable_hww_exceptions,
217 .is_valid_class = gr_gm20b_is_valid_class,
218 .is_valid_gfx_class = gr_gm20b_is_valid_gfx_class,
219 .is_valid_compute_class = gr_gm20b_is_valid_compute_class,
220 .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs,
221 .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs,
222 .init_fs_state = gr_gm20b_init_fs_state,
223 .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask,
224 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
225 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
226 .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
227 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
228 .free_channel_ctx = gk20a_free_channel_ctx,
229 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
230 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
231 .get_zcull_info = gr_gk20a_get_zcull_info,
232 .is_tpc_addr = gr_gm20b_is_tpc_addr,
233 .get_tpc_num = gr_gm20b_get_tpc_num,
234 .detect_sm_arch = gr_gm20b_detect_sm_arch,
235 .add_zbc_color = gr_gk20a_add_zbc_color,
236 .add_zbc_depth = gr_gk20a_add_zbc_depth,
237 .zbc_set_table = gk20a_gr_zbc_set_table,
238 .zbc_query_table = gr_gk20a_query_zbc,
239 .pmu_save_zbc = gk20a_pmu_save_zbc,
240 .add_zbc = gr_gk20a_add_zbc,
241 .pagepool_default_size = gr_gm20b_pagepool_default_size,
242 .init_ctx_state = gr_gk20a_init_ctx_state,
243 .alloc_gr_ctx = gr_gm20b_alloc_gr_ctx,
244 .free_gr_ctx = gr_gk20a_free_gr_ctx,
245 .update_ctxsw_preemption_mode =
246 gr_gm20b_update_ctxsw_preemption_mode,
247 .dump_gr_regs = gr_gm20b_dump_gr_status_regs,
248 .update_pc_sampling = gr_gm20b_update_pc_sampling,
249 .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
250 .get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp,
251 .get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc,
252 .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
253 .get_max_fbps_count = gr_gm20b_get_max_fbps_count,
254 .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
255 .wait_empty = gr_gk20a_wait_idle,
256 .init_cyclestats = gr_gm20b_init_cyclestats,
257 .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
258 .enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
259 .bpt_reg_info = gr_gm20b_bpt_reg_info,
260 .get_access_map = gr_gm20b_get_access_map,
261 .handle_fecs_error = gk20a_gr_handle_fecs_error,
262 .handle_sm_exception = gr_gk20a_handle_sm_exception,
263 .handle_tex_exception = gr_gk20a_handle_tex_exception,
264 .enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions,
265 .enable_exceptions = gk20a_gr_enable_exceptions,
266 .get_lrf_tex_ltc_dram_override = NULL,
267 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
268 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
269 .record_sm_error_state = gm20b_gr_record_sm_error_state,
270 .update_sm_error_state = gm20b_gr_update_sm_error_state,
271 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
272 .suspend_contexts = gr_gk20a_suspend_contexts,
273 .resume_contexts = gr_gk20a_resume_contexts,
274 .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
275 .init_sm_id_table = gr_gk20a_init_sm_id_table,
276 .load_smid_config = gr_gm20b_load_smid_config,
277 .program_sm_id_numbering = gr_gm20b_program_sm_id_numbering,
278 .is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr,
279 .is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr,
280 .split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr,
281 .split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr,
282 .setup_rop_mapping = gr_gk20a_setup_rop_mapping,
283 .program_zcull_mapping = gr_gk20a_program_zcull_mapping,
284 .commit_global_timeslice = gr_gk20a_commit_global_timeslice,
285 .commit_inst = gr_gk20a_commit_inst,
286 .write_zcull_ptr = gr_gk20a_write_zcull_ptr,
287 .write_pm_ptr = gr_gk20a_write_pm_ptr,
288 .init_elcg_mode = gr_gk20a_init_elcg_mode,
289 .load_tpc_mask = gr_gm20b_load_tpc_mask,
290 .inval_icache = gr_gk20a_inval_icache,
291 .trigger_suspend = gr_gk20a_trigger_suspend,
292 .wait_for_pause = gr_gk20a_wait_for_pause,
293 .resume_from_pause = gr_gk20a_resume_from_pause,
294 .clear_sm_errors = gr_gk20a_clear_sm_errors,
295 .tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions,
296 .get_esr_sm_sel = gk20a_gr_get_esr_sm_sel,
297 .sm_debugger_attached = gk20a_gr_sm_debugger_attached,
298 .suspend_single_sm = gk20a_gr_suspend_single_sm,
299 .suspend_all_sms = gk20a_gr_suspend_all_sms,
300 .resume_single_sm = gk20a_gr_resume_single_sm,
301 .resume_all_sms = gk20a_gr_resume_all_sms,
302 .get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr,
303 .get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr,
304 .get_sm_no_lock_down_hww_global_esr_mask =
305 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask,
306 .lock_down_sm = gk20a_gr_lock_down_sm,
307 .wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down,
308 .clear_sm_hww = gm20b_gr_clear_sm_hww,
309 .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
310 .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
311 .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
312 .init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data,
313 },
314 .fb = {
315 .reset = fb_gk20a_reset,
316 .init_hw = gk20a_fb_init_hw,
317 .init_fs_state = fb_gm20b_init_fs_state,
318 .set_mmu_page_size = gm20b_fb_set_mmu_page_size,
319 .set_use_full_comp_tag_line =
320 gm20b_fb_set_use_full_comp_tag_line,
321 .compression_page_size = gm20b_fb_compression_page_size,
322 .compressible_page_size = gm20b_fb_compressible_page_size,
323 .vpr_info_fetch = gm20b_fb_vpr_info_fetch,
324 .dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info,
325 .read_wpr_info = gm20b_fb_read_wpr_info,
326 .is_debug_mode_enabled = gm20b_fb_debug_mode_enabled,
327 .set_debug_mode = gm20b_fb_set_debug_mode,
328 .tlb_invalidate = gk20a_fb_tlb_invalidate,
329 .mem_unlock = NULL,
330 },
331 .clock_gating = {
332 .slcg_bus_load_gating_prod =
333 gm20b_slcg_bus_load_gating_prod,
334 .slcg_ce2_load_gating_prod =
335 gm20b_slcg_ce2_load_gating_prod,
336 .slcg_chiplet_load_gating_prod =
337 gm20b_slcg_chiplet_load_gating_prod,
338 .slcg_ctxsw_firmware_load_gating_prod =
339 gm20b_slcg_ctxsw_firmware_load_gating_prod,
340 .slcg_fb_load_gating_prod =
341 gm20b_slcg_fb_load_gating_prod,
342 .slcg_fifo_load_gating_prod =
343 gm20b_slcg_fifo_load_gating_prod,
344 .slcg_gr_load_gating_prod =
345 gr_gm20b_slcg_gr_load_gating_prod,
346 .slcg_ltc_load_gating_prod =
347 ltc_gm20b_slcg_ltc_load_gating_prod,
348 .slcg_perf_load_gating_prod =
349 gm20b_slcg_perf_load_gating_prod,
350 .slcg_priring_load_gating_prod =
351 gm20b_slcg_priring_load_gating_prod,
352 .slcg_pmu_load_gating_prod =
353 gm20b_slcg_pmu_load_gating_prod,
354 .slcg_therm_load_gating_prod =
355 gm20b_slcg_therm_load_gating_prod,
356 .slcg_xbar_load_gating_prod =
357 gm20b_slcg_xbar_load_gating_prod,
358 .blcg_bus_load_gating_prod =
359 gm20b_blcg_bus_load_gating_prod,
360 .blcg_ctxsw_firmware_load_gating_prod =
361 gm20b_blcg_ctxsw_firmware_load_gating_prod,
362 .blcg_fb_load_gating_prod =
363 gm20b_blcg_fb_load_gating_prod,
364 .blcg_fifo_load_gating_prod =
365 gm20b_blcg_fifo_load_gating_prod,
366 .blcg_gr_load_gating_prod =
367 gm20b_blcg_gr_load_gating_prod,
368 .blcg_ltc_load_gating_prod =
369 gm20b_blcg_ltc_load_gating_prod,
370 .blcg_pwr_csb_load_gating_prod =
371 gm20b_blcg_pwr_csb_load_gating_prod,
372 .blcg_xbar_load_gating_prod =
373 gm20b_blcg_xbar_load_gating_prod,
374 .blcg_pmu_load_gating_prod =
375 gm20b_blcg_pmu_load_gating_prod,
376 .pg_gr_load_gating_prod =
377 gr_gm20b_pg_gr_load_gating_prod,
378 },
379 .fifo = {
380 .init_fifo_setup_hw = gk20a_init_fifo_setup_hw,
381 .bind_channel = channel_gm20b_bind,
382 .unbind_channel = gk20a_fifo_channel_unbind,
383 .disable_channel = gk20a_fifo_disable_channel,
384 .enable_channel = gk20a_fifo_enable_channel,
385 .alloc_inst = gk20a_fifo_alloc_inst,
386 .free_inst = gk20a_fifo_free_inst,
387 .setup_ramfc = gk20a_fifo_setup_ramfc,
388 .channel_set_timeslice = gk20a_fifo_set_timeslice,
389 .default_timeslice_us = gk20a_fifo_default_timeslice_us,
390 .setup_userd = gk20a_fifo_setup_userd,
391 .userd_gp_get = gk20a_fifo_userd_gp_get,
392 .userd_gp_put = gk20a_fifo_userd_gp_put,
393 .userd_pb_get = gk20a_fifo_userd_pb_get,
394 .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
395 .preempt_channel = gk20a_fifo_preempt_channel,
396 .preempt_tsg = gk20a_fifo_preempt_tsg,
397 .enable_tsg = gk20a_enable_tsg,
398 .disable_tsg = gk20a_disable_tsg,
399 .tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
400 .tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
401 .update_runlist = gk20a_fifo_update_runlist,
402 .trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault,
403 .get_mmu_fault_info = gk20a_fifo_get_mmu_fault_info,
404 .wait_engine_idle = gk20a_fifo_wait_engine_idle,
405 .get_num_fifos = gm20b_fifo_get_num_fifos,
406 .get_pbdma_signature = gk20a_fifo_get_pbdma_signature,
407 .set_runlist_interleave = gk20a_fifo_set_runlist_interleave,
408 .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
409 .force_reset_ch = gk20a_fifo_force_reset_ch,
410 .engine_enum_from_type = gk20a_fifo_engine_enum_from_type,
411 .device_info_data_parse = gm20b_device_info_data_parse,
412 .eng_runlist_base_size = fifo_eng_runlist_base__size_1_v,
413 .init_engine_info = gk20a_fifo_init_engine_info,
414 .runlist_entry_size = ram_rl_entry_size_v,
415 .get_tsg_runlist_entry = gk20a_get_tsg_runlist_entry,
416 .get_ch_runlist_entry = gk20a_get_ch_runlist_entry,
417 .is_fault_engine_subid_gpc = gk20a_is_fault_engine_subid_gpc,
418 .dump_pbdma_status = gk20a_dump_pbdma_status,
419 .dump_eng_status = gk20a_dump_eng_status,
420 .dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc,
421 .intr_0_error_mask = gk20a_fifo_intr_0_error_mask,
422 .is_preempt_pending = gk20a_fifo_is_preempt_pending,
423 .init_pbdma_intr_descs = gm20b_fifo_init_pbdma_intr_descs,
424 .reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
425 .teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg,
426 .handle_sched_error = gk20a_fifo_handle_sched_error,
427 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0,
428 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1,
429 .tsg_bind_channel = gk20a_tsg_bind_channel,
430 .tsg_unbind_channel = gk20a_tsg_unbind_channel,
431#ifdef CONFIG_TEGRA_GK20A_NVHOST
432 .alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf,
433 .free_syncpt_buf = gk20a_fifo_free_syncpt_buf,
434 .add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd,
435 .get_syncpt_wait_cmd_size = gk20a_fifo_get_syncpt_wait_cmd_size,
436 .add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd,
437 .get_syncpt_incr_cmd_size = gk20a_fifo_get_syncpt_incr_cmd_size,
438#endif
439 },
440 .gr_ctx = {
441 .get_netlist_name = gr_gm20b_get_netlist_name,
442 .is_fw_defined = gr_gm20b_is_firmware_defined,
443 },
444 .mm = {
445 .support_sparse = gm20b_mm_support_sparse,
446 .gmmu_map = gk20a_locked_gmmu_map,
447 .gmmu_unmap = gk20a_locked_gmmu_unmap,
448 .vm_bind_channel = gk20a_vm_bind_channel,
449 .fb_flush = gk20a_mm_fb_flush,
450 .l2_invalidate = gk20a_mm_l2_invalidate,
451 .l2_flush = gk20a_mm_l2_flush,
452 .cbc_clean = gk20a_mm_cbc_clean,
453 .set_big_page_size = gm20b_mm_set_big_page_size,
454 .get_big_page_sizes = gm20b_mm_get_big_page_sizes,
455 .get_default_big_page_size = gm20b_mm_get_default_big_page_size,
456 .gpu_phys_addr = gm20b_gpu_phys_addr,
457 .get_iommu_bit = gk20a_mm_get_iommu_bit,
458 .get_mmu_levels = gk20a_mm_get_mmu_levels,
459 .init_pdb = gk20a_mm_init_pdb,
460 .init_mm_setup_hw = gk20a_init_mm_setup_hw,
461 .is_bar1_supported = gm20b_mm_is_bar1_supported,
462 .alloc_inst_block = gk20a_alloc_inst_block,
463 .init_inst_block = gk20a_init_inst_block,
464 .mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
465 .get_kind_invalid = gm20b_get_kind_invalid,
466 .get_kind_pitch = gm20b_get_kind_pitch,
467 },
468 .therm = {
469 .init_therm_setup_hw = gm20b_init_therm_setup_hw,
470 .elcg_init_idle_filters = gk20a_elcg_init_idle_filters,
471 },
472 .pmu = {
473 .pmu_setup_elpg = gm20b_pmu_setup_elpg,
474 .pmu_get_queue_head = pwr_pmu_queue_head_r,
475 .pmu_get_queue_head_size = pwr_pmu_queue_head__size_1_v,
476 .pmu_get_queue_tail = pwr_pmu_queue_tail_r,
477 .pmu_get_queue_tail_size = pwr_pmu_queue_tail__size_1_v,
478 .pmu_queue_head = gk20a_pmu_queue_head,
479 .pmu_queue_tail = gk20a_pmu_queue_tail,
480 .pmu_msgq_tail = gk20a_pmu_msgq_tail,
481 .pmu_mutex_size = pwr_pmu_mutex__size_1_v,
482 .pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
483 .pmu_mutex_release = gk20a_pmu_mutex_release,
484 .write_dmatrfbase = gm20b_write_dmatrfbase,
485 .pmu_elpg_statistics = gk20a_pmu_elpg_statistics,
486 .pmu_pg_init_param = NULL,
487 .pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
488 .pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
489 .pmu_is_lpwr_feature_supported = NULL,
490 .pmu_lpwr_enable_pg = NULL,
491 .pmu_lpwr_disable_pg = NULL,
492 .pmu_pg_param_post_init = NULL,
493 .dump_secure_fuses = pmu_dump_security_fuses_gm20b,
494 .reset_engine = gk20a_pmu_engine_reset,
495 .is_engine_in_reset = gk20a_pmu_is_engine_in_reset,
496 },
497 .clk = {
498 .init_clk_support = gm20b_init_clk_support,
499 .suspend_clk_support = gm20b_suspend_clk_support,
500#ifdef CONFIG_DEBUG_FS
501 .init_debugfs = gm20b_clk_init_debugfs,
502#endif
503 .get_voltage = gm20b_clk_get_voltage,
504 .get_gpcclk_clock_counter = gm20b_clk_get_gpcclk_clock_counter,
505 .pll_reg_write = gm20b_clk_pll_reg_write,
506 .get_pll_debug_data = gm20b_clk_get_pll_debug_data,
507 },
508 .regops = {
509 .get_global_whitelist_ranges =
510 gm20b_get_global_whitelist_ranges,
511 .get_global_whitelist_ranges_count =
512 gm20b_get_global_whitelist_ranges_count,
513 .get_context_whitelist_ranges =
514 gm20b_get_context_whitelist_ranges,
515 .get_context_whitelist_ranges_count =
516 gm20b_get_context_whitelist_ranges_count,
517 .get_runcontrol_whitelist = gm20b_get_runcontrol_whitelist,
518 .get_runcontrol_whitelist_count =
519 gm20b_get_runcontrol_whitelist_count,
520 .get_runcontrol_whitelist_ranges =
521 gm20b_get_runcontrol_whitelist_ranges,
522 .get_runcontrol_whitelist_ranges_count =
523 gm20b_get_runcontrol_whitelist_ranges_count,
524 .get_qctl_whitelist = gm20b_get_qctl_whitelist,
525 .get_qctl_whitelist_count = gm20b_get_qctl_whitelist_count,
526 .get_qctl_whitelist_ranges = gm20b_get_qctl_whitelist_ranges,
527 .get_qctl_whitelist_ranges_count =
528 gm20b_get_qctl_whitelist_ranges_count,
529 .apply_smpc_war = gm20b_apply_smpc_war,
530 },
531 .mc = {
532 .intr_enable = mc_gk20a_intr_enable,
533 .intr_unit_config = mc_gk20a_intr_unit_config,
534 .isr_stall = mc_gk20a_isr_stall,
535 .intr_stall = mc_gk20a_intr_stall,
536 .intr_stall_pause = mc_gk20a_intr_stall_pause,
537 .intr_stall_resume = mc_gk20a_intr_stall_resume,
538 .intr_nonstall = mc_gk20a_intr_nonstall,
539 .intr_nonstall_pause = mc_gk20a_intr_nonstall_pause,
540 .intr_nonstall_resume = mc_gk20a_intr_nonstall_resume,
541 .enable = gk20a_mc_enable,
542 .disable = gk20a_mc_disable,
543 .reset = gk20a_mc_reset,
544 .boot_0 = gk20a_mc_boot_0,
545 .is_intr1_pending = mc_gk20a_is_intr1_pending,
546 },
547 .debug = {
548 .show_dump = gk20a_debug_show_dump,
549 },
550 .dbg_session_ops = {
551 .exec_reg_ops = exec_regops_gk20a,
552 .dbg_set_powergate = dbg_set_powergate,
553 .check_and_set_global_reservation =
554 nvgpu_check_and_set_global_reservation,
555 .check_and_set_context_reservation =
556 nvgpu_check_and_set_context_reservation,
557 .release_profiler_reservation =
558 nvgpu_release_profiler_reservation,
559 .perfbuffer_enable = gk20a_perfbuf_enable_locked,
560 .perfbuffer_disable = gk20a_perfbuf_disable_locked,
561 },
562 .bus = {
563 .init_hw = gk20a_bus_init_hw,
564 .isr = gk20a_bus_isr,
565 .read_ptimer = gk20a_read_ptimer,
566 .get_timestamps_zipper = nvgpu_get_timestamps_zipper,
567 .bar1_bind = gm20b_bus_bar1_bind,
568 },
569#if defined(CONFIG_GK20A_CYCLE_STATS)
570 .css = {
571 .enable_snapshot = css_hw_enable_snapshot,
572 .disable_snapshot = css_hw_disable_snapshot,
573 .check_data_available = css_hw_check_data_available,
574 .set_handled_snapshots = css_hw_set_handled_snapshots,
575 .allocate_perfmon_ids = css_gr_allocate_perfmon_ids,
576 .release_perfmon_ids = css_gr_release_perfmon_ids,
577 },
578#endif
579 .falcon = {
580 .falcon_hal_sw_init = gk20a_falcon_hal_sw_init,
581 },
582 .priv_ring = {
583 .isr = gk20a_priv_ring_isr,
584 },
585 .chip_init_gpu_characteristics = gk20a_init_gpu_characteristics,
586 .get_litter_value = gm20b_get_litter_value,
587};
588
589int gm20b_init_hal(struct gk20a *g)
590{
591 struct gpu_ops *gops = &g->ops;
592 u32 val;
593
594 gops->ltc = gm20b_ops.ltc;
595 gops->ce2 = gm20b_ops.ce2;
596 gops->gr = gm20b_ops.gr;
597 gops->fb = gm20b_ops.fb;
598 gops->clock_gating = gm20b_ops.clock_gating;
599 gops->fifo = gm20b_ops.fifo;
600 gops->gr_ctx = gm20b_ops.gr_ctx;
601 gops->mm = gm20b_ops.mm;
602 gops->therm = gm20b_ops.therm;
603 gops->pmu = gm20b_ops.pmu;
604 /*
605 * clk must be assigned member by member
606 * since some clk ops are assigned during probe prior to HAL init
607 */
608 gops->clk.init_clk_support = gm20b_ops.clk.init_clk_support;
609 gops->clk.suspend_clk_support = gm20b_ops.clk.suspend_clk_support;
610 gops->clk.get_voltage = gm20b_ops.clk.get_voltage;
611 gops->clk.get_gpcclk_clock_counter =
612 gm20b_ops.clk.get_gpcclk_clock_counter;
613 gops->clk.pll_reg_write = gm20b_ops.clk.pll_reg_write;
614 gops->clk.get_pll_debug_data = gm20b_ops.clk.get_pll_debug_data;
615
616 gops->regops = gm20b_ops.regops;
617 gops->mc = gm20b_ops.mc;
618 gops->dbg_session_ops = gm20b_ops.dbg_session_ops;
619 gops->debug = gm20b_ops.debug;
620 gops->bus = gm20b_ops.bus;
621#if defined(CONFIG_GK20A_CYCLE_STATS)
622 gops->css = gm20b_ops.css;
623#endif
624 gops->falcon = gm20b_ops.falcon;
625
626 gops->priv_ring = gm20b_ops.priv_ring;
627
628 /* Lone functions */
629 gops->chip_init_gpu_characteristics =
630 gm20b_ops.chip_init_gpu_characteristics;
631 gops->get_litter_value = gm20b_ops.get_litter_value;
632
633 __nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
634 __nvgpu_set_enabled(g, NVGPU_SEC_SECUREGPCCS, false);
635 __nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
636
637#ifdef CONFIG_TEGRA_ACR
638 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
639 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
640 } else {
641 val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
642 if (!val) {
643 gk20a_dbg_info("priv security is disabled in HW");
644 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
645 } else {
646 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
647 }
648 }
649#else
650 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
651 gk20a_dbg_info("running ASIM with PRIV security disabled");
652 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
653 } else {
654 val = gk20a_readl(g, fuse_opt_priv_sec_en_r());
655 if (!val) {
656 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, false);
657 } else {
658 gk20a_dbg_info("priv security is not supported but enabled");
659 __nvgpu_set_enabled(g, NVGPU_SEC_PRIVSECURITY, true);
660 return -EPERM;
661 }
662 }
663#endif
664
665 /* priv security dependent ops */
666 if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
667 /* Add in ops from gm20b acr */
668 gops->pmu.is_pmu_supported = gm20b_is_pmu_supported;
669 gops->pmu.prepare_ucode = prepare_ucode_blob;
670 gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
671 gops->pmu.is_lazy_bootstrap = gm20b_is_lazy_bootstrap;
672 gops->pmu.is_priv_load = gm20b_is_priv_load;
673 gops->pmu.get_wpr = gm20b_wpr_info;
674 gops->pmu.alloc_blob_space = gm20b_alloc_blob_space;
675 gops->pmu.pmu_populate_loader_cfg =
676 gm20b_pmu_populate_loader_cfg;
677 gops->pmu.flcn_populate_bl_dmem_desc =
678 gm20b_flcn_populate_bl_dmem_desc;
679 gops->pmu.falcon_wait_for_halt = pmu_wait_for_halt;
680 gops->pmu.falcon_clear_halt_interrupt_status =
681 clear_halt_interrupt_status;
682 gops->pmu.init_falcon_setup_hw = gm20b_init_pmu_setup_hw1;
683
684 gops->pmu.init_wpr_region = gm20b_pmu_init_acr;
685 gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode;
686
687 gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
688 } else {
689 /* Inherit from gk20a */
690 gops->pmu.is_pmu_supported = gk20a_is_pmu_supported;
691 gops->pmu.prepare_ucode = nvgpu_pmu_prepare_ns_ucode_blob;
692 gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
693 gops->pmu.pmu_nsbootstrap = pmu_bootstrap;
694
695 gops->pmu.load_lsfalcon_ucode = NULL;
696 gops->pmu.init_wpr_region = NULL;
697
698 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
699 }
700
701 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
702 g->pmu_lsf_pmu_wpr_init_done = 0;
703 g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
704
705 g->name = "gm20b";
706
707 return 0;
708}
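gm20b_init_hal() is the single entry point that fills in g->ops for this chip; the common probe code selects it based on the detected GPU id. A minimal dispatch sketch, assuming a GM20B chip-id constant along the lines of GK20A_GPUID_GM20B (the helper itself is illustrative):

/* Illustrative dispatch: only gm20b_init_hal() is taken from this file;
 * the surrounding switch mirrors how a probe path would pick the HAL.
 */
static int example_pick_hal(struct gk20a *g, u32 gpu_id)
{
	switch (gpu_id) {
	case GK20A_GPUID_GM20B:		/* assumed chip-id constant */
		return gm20b_init_hal(g);
	default:
		return -ENODEV;
	}
}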
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.h b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h
new file mode 100644
index 00000000..22eae182
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.h
@@ -0,0 +1,31 @@
1/*
2 * GM20B Graphics
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_HAL_GM20B_H
26#define _NVHOST_HAL_GM20B_H
27struct gk20a;
28
29int gm20b_init_hal(struct gk20a *g);
30int gm20b_get_litter_value(struct gk20a *g, int value);
31#endif
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
new file mode 100644
index 00000000..6ec9aec5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -0,0 +1,487 @@
1/*
2 * GM20B L2
3 *
4 * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <trace/events/gk20a.h>
26
27#include <nvgpu/timers.h>
28#include <nvgpu/enabled.h>
29#include <nvgpu/bug.h>
30#include <nvgpu/ltc.h>
31
32#include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
33#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
34#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
35#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
36
37#include "gk20a/gk20a.h"
38
39#include "ltc_gm20b.h"
40
41int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
42{
43 /* max memory size (MB) to cover */
44 u32 max_size = gr->max_comptag_mem;
45 /* one tag line covers 128KB */
46 u32 max_comptag_lines = max_size << 3;
47
48 u32 hw_max_comptag_lines =
49 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
50
51 u32 cbc_param =
52 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
53 u32 comptags_per_cacheline =
54 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
55 u32 cacheline_size =
56 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
57 u32 slices_per_ltc =
58 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
59
60 u32 compbit_backing_size;
61
62 int err;
63
64 gk20a_dbg_fn("");
65
66 if (max_comptag_lines == 0)
67 return 0;
68
69 if (max_comptag_lines > hw_max_comptag_lines)
70 max_comptag_lines = hw_max_comptag_lines;
71
72 compbit_backing_size =
73 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
74 cacheline_size * slices_per_ltc * g->ltc_count;
75
76 /* aligned to 2KB * ltc_count */
77 compbit_backing_size +=
78 g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
79
80 /* must be a multiple of 64KB */
81 compbit_backing_size = roundup(compbit_backing_size, 64*1024);
82
83 max_comptag_lines =
84 (compbit_backing_size * comptags_per_cacheline) /
85 (cacheline_size * slices_per_ltc * g->ltc_count);
86
87 if (max_comptag_lines > hw_max_comptag_lines)
88 max_comptag_lines = hw_max_comptag_lines;
89
90 gk20a_dbg_info("compbit backing store size : %d",
91 compbit_backing_size);
92 gk20a_dbg_info("max comptag lines : %d",
93 max_comptag_lines);
94
95 err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size);
96 if (err)
97 return err;
98
99 err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines);
100 if (err)
101 return err;
102
103 gr->comptags_per_cacheline = comptags_per_cacheline;
104 gr->slices_per_ltc = slices_per_ltc;
105 gr->cacheline_size = cacheline_size;
106
107 return 0;
108}
109
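To make the sizing above concrete, a worked example with illustrative (not chip-verified) parameters:

/* Worked example, assumed values only:
 *   max_comptag_mem = 4096 MB  ->  max_comptag_lines = 4096 << 3 = 32768
 *   comptags_per_cacheline = 4, cacheline_size = 2048 B,
 *   slices_per_ltc = 2, ltc_count = 2
 *
 *   backing  = DIV_ROUND_UP(32768, 4) * 2048 * 2 * 2  = 64 MiB
 *   backing += ltc_count << 11 (2 KiB per-LTC align)  = 64 MiB + 4 KiB
 *   backing  = roundup(backing, 64 KiB)               = 64 MiB + 64 KiB
 */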
110int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
111 u32 min, u32 max)
112{
113 struct gr_gk20a *gr = &g->gr;
114 struct nvgpu_timeout timeout;
115 int err = 0;
116 u32 ltc, slice, ctrl1, val, hw_op = 0;
117 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
118 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
119 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
120 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
121 const u32 max_lines = 16384;
122
123 gk20a_dbg_fn("");
124
125 trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);
126
127 if (gr->compbit_store.mem.size == 0)
128 return 0;
129
130 while (1) {
131 const u32 iter_max = min(min + max_lines - 1, max);
132 bool full_cache_op = true;
133
134 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
135
136 gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max);
137
138 if (op == gk20a_cbc_op_clear) {
139 gk20a_writel(
140 g, ltc_ltcs_ltss_cbc_ctrl2_r(),
141 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
142 min));
143 gk20a_writel(
144 g, ltc_ltcs_ltss_cbc_ctrl3_r(),
145 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
146 iter_max));
147 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
148 full_cache_op = false;
149 } else if (op == gk20a_cbc_op_clean) {
150 /* this is full-cache op */
151 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
152 } else if (op == gk20a_cbc_op_invalidate) {
153 /* this is full-cache op */
154 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
155 } else {
156 nvgpu_err(g, "Unknown op: %u", (unsigned)op);
157 err = -EINVAL;
158 goto out;
159 }
160 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
161 gk20a_readl(g,
162 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
163
164 for (ltc = 0; ltc < g->ltc_count; ltc++) {
165 for (slice = 0; slice < slices_per_ltc; slice++) {
166
167 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
168 ltc * ltc_stride + slice * lts_stride;
169
170 nvgpu_timeout_init(g, &timeout, 2000,
171 NVGPU_TIMER_RETRY_TIMER);
172 do {
173 val = gk20a_readl(g, ctrl1);
174 if (!(val & hw_op))
175 break;
176 nvgpu_udelay(5);
177 } while (!nvgpu_timeout_expired(&timeout));
178
179 if (nvgpu_timeout_peek_expired(&timeout)) {
180 nvgpu_err(g, "comp tag clear timeout");
181 err = -EBUSY;
182 goto out;
183 }
184 }
185 }
186
187 /* are we done? */
188 if (full_cache_op || iter_max == max)
189 break;
190
191 /* note: iter_max is inclusive upper bound */
192 min = iter_max + 1;
193
194 /* give a chance for higher-priority threads to progress */
195 nvgpu_mutex_release(&g->mm.l2_op_lock);
196 }
197out:
198 trace_gk20a_ltc_cbc_ctrl_done(g->name);
199 nvgpu_mutex_release(&g->mm.l2_op_lock);
200 return err;
201}
202
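The clear path above walks the requested range in 16384-line chunks and drops the L2 op lock between chunks so other L2 operations can interleave. A caller sketch that clears the comptag lines attached to one allocation; the offset/line parameters are illustrative and normally come from the comptag allocator:

/* Sketch: clear the CBC lines backing a single buffer. */
static int example_clear_buffer_comptags(struct gk20a *g,
					 u32 comptag_offset, u32 comptag_lines)
{
	if (comptag_lines == 0)
		return 0;

	return g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
				   comptag_offset,
				   comptag_offset + comptag_lines - 1);
}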
203void gm20b_ltc_init_fs_state(struct gk20a *g)
204{
205 u32 reg;
206
207 gk20a_dbg_info("initialize gm20b l2");
208
209 g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
210 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
211 gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);
212
213 gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
214 g->ltc_count);
215 gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
216 g->ltc_count);
217
218 gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
219 gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
220 ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());
221
222 /* Disable LTC interrupts */
223 reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
224 reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
225 reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m();
226 reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m();
227 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
228}
229
230void gm20b_ltc_isr(struct gk20a *g)
231{
232 u32 mc_intr, ltc_intr;
233 unsigned int ltc, slice;
234 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
235 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
236
237 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
238 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
239 for (ltc = 0; ltc < g->ltc_count; ltc++) {
240 if ((mc_intr & 1 << ltc) == 0)
241 continue;
242 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
243 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
244 ltc_stride * ltc +
245 lts_stride * slice);
246 nvgpu_err(g, "ltc%d, slice %d: %08x",
247 ltc, slice, ltc_intr);
248 gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
249 ltc_stride * ltc +
250 lts_stride * slice,
251 ltc_intr);
252 }
253 }
254}
255
256u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
257{
258 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
259 if (val == 2) {
260 return base * 2;
261 } else if (val != 1) {
262 nvgpu_err(g, "Invalid number of active ltcs: %08x", val);
263 }
264
265 return base;
266}
267
268/*
269 * Performs a full flush of the L2 cache.
270 */
271void gm20b_flush_ltc(struct gk20a *g)
272{
273 struct nvgpu_timeout timeout;
274 unsigned int ltc;
275 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
276
277 /* Clean... */
278 gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
279 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
280 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
281 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
282 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
283 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
284 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());
285
286 /* Wait on each LTC individually. */
287 for (ltc = 0; ltc < g->ltc_count; ltc++) {
288 u32 op_pending;
289
290 /*
291 * Use 5ms - this should be sufficient time to flush the cache.
292 * On tegra, rough EMC BW available for old tegra chips (newer
293 * chips are strictly faster) can be estimated as follows:
294 *
295 * Lowest reasonable EMC clock speed will be around 102MHz on
296 * t124 for display enabled boards and generally fixed to max
297 * for non-display boards (since they are generally plugged in).
298 *
299 * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
300 * BW the GPU will likely get about half (display and overhead/
301 * utilization inefficiency eating the rest) so 650MB/s at
302 * worst. Assuming at most 1MB of GPU L2 cache (less for most
303 * chips) worst case is we take 1MB/650MB/s = 1.5ms.
304 *
305 * So 5ms timeout here should be more than sufficient.
306 */
307 nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
308
309 do {
310 int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
311 ltc * ltc_stride;
312 op_pending = gk20a_readl(g, cmgmt1);
313 } while ((op_pending &
314 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
315 !nvgpu_timeout_expired_msg(&timeout,
316 "L2 flush timeout!"));
317 }
318
319 /* And invalidate. */
320 gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
321 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
322 ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
323 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
324 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
325 ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());
326
327 /* Wait on each LTC individually. */
328 for (ltc = 0; ltc < g->ltc_count; ltc++) {
329 u32 op_pending;
330
331 /* Again, 5ms. */
332 nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
333
334 do {
335 int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
336 ltc * ltc_stride;
337 op_pending = gk20a_readl(g, cmgmt0);
338 } while ((op_pending &
339 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
340 !nvgpu_timeout_expired_msg(&timeout,
341 "L2 flush timeout!"));
342 }
343}
344
345int gm20b_determine_L2_size_bytes(struct gk20a *g)
346{
347 u32 lts_per_ltc;
348 u32 ways;
349 u32 sets;
350 u32 bytes_per_line;
351 u32 active_ltcs;
352 u32 cache_size;
353
354 u32 tmp;
355 u32 active_sets_value;
356
357 tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
358 ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));
359
360 active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
361 if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
362 sets = 64;
363 } else if (active_sets_value ==
364 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
365 sets = 32;
366 } else if (active_sets_value ==
367 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
368 sets = 16;
369 } else {
370 nvgpu_err(g, "Unknown constant %u for active sets",
371 (unsigned)active_sets_value);
372 sets = 0;
373 }
374
375 active_ltcs = g->gr.num_fbps;
376
377 /* chip-specific values */
378 lts_per_ltc = 2;
379 bytes_per_line = 128;
380 cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
381
382 return cache_size;
383}
384
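A worked instance of the formula above, using illustrative register readings rather than values measured on hardware:

/* Assumed example: active_ways = 0xffff (hweight32 -> 16 ways),
 * active_sets = "all" (64 sets), num_fbps = 1.
 *
 *   cache_size = 1 ltc * 2 lts * 16 ways * 64 sets * 128 B = 256 KiB
 */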
385/*
386 * Sets the ZBC color for the passed index.
387 */
388void gm20b_ltc_set_zbc_color_entry(struct gk20a *g,
389 struct zbc_entry *color_val,
390 u32 index)
391{
392 u32 i;
393 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
394
395 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
396 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
397
398 for (i = 0;
399 i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
400 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
401 color_val->color_l2[i]);
402 }
403 gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
404}
405
406/*
407 * Sets the ZBC depth for the passed index.
408 */
409void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
410 struct zbc_entry *depth_val,
411 u32 index)
412{
413 u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;
414
415 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
416 ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));
417
418 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
419 depth_val->depth);
420
421 gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
422}
423
424void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
425{
426 u32 max_size = gr->max_comptag_mem;
427 u32 max_comptag_lines = max_size << 3;
428
429 u32 compbit_base_post_divide;
430 u64 compbit_base_post_multiply64;
431 u64 compbit_store_iova;
432 u64 compbit_base_post_divide64;
433
434 if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
435 compbit_store_iova = nvgpu_mem_get_phys_addr(g,
436 &gr->compbit_store.mem);
437 else
438 compbit_store_iova = nvgpu_mem_get_addr(g,
439 &gr->compbit_store.mem);
440
441 compbit_base_post_divide64 = compbit_store_iova >>
442 ltc_ltcs_ltss_cbc_base_alignment_shift_v();
443
444 do_div(compbit_base_post_divide64, g->ltc_count);
445 compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
446
447 compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
448 g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
449
450 if (compbit_base_post_multiply64 < compbit_store_iova)
451 compbit_base_post_divide++;
452
453 /* Bug 1477079 indicates sw adjustment on the posted divided base. */
454 if (g->ops.ltc.cbc_fix_config)
455 compbit_base_post_divide =
456 g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide);
457
458 gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
459 compbit_base_post_divide);
460
461 gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
462 "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
463 (u32)(compbit_store_iova >> 32),
464 (u32)(compbit_store_iova & 0xffffffff),
465 compbit_base_post_divide);
466
467 gr->compbit_store.base_hw = compbit_base_post_divide;
468
469 g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate,
470 0, max_comptag_lines - 1);
471
472}
473
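The rounding check above guards against truncation in do_div(): if dividing the shifted IOVA by the LTC count loses low bits, the programmed base is bumped up so it never points below the real backing store. A small worked case with assumed numbers:

/* Assumed example: compbit_store_iova = 0x10002800, alignment shift = 11,
 * ltc_count = 2.
 *
 *   post_divide   = (0x10002800 >> 11) / 2 = 0x20005 / 2 = 0x10002 (truncated)
 *   post_multiply = (0x10002 * 2) << 11    = 0x10002000  < 0x10002800
 *   -> compbit_base_post_divide is incremented to 0x10003
 */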
474void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled)
475{
476 u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
477 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
478
479 if (enabled)
480 /* bypass disabled (normal caching ops)*/
481 reg &= ~reg_f;
482 else
483 /* bypass enabled (no caching) */
484 reg |= reg_f;
485
486 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
487}
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
new file mode 100644
index 00000000..0f9145be
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
@@ -0,0 +1,49 @@
1/*
2 * GM20B L2
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_LTC
26#define _NVHOST_GM20B_LTC
27struct gpu_ops;
28
29int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
30int gm20b_determine_L2_size_bytes(struct gk20a *g);
31void gm20b_ltc_set_zbc_color_entry(struct gk20a *g,
32 struct zbc_entry *color_val,
33 u32 index);
34void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
35 struct zbc_entry *depth_val,
36 u32 index);
37void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr);
38void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled);
39void gm20b_ltc_init_fs_state(struct gk20a *g);
40int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
41 u32 min, u32 max);
42void gm20b_ltc_isr(struct gk20a *g);
43u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base);
44void gm20b_flush_ltc(struct gk20a *g);
45int gm20b_ltc_alloc_phys_cbc(struct gk20a *g,
46 size_t compbit_backing_size);
47int gm20b_ltc_alloc_virt_cbc(struct gk20a *g,
48 size_t compbit_backing_size);
49#endif
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
new file mode 100644
index 00000000..5cd7706d
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -0,0 +1,86 @@
1/*
2 * GM20B MMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26
27#include "mm_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
30#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
31
32void gm20b_mm_set_big_page_size(struct gk20a *g,
33 struct nvgpu_mem *mem, int size)
34{
35 u32 val;
36
37 gk20a_dbg_fn("");
38
39 gk20a_dbg_info("big page size %d\n", size);
40 val = nvgpu_mem_rd32(g, mem, ram_in_big_page_size_w());
41 val &= ~ram_in_big_page_size_m();
42
43 if (size == SZ_64K)
44 val |= ram_in_big_page_size_64kb_f();
45 else
46 val |= ram_in_big_page_size_128kb_f();
47
48 nvgpu_mem_wr32(g, mem, ram_in_big_page_size_w(), val);
49 gk20a_dbg_fn("done");
50}
51
52u32 gm20b_mm_get_big_page_sizes(void)
53{
54 return SZ_64K | SZ_128K;
55}
56
57u32 gm20b_mm_get_default_big_page_size(void)
58{
59 return SZ_128K;
60}
61
62bool gm20b_mm_support_sparse(struct gk20a *g)
63{
64 return true;
65}
66
67bool gm20b_mm_is_bar1_supported(struct gk20a *g)
68{
69 return true;
70}
71
72u64 gm20b_gpu_phys_addr(struct gk20a *g,
73 struct nvgpu_gmmu_attrs *attrs, u64 phys)
74{
75 return phys;
76}
77
78u32 gm20b_get_kind_invalid(void)
79{
80 return gmmu_pte_kind_invalid_v();
81}
82
83u32 gm20b_get_kind_pitch(void)
84{
85 return gmmu_pte_kind_pitch_v();
86}
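A minimal caller sketch for the big-page helpers above, assuming an instance block already set up by the common MM code (the helper name is illustrative):

/* Sketch: select 64 KiB big pages in an instance block; SZ_64K is one of
 * the sizes advertised by gm20b_mm_get_big_page_sizes().
 */
static void example_use_64k_big_pages(struct gk20a *g,
				      struct nvgpu_mem *inst_block)
{
	if (gm20b_mm_get_big_page_sizes() & SZ_64K)
		gm20b_mm_set_big_page_size(g, inst_block, SZ_64K);
}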
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
new file mode 100644
index 00000000..af67845a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
@@ -0,0 +1,43 @@
1/*
2 * GM20B GMMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef _NVHOST_GM20B_MM
26#define _NVHOST_GM20B_MM
27struct gk20a;
28
29#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
30#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
31
32void gm20b_mm_set_big_page_size(struct gk20a *g,
33 struct nvgpu_mem *mem, int size);
34u32 gm20b_mm_get_big_page_sizes(void);
35u32 gm20b_mm_get_default_big_page_size(void);
36bool gm20b_mm_support_sparse(struct gk20a *g);
37bool gm20b_mm_is_bar1_supported(struct gk20a *g);
38int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
39u64 gm20b_gpu_phys_addr(struct gk20a *g,
40 struct nvgpu_gmmu_attrs *attrs, u64 phys);
41u32 gm20b_get_kind_invalid(void);
42u32 gm20b_get_kind_pitch(void);
43#endif
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
new file mode 100644
index 00000000..664134f9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -0,0 +1,283 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/timers.h>
26#include <nvgpu/pmu.h>
27#include <nvgpu/fuse.h>
28#include <nvgpu/enabled.h>
29
30#include "gk20a/gk20a.h"
31#include "gk20a/pmu_gk20a.h"
32
33#include "acr_gm20b.h"
34#include "pmu_gm20b.h"
35
36#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
38#include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
39
40/*!
41 * Structure/object which single register write need to be done during PG init
42 * sequence to set PROD values.
43 */
44struct pg_init_sequence_list {
45 u32 regaddr;
46 u32 writeval;
47};
48
49#define gm20b_dbg_pmu(fmt, arg...) \
50 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
51
52
53/* PROD settings for ELPG sequencing registers*/
54static struct pg_init_sequence_list _pginitseq_gm20b[] = {
55 { 0x0010ab10, 0x8180},
56 { 0x0010e118, 0x83828180},
57 { 0x0010e068, 0},
58 { 0x0010e06c, 0x00000080},
59 { 0x0010e06c, 0x00000081},
60 { 0x0010e06c, 0x00000082},
61 { 0x0010e06c, 0x00000083},
62 { 0x0010e06c, 0x00000084},
63 { 0x0010e06c, 0x00000085},
64 { 0x0010e06c, 0x00000086},
65 { 0x0010e06c, 0x00000087},
66 { 0x0010e06c, 0x00000088},
67 { 0x0010e06c, 0x00000089},
68 { 0x0010e06c, 0x0000008a},
69 { 0x0010e06c, 0x0000008b},
70 { 0x0010e06c, 0x0000008c},
71 { 0x0010e06c, 0x0000008d},
72 { 0x0010e06c, 0x0000008e},
73 { 0x0010e06c, 0x0000008f},
74 { 0x0010e06c, 0x00000090},
75 { 0x0010e06c, 0x00000091},
76 { 0x0010e06c, 0x00000092},
77 { 0x0010e06c, 0x00000093},
78 { 0x0010e06c, 0x00000094},
79 { 0x0010e06c, 0x00000095},
80 { 0x0010e06c, 0x00000096},
81 { 0x0010e06c, 0x00000097},
82 { 0x0010e06c, 0x00000098},
83 { 0x0010e06c, 0x00000099},
84 { 0x0010e06c, 0x0000009a},
85 { 0x0010e06c, 0x0000009b},
86 { 0x0010ab14, 0x00000000},
87 { 0x0010ab18, 0x00000000},
88 { 0x0010e024, 0x00000000},
89 { 0x0010e028, 0x00000000},
90 { 0x0010e11c, 0x00000000},
91 { 0x0010e120, 0x00000000},
92 { 0x0010ab1c, 0x02010155},
93 { 0x0010e020, 0x001b1b55},
94 { 0x0010e124, 0x01030355},
95 { 0x0010ab20, 0x89abcdef},
96 { 0x0010ab24, 0x00000000},
97 { 0x0010e02c, 0x89abcdef},
98 { 0x0010e030, 0x00000000},
99 { 0x0010e128, 0x89abcdef},
100 { 0x0010e12c, 0x00000000},
101 { 0x0010ab28, 0x74444444},
102 { 0x0010ab2c, 0x70000000},
103 { 0x0010e034, 0x74444444},
104 { 0x0010e038, 0x70000000},
105 { 0x0010e130, 0x74444444},
106 { 0x0010e134, 0x70000000},
107 { 0x0010ab30, 0x00000000},
108 { 0x0010ab34, 0x00000001},
109 { 0x00020004, 0x00000000},
110 { 0x0010e138, 0x00000000},
111 { 0x0010e040, 0x00000000},
112};
113
114int gm20b_pmu_setup_elpg(struct gk20a *g)
115{
116 int ret = 0;
117 u32 reg_writes;
118 u32 index;
119
120 gk20a_dbg_fn("");
121
122 if (g->elpg_enabled) {
123 reg_writes = ((sizeof(_pginitseq_gm20b) /
124 sizeof((_pginitseq_gm20b)[0])));
125 /* Initialize registers with production values*/
126 for (index = 0; index < reg_writes; index++) {
127 gk20a_writel(g, _pginitseq_gm20b[index].regaddr,
128 _pginitseq_gm20b[index].writeval);
129 }
130 }
131
132 gk20a_dbg_fn("done");
133 return ret;
134}
135
136static void pmu_handle_acr_init_wpr_msg(struct gk20a *g, struct pmu_msg *msg,
137 void *param, u32 handle, u32 status)
138{
139 gk20a_dbg_fn("");
140
141 gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_INIT_WPR_REGION");
142
143 if (msg->msg.acr.acrmsg.errorcode == PMU_ACR_SUCCESS)
144 g->pmu_lsf_pmu_wpr_init_done = 1;
145 gk20a_dbg_fn("done");
146}
147
148
149int gm20b_pmu_init_acr(struct gk20a *g)
150{
151 struct nvgpu_pmu *pmu = &g->pmu;
152 struct pmu_cmd cmd;
153 u32 seq;
154
155 gk20a_dbg_fn("");
156
157 /* init ACR */
158 memset(&cmd, 0, sizeof(struct pmu_cmd));
159 cmd.hdr.unit_id = PMU_UNIT_ACR;
160 cmd.hdr.size = PMU_CMD_HDR_SIZE +
161 sizeof(struct pmu_acr_cmd_init_wpr_details);
162 cmd.cmd.acr.init_wpr.cmd_type = PMU_ACR_CMD_ID_INIT_WPR_REGION;
163 cmd.cmd.acr.init_wpr.regionid = 0x01;
164 cmd.cmd.acr.init_wpr.wproffset = 0x00;
165 gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_INIT_WPR_REGION");
166 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
167 pmu_handle_acr_init_wpr_msg, pmu, &seq, ~0);
168
169 gk20a_dbg_fn("done");
170 return 0;
171}
172
173void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
174 void *param, u32 handle, u32 status)
175{
176
177 gk20a_dbg_fn("");
178
179
180 gm20b_dbg_pmu("reply PMU_ACR_CMD_ID_BOOTSTRAP_FALCON");
181
182	gm20b_dbg_pmu("falcon id = %x\n", msg->msg.acr.acrmsg.falconid);
183 g->pmu_lsf_loaded_falcon_id = msg->msg.acr.acrmsg.falconid;
184 gk20a_dbg_fn("done");
185}
186
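/*
 * Poll FECS ctxsw mailbox 0 until it holds the expected value or the
 * timeout expires; returns 0 on a match, -ETIMEDOUT otherwise.
 */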
187static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout_ms,
188 u32 val)
189{
190 unsigned long delay = GR_FECS_POLL_INTERVAL;
191 u32 reg;
192 struct nvgpu_timeout timeout;
193
194 gk20a_dbg_fn("");
195 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
196
197 nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
198
199 do {
200 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
201 if (reg == val)
202 return 0;
203 nvgpu_udelay(delay);
204 } while (!nvgpu_timeout_expired(&timeout));
205
206 return -ETIMEDOUT;
207}
208
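/*
 * Ask the PMU ACR task to bootstrap the given LS falcon. The command is
 * posted only after the WPR init handshake has completed.
 */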
209void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags)
210{
211 struct nvgpu_pmu *pmu = &g->pmu;
212 struct pmu_cmd cmd;
213 u32 seq;
214
215 gk20a_dbg_fn("");
216
217 gm20b_dbg_pmu("wprinit status = %x\n", g->pmu_lsf_pmu_wpr_init_done);
218 if (g->pmu_lsf_pmu_wpr_init_done) {
219 /* send message to load FECS falcon */
220 memset(&cmd, 0, sizeof(struct pmu_cmd));
221 cmd.hdr.unit_id = PMU_UNIT_ACR;
222 cmd.hdr.size = PMU_CMD_HDR_SIZE +
223 sizeof(struct pmu_acr_cmd_bootstrap_falcon);
224 cmd.cmd.acr.bootstrap_falcon.cmd_type =
225 PMU_ACR_CMD_ID_BOOTSTRAP_FALCON;
226 cmd.cmd.acr.bootstrap_falcon.flags = flags;
227 cmd.cmd.acr.bootstrap_falcon.falconid = falcon_id;
228 gm20b_dbg_pmu("cmd post PMU_ACR_CMD_ID_BOOTSTRAP_FALCON: %x\n",
229 falcon_id);
230 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
231 pmu_handle_fecs_boot_acr_msg, pmu, &seq, ~0);
232 }
233
234 gk20a_dbg_fn("done");
235 return;
236}
237
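/*
 * Bootstrap FECS via the PMU: wait for WPR init if needed, clear FECS
 * ctxsw mailbox 0, request the bootstrap and poll for the 0x55AA55AA
 * token signalling that FECS is ready.
 */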
238int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask)
239{
240	int err = 0;
241 u32 flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
242 unsigned long timeout = gk20a_get_gr_idle_timeout(g);
243
244 /* GM20B PMU supports loading FECS only */
245	if (falconidmask != (1 << LSF_FALCON_ID_FECS))
246 return -EINVAL;
247	/* check whether the PMU is ready to bootstrap the LSF; if not, wait for it */
248 if (!g->pmu_lsf_pmu_wpr_init_done) {
249 pmu_wait_message_cond(&g->pmu,
250 gk20a_get_gr_idle_timeout(g),
251 &g->pmu_lsf_pmu_wpr_init_done, 1);
252		/* check again; if it is still not ready, report an error */
253 if (!g->pmu_lsf_pmu_wpr_init_done) {
254 nvgpu_err(g, "PMU not ready to load LSF");
255 return -ETIMEDOUT;
256 }
257 }
258 /* load FECS */
259 gk20a_writel(g,
260 gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
261 gm20b_pmu_load_lsf(g, LSF_FALCON_ID_FECS, flags);
262 err = pmu_gm20b_ctx_wait_lsf_ready(g, timeout,
263 0x55AA55AA);
264 return err;
265}
266
267void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr)
268{
269 gk20a_writel(g, pwr_falcon_dmatrfbase_r(), addr);
270}
271
272/* Dump security-related fuses */
273void pmu_dump_security_fuses_gm20b(struct gk20a *g)
274{
275 u32 val;
276
277 nvgpu_err(g, "FUSE_OPT_SEC_DEBUG_EN_0: 0x%x",
278 gk20a_readl(g, fuse_opt_sec_debug_en_r()));
279 nvgpu_err(g, "FUSE_OPT_PRIV_SEC_EN_0: 0x%x",
280 gk20a_readl(g, fuse_opt_priv_sec_en_r()));
281 nvgpu_tegra_fuse_read_gcplex_config_fuse(g, &val);
282 nvgpu_err(g, "FUSE_GCPLEX_CONFIG_FUSE_0: 0x%x", val);
283}
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
new file mode 100644
index 00000000..ec50fb06
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
@@ -0,0 +1,37 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef __PMU_GM20B_H_
26#define __PMU_GM20B_H_
27
28struct gk20a;
29
30int gm20b_load_falcon_ucode(struct gk20a *g, u32 falconidmask);
31int gm20b_pmu_setup_elpg(struct gk20a *g);
32void pmu_dump_security_fuses_gm20b(struct gk20a *g);
33void gm20b_pmu_load_lsf(struct gk20a *g, u32 falcon_id, u32 flags);
34int gm20b_pmu_init_acr(struct gk20a *g);
35void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr);
36
37#endif /*__PMU_GM20B_H_*/
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.c b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c
new file mode 100644
index 00000000..79c980f4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.c
@@ -0,0 +1,450 @@
1/*
2 * Tegra GM20B GPU Debugger Driver Register Ops
3 *
4 * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26#include "gk20a/dbg_gpu_gk20a.h"
27#include "gk20a/regops_gk20a.h"
28#include "regops_gm20b.h"
29
30#include <nvgpu/bsearch.h>
31
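/* global */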
32static const struct regop_offset_range gm20b_global_whitelist_ranges[] = {
33 { 0x00001a00, 3 },
34 { 0x0000259c, 1 },
35 { 0x0000280c, 1 },
36 { 0x00009400, 1 },
37 { 0x00009410, 1 },
38 { 0x00021970, 1 },
39 { 0x00021c00, 4 },
40 { 0x00021c14, 3 },
41 { 0x00021c24, 1 },
42 { 0x00021c2c, 5 },
43 { 0x00021cb8, 2 },
44 { 0x00021d38, 2 },
45 { 0x00021d44, 1 },
46 { 0x00021d4c, 1 },
47 { 0x00021d54, 1 },
48 { 0x00021d5c, 1 },
49 { 0x00021d64, 2 },
50 { 0x00021d70, 1 },
51 { 0x00022430, 7 },
52 { 0x00100c18, 3 },
53 { 0x00100c84, 1 },
54 { 0x00100cc4, 1 },
55 { 0x00106640, 1 },
56 { 0x0010a0a8, 1 },
57 { 0x0010a4f0, 1 },
58 { 0x0010e064, 1 },
59 { 0x0010e164, 1 },
60 { 0x0010e490, 1 },
61 { 0x00140028, 1 },
62 { 0x00140350, 1 },
63 { 0x00140550, 1 },
64 { 0x00142028, 1 },
65 { 0x00142350, 1 },
66 { 0x00142550, 1 },
67 { 0x0017e028, 1 },
68 { 0x0017e350, 1 },
69 { 0x0017e550, 1 },
70 { 0x00180040, 52 },
71 { 0x00180240, 52 },
72 { 0x00180440, 52 },
73 { 0x001a0040, 52 },
74 { 0x001b0040, 52 },
75 { 0x001b0240, 52 },
76 { 0x001b0440, 52 },
77 { 0x001b0640, 52 },
78 { 0x001b4000, 3 },
79 { 0x001b4010, 3 },
80 { 0x001b4020, 3 },
81 { 0x001b4030, 3 },
82 { 0x001b4040, 3 },
83 { 0x001b4050, 3 },
84 { 0x001b4060, 4 },
85 { 0x001b4074, 11 },
86 { 0x001b40a4, 1 },
87 { 0x001b4100, 6 },
88 { 0x001b4124, 2 },
89 { 0x001b8000, 7 },
90 { 0x001bc000, 7 },
91 { 0x001be000, 7 },
92 { 0x00400500, 1 },
93 { 0x00400700, 1 },
94 { 0x0040415c, 1 },
95 { 0x00405850, 1 },
96 { 0x00405908, 1 },
97 { 0x00405b40, 1 },
98 { 0x00405b50, 1 },
99 { 0x00406024, 1 },
100 { 0x00407010, 1 },
101 { 0x00407808, 1 },
102 { 0x0040803c, 1 },
103 { 0x0040880c, 1 },
104 { 0x00408910, 1 },
105 { 0x00408984, 1 },
106 { 0x004090a8, 1 },
107 { 0x004098a0, 1 },
108 { 0x00409b00, 1 },
109 { 0x0041000c, 1 },
110 { 0x00410110, 1 },
111 { 0x00410184, 1 },
112 { 0x0041040c, 1 },
113 { 0x00410510, 1 },
114 { 0x00410584, 1 },
115 { 0x00418384, 1 },
116 { 0x004184a0, 1 },
117 { 0x00418604, 1 },
118 { 0x00418680, 1 },
119 { 0x00418714, 1 },
120 { 0x0041881c, 1 },
121 { 0x00418884, 1 },
122 { 0x004188b0, 1 },
123 { 0x004188c8, 2 },
124 { 0x00418b04, 1 },
125 { 0x00418c04, 1 },
126 { 0x00418c1c, 1 },
127 { 0x00418c88, 1 },
128 { 0x00418d00, 1 },
129 { 0x00418e08, 1 },
130 { 0x00418f08, 1 },
131 { 0x00419000, 1 },
132 { 0x0041900c, 1 },
133 { 0x00419018, 1 },
134 { 0x00419854, 1 },
135 { 0x00419ab0, 1 },
136 { 0x00419ab8, 3 },
137 { 0x00419c0c, 1 },
138 { 0x00419c90, 1 },
139 { 0x00419d08, 2 },
140 { 0x00419e00, 4 },
141 { 0x00419e24, 2 },
142 { 0x00419e44, 11 },
143 { 0x00419e74, 9 },
144 { 0x00419ea4, 1 },
145 { 0x00419eb0, 1 },
146 { 0x00419ef0, 26 },
147 { 0x0041a0a0, 1 },
148 { 0x0041a0a8, 1 },
149 { 0x0041a17c, 1 },
150 { 0x0041a890, 2 },
151 { 0x0041a8a0, 3 },
152 { 0x0041a8b0, 2 },
153 { 0x0041b014, 1 },
154 { 0x0041b0a0, 1 },
155 { 0x0041b0cc, 1 },
156 { 0x0041b0e8, 2 },
157 { 0x0041b1dc, 1 },
158 { 0x0041be14, 1 },
159 { 0x0041bea0, 1 },
160 { 0x0041becc, 1 },
161 { 0x0041bee8, 2 },
162 { 0x0041bfdc, 1 },
163 { 0x0041c054, 1 },
164 { 0x0041c2b0, 1 },
165 { 0x0041c2b8, 3 },
166 { 0x0041c40c, 1 },
167 { 0x0041c490, 1 },
168 { 0x0041c508, 2 },
169 { 0x0041c600, 4 },
170 { 0x0041c624, 2 },
171 { 0x0041c644, 11 },
172 { 0x0041c674, 9 },
173 { 0x0041c6a4, 1 },
174 { 0x0041c6b0, 1 },
175 { 0x0041c6f0, 26 },
176 { 0x0041c854, 1 },
177 { 0x0041cab0, 1 },
178 { 0x0041cab8, 3 },
179 { 0x0041cc0c, 1 },
180 { 0x0041cc90, 1 },
181 { 0x0041cd08, 2 },
182 { 0x0041ce00, 4 },
183 { 0x0041ce24, 2 },
184 { 0x0041ce44, 11 },
185 { 0x0041ce74, 9 },
186 { 0x0041cea4, 1 },
187 { 0x0041ceb0, 1 },
188 { 0x0041cef0, 26 },
189 { 0x00500384, 1 },
190 { 0x005004a0, 1 },
191 { 0x00500604, 1 },
192 { 0x00500680, 1 },
193 { 0x00500714, 1 },
194 { 0x0050081c, 1 },
195 { 0x00500884, 1 },
196 { 0x005008c8, 2 },
197 { 0x00500b04, 1 },
198 { 0x00500c04, 1 },
199 { 0x00500c88, 1 },
200 { 0x00500d00, 1 },
201 { 0x00500e08, 1 },
202 { 0x00500f08, 1 },
203 { 0x00501000, 1 },
204 { 0x0050100c, 1 },
205 { 0x00501018, 1 },
206 { 0x00501854, 1 },
207 { 0x00501ab0, 1 },
208 { 0x00501ab8, 3 },
209 { 0x00501c0c, 1 },
210 { 0x00501c90, 1 },
211 { 0x00501d08, 2 },
212 { 0x00501e00, 4 },
213 { 0x00501e24, 2 },
214 { 0x00501e44, 11 },
215 { 0x00501e74, 9 },
216 { 0x00501ea4, 1 },
217 { 0x00501eb0, 1 },
218 { 0x00501ef0, 26 },
219 { 0x005020a0, 1 },
220 { 0x005020a8, 1 },
221 { 0x0050217c, 1 },
222 { 0x00502890, 2 },
223 { 0x005028a0, 3 },
224 { 0x005028b0, 2 },
225 { 0x00503014, 1 },
226 { 0x005030a0, 1 },
227 { 0x005030cc, 1 },
228 { 0x005030e8, 2 },
229 { 0x005031dc, 1 },
230 { 0x00503e14, 1 },
231 { 0x00503ea0, 1 },
232 { 0x00503ecc, 1 },
233 { 0x00503ee8, 2 },
234 { 0x00503fdc, 1 },
235 { 0x00504054, 1 },
236 { 0x005042b0, 1 },
237 { 0x005042b8, 3 },
238 { 0x0050440c, 1 },
239 { 0x00504490, 1 },
240 { 0x00504508, 2 },
241 { 0x00504600, 4 },
242 { 0x00504614, 6 },
243 { 0x00504634, 2 },
244 { 0x00504644, 11 },
245 { 0x00504674, 9 },
246 { 0x005046a4, 1 },
247 { 0x005046b0, 1 },
248 { 0x005046f0, 28 },
249 { 0x00504854, 1 },
250 { 0x00504ab0, 1 },
251 { 0x00504ab8, 3 },
252 { 0x00504c0c, 1 },
253 { 0x00504c90, 1 },
254 { 0x00504d08, 2 },
255 { 0x00504e00, 4 },
256 { 0x00504e14, 6 },
257 { 0x00504e34, 2 },
258 { 0x00504e44, 11 },
259 { 0x00504e74, 9 },
260 { 0x00504ea4, 1 },
261 { 0x00504eb0, 1 },
262 { 0x00504ef0, 28 },
263};
264static const u32 gm20b_global_whitelist_ranges_count =
265 ARRAY_SIZE(gm20b_global_whitelist_ranges);
266
267/* context */
268
269static const struct regop_offset_range gm20b_context_whitelist_ranges[] = {
270 { 0x0000259c, 1 },
271 { 0x0000280c, 1 },
272 { 0x00400500, 1 },
273 { 0x00405b40, 1 },
274 { 0x00418e00, 1 },
275 { 0x00418e34, 1 },
276 { 0x00418e40, 2 },
277 { 0x00418e58, 2 },
278 { 0x00419000, 1 },
279 { 0x00419864, 1 },
280 { 0x00419c90, 1 },
281 { 0x00419d08, 2 },
282 { 0x00419e04, 3 },
283 { 0x00419e24, 2 },
284 { 0x00419e44, 11 },
285 { 0x00419e74, 10 },
286 { 0x00419ea4, 1 },
287 { 0x00419eac, 2 },
288 { 0x00419ee8, 1 },
289 { 0x00419ef0, 26 },
290 { 0x0041b0e8, 2 },
291 { 0x0041bee8, 2 },
292 { 0x0041c490, 1 },
293 { 0x0041c508, 2 },
294 { 0x0041c604, 3 },
295 { 0x0041c624, 2 },
296 { 0x0041c644, 11 },
297 { 0x0041c674, 10 },
298 { 0x0041c6a4, 1 },
299 { 0x0041c6ac, 2 },
300 { 0x0041c6e8, 1 },
301 { 0x0041c6f0, 26 },
302 { 0x0041cc90, 1 },
303 { 0x0041cd08, 2 },
304 { 0x0041ce04, 3 },
305 { 0x0041ce24, 2 },
306 { 0x0041ce44, 11 },
307 { 0x0041ce74, 10 },
308 { 0x0041cea4, 1 },
309 { 0x0041ceac, 2 },
310 { 0x0041cee8, 1 },
311 { 0x0041cef0, 26 },
312 { 0x00501000, 1 },
313 { 0x00501c90, 1 },
314 { 0x00501d08, 2 },
315 { 0x00501e04, 3 },
316 { 0x00501e24, 2 },
317 { 0x00501e44, 11 },
318 { 0x00501e74, 10 },
319 { 0x00501ea4, 1 },
320 { 0x00501eac, 2 },
321 { 0x00501ee8, 1 },
322 { 0x00501ef0, 26 },
323 { 0x005030e8, 2 },
324 { 0x00503ee8, 2 },
325 { 0x00504490, 1 },
326 { 0x00504508, 2 },
327 { 0x00504604, 3 },
328 { 0x00504614, 6 },
329 { 0x00504634, 2 },
330 { 0x00504644, 11 },
331 { 0x00504674, 10 },
332 { 0x005046a4, 1 },
333 { 0x005046ac, 2 },
334 { 0x005046e8, 1 },
335 { 0x005046f0, 28 },
336 { 0x00504c90, 1 },
337 { 0x00504d08, 2 },
338 { 0x00504e04, 3 },
339 { 0x00504e14, 6 },
340 { 0x00504e34, 2 },
341 { 0x00504e44, 11 },
342 { 0x00504e74, 10 },
343 { 0x00504ea4, 1 },
344 { 0x00504eac, 2 },
345 { 0x00504ee8, 1 },
346 { 0x00504ef0, 28 },
347};
348static const u32 gm20b_context_whitelist_ranges_count =
349 ARRAY_SIZE(gm20b_context_whitelist_ranges);
350
351/* runcontrol */
352static const u32 gm20b_runcontrol_whitelist[] = {
353 0x00419e10,
354 0x0041c610,
355 0x0041ce10,
356 0x00501e10,
357 0x00504610,
358 0x00504e10,
359};
360static const u32 gm20b_runcontrol_whitelist_count =
361 ARRAY_SIZE(gm20b_runcontrol_whitelist);
362
363static const struct regop_offset_range gm20b_runcontrol_whitelist_ranges[] = {
364 { 0x00419e10, 1 },
365 { 0x0041c610, 1 },
366 { 0x0041ce10, 1 },
367 { 0x00501e10, 1 },
368 { 0x00504610, 1 },
369 { 0x00504e10, 1 },
370};
371static const u32 gm20b_runcontrol_whitelist_ranges_count =
372 ARRAY_SIZE(gm20b_runcontrol_whitelist_ranges);
373
374
375/* quad ctl */
376static const u32 gm20b_qctl_whitelist[] = {
377};
378static const u32 gm20b_qctl_whitelist_count =
379 ARRAY_SIZE(gm20b_qctl_whitelist);
380
381static const struct regop_offset_range gm20b_qctl_whitelist_ranges[] = {
382};
383static const u32 gm20b_qctl_whitelist_ranges_count =
384 ARRAY_SIZE(gm20b_qctl_whitelist_ranges);
385
386const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void)
387{
388 return gm20b_global_whitelist_ranges;
389}
390
391int gm20b_get_global_whitelist_ranges_count(void)
392{
393 return gm20b_global_whitelist_ranges_count;
394}
395
396const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void)
397{
398 return gm20b_context_whitelist_ranges;
399}
400
401int gm20b_get_context_whitelist_ranges_count(void)
402{
403 return gm20b_context_whitelist_ranges_count;
404}
405
406const u32 *gm20b_get_runcontrol_whitelist(void)
407{
408 return gm20b_runcontrol_whitelist;
409}
410
411int gm20b_get_runcontrol_whitelist_count(void)
412{
413 return gm20b_runcontrol_whitelist_count;
414}
415
416const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void)
417{
418 return gm20b_runcontrol_whitelist_ranges;
419}
420
421int gm20b_get_runcontrol_whitelist_ranges_count(void)
422{
423 return gm20b_runcontrol_whitelist_ranges_count;
424}
425
426const u32 *gm20b_get_qctl_whitelist(void)
427{
428 return gm20b_qctl_whitelist;
429}
430
431int gm20b_get_qctl_whitelist_count(void)
432{
433 return gm20b_qctl_whitelist_count;
434}
435
436const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void)
437{
438 return gm20b_qctl_whitelist_ranges;
439}
440
441int gm20b_get_qctl_whitelist_ranges_count(void)
442{
443 return gm20b_qctl_whitelist_ranges_count;
444}
445
446int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s)
447{
448 /* Not needed on gm20b */
449 return 0;
450}
diff --git a/drivers/gpu/nvgpu/gm20b/regops_gm20b.h b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h
new file mode 100644
index 00000000..f0246e0e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/regops_gm20b.h
@@ -0,0 +1,44 @@
1/*
2 *
3 * Tegra GM20B GPU Debugger Driver Register Ops
4 *
5 * Copyright (c) 2013-2017, NVIDIA CORPORATION. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25#ifndef __REGOPS_GM20B_H_
26#define __REGOPS_GM20B_H_
27
28struct dbg_session_gk20a;
29
30const struct regop_offset_range *gm20b_get_global_whitelist_ranges(void);
31int gm20b_get_global_whitelist_ranges_count(void);
32const struct regop_offset_range *gm20b_get_context_whitelist_ranges(void);
33int gm20b_get_context_whitelist_ranges_count(void);
34const u32 *gm20b_get_runcontrol_whitelist(void);
35int gm20b_get_runcontrol_whitelist_count(void);
36const struct regop_offset_range *gm20b_get_runcontrol_whitelist_ranges(void);
37int gm20b_get_runcontrol_whitelist_ranges_count(void);
38const u32 *gm20b_get_qctl_whitelist(void);
39int gm20b_get_qctl_whitelist_count(void);
40const struct regop_offset_range *gm20b_get_qctl_whitelist_ranges(void);
41int gm20b_get_qctl_whitelist_ranges_count(void);
42int gm20b_apply_smpc_war(struct dbg_session_gk20a *dbg_s);
43
44#endif /* __REGOPS_GM20B_H_ */
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.c b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c
new file mode 100644
index 00000000..ce4d4fab
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.c
@@ -0,0 +1,78 @@
1/*
2 * GM20B THERMAL
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include "gk20a/gk20a.h"
26
27#include "therm_gm20b.h"
28
29#include <nvgpu/hw/gm20b/hw_therm_gm20b.h>
30
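/*
 * Program the NV_THERM external-therm event slowdown factors and the
 * gradual slowdown stepping tables, then enable gradual slowdown.
 */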
31int gm20b_init_therm_setup_hw(struct gk20a *g)
32{
33 u32 v;
34
35 gk20a_dbg_fn("");
36
37 /* program NV_THERM registers */
38 gk20a_writel(g, therm_use_a_r(), therm_use_a_ext_therm_0_enable_f() |
39 therm_use_a_ext_therm_1_enable_f() |
40 therm_use_a_ext_therm_2_enable_f());
41 gk20a_writel(g, therm_evt_ext_therm_0_r(),
42 therm_evt_ext_therm_0_slow_factor_f(0x2));
43 gk20a_writel(g, therm_evt_ext_therm_1_r(),
44 therm_evt_ext_therm_1_slow_factor_f(0x6));
45 gk20a_writel(g, therm_evt_ext_therm_2_r(),
46 therm_evt_ext_therm_2_slow_factor_f(0xe));
47
48 gk20a_writel(g, therm_grad_stepping_table_r(0),
49 therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) |
50 therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) |
51 therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) |
52 therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
53 therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
54 gk20a_writel(g, therm_grad_stepping_table_r(1),
55 therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
56 therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
57 therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
58 therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
59 therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
60
61 v = gk20a_readl(g, therm_clk_timing_r(0));
62 v |= therm_clk_timing_grad_slowdown_enabled_f();
63 gk20a_writel(g, therm_clk_timing_r(0), v);
64
65 v = gk20a_readl(g, therm_config2_r());
66 v |= therm_config2_grad_enable_f(1);
67 v |= therm_config2_slowdown_factor_extended_f(1);
68 gk20a_writel(g, therm_config2_r(), v);
69
70 gk20a_writel(g, therm_grad_stepping1_r(),
71 therm_grad_stepping1_pdiv_duration_f(32));
72
73 v = gk20a_readl(g, therm_grad_stepping0_r());
74 v |= therm_grad_stepping0_feature_enable_f();
75 gk20a_writel(g, therm_grad_stepping0_r(), v);
76
77 return 0;
78}
diff --git a/drivers/gpu/nvgpu/gm20b/therm_gm20b.h b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h
new file mode 100644
index 00000000..df0b4219
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/therm_gm20b.h
@@ -0,0 +1,30 @@
1/*
2 * GM20B THERMAL
3 *
4 * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef THERM_GM20B_H
25#define THERM_GM20B_H
26
27struct gk20a;
28int gm20b_init_therm_setup_hw(struct gk20a *g);
29
30#endif /* THERM_GM20B_H */