Diffstat (limited to 'drivers/gpu/nvgpu/common/pmu')
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu.c          574
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_debug.c     57
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_fw.c       2368
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_ipc.c       907
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c   293
-rw-r--r--  drivers/gpu/nvgpu/common/pmu/pmu_pg.c        748
6 files changed, 4947 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c
new file mode 100644
index 00000000..3447f40d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu.c
@@ -0,0 +1,574 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/pmu.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/log.h>
26#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
27#include <nvgpu/enabled.h>
28#include <nvgpu/barrier.h>
29#include <nvgpu/timers.h>
30#include <nvgpu/bug.h>
31
32#include "gk20a/gk20a.h"
33
34#define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
35#define PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT 10
36
37static int nvgpu_pg_init_task(void *arg);
38
39static int pmu_enable_hw(struct nvgpu_pmu *pmu, bool enable)
40{
41 struct gk20a *g = pmu->g;
42 struct nvgpu_timeout timeout;
43 int err = 0;
44
45 nvgpu_log_fn(g, " %s ", g->name);
46
47 if (enable) {
48 /* bring PMU falcon/engine out of reset */
49 g->ops.pmu.reset_engine(g, true);
50
51 if (g->ops.clock_gating.slcg_pmu_load_gating_prod)
52 g->ops.clock_gating.slcg_pmu_load_gating_prod(g,
53 g->slcg_enabled);
54
55 if (g->ops.clock_gating.blcg_pmu_load_gating_prod)
56 g->ops.clock_gating.blcg_pmu_load_gating_prod(g,
57 g->blcg_enabled);
58
59 /* check for PMU IMEM/DMEM scrubbing complete status */
60 nvgpu_timeout_init(g, &timeout,
61 PMU_MEM_SCRUBBING_TIMEOUT_MAX /
62 PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT,
63 NVGPU_TIMER_RETRY_TIMER);
64 do {
65 if (nvgpu_flcn_get_mem_scrubbing_status(pmu->flcn))
66 goto exit;
67
68 nvgpu_udelay(PMU_MEM_SCRUBBING_TIMEOUT_DEFAULT);
69 } while (!nvgpu_timeout_expired(&timeout));
70
71 /* keep PMU falcon/engine in reset
72 * if IMEM/DMEM scrubbing fails
73 */
74 g->ops.pmu.reset_engine(g, false);
75 nvgpu_err(g, "Falcon mem scrubbing timeout");
76 err = -ETIMEDOUT;
77 } else
78 /* keep PMU falcon/engine in reset */
79 g->ops.pmu.reset_engine(g, false);
80
81exit:
82 nvgpu_log_fn(g, "%s Done, status - %d ", g->name, err);
83 return err;
84}
85
86static int pmu_enable(struct nvgpu_pmu *pmu, bool enable)
87{
88 struct gk20a *g = pmu->g;
89 int err = 0;
90
91 nvgpu_log_fn(g, " ");
92
93 if (!enable) {
94 if (!g->ops.pmu.is_engine_in_reset(g)) {
95 pmu_enable_irq(pmu, false);
96 pmu_enable_hw(pmu, false);
97 }
98 } else {
99 err = pmu_enable_hw(pmu, true);
100 if (err)
101 goto exit;
102
103 err = nvgpu_flcn_wait_idle(pmu->flcn);
104 if (err)
105 goto exit;
106
107 pmu_enable_irq(pmu, true);
108 }
109
110exit:
111 nvgpu_log_fn(g, "Done, status - %d ", err);
112 return err;
113}
114
115int nvgpu_pmu_reset(struct gk20a *g)
116{
117 struct nvgpu_pmu *pmu = &g->pmu;
118 int err = 0;
119
120 nvgpu_log_fn(g, " %s ", g->name);
121
122 err = nvgpu_flcn_wait_idle(pmu->flcn);
123 if (err)
124 goto exit;
125
126 err = pmu_enable(pmu, false);
127 if (err)
128 goto exit;
129
130 err = pmu_enable(pmu, true);
131
132exit:
133 nvgpu_log_fn(g, " %s Done, status - %d ", g->name, err);
134 return err;
135}
136
137static int nvgpu_init_task_pg_init(struct gk20a *g)
138{
139 struct nvgpu_pmu *pmu = &g->pmu;
140 char thread_name[64];
141 int err = 0;
142
143 nvgpu_log_fn(g, " ");
144
145 nvgpu_cond_init(&pmu->pg_init.wq);
146
147 snprintf(thread_name, sizeof(thread_name),
148 "nvgpu_pg_init_%s", g->name);
149
150 err = nvgpu_thread_create(&pmu->pg_init.state_task, g,
151 nvgpu_pg_init_task, thread_name);
152 if (err)
153 nvgpu_err(g, "failed to start nvgpu_pg_init thread");
154
155 return err;
156}
157
158static int nvgpu_init_pmu_setup_sw(struct gk20a *g)
159{
160 struct nvgpu_pmu *pmu = &g->pmu;
161 struct mm_gk20a *mm = &g->mm;
162 struct vm_gk20a *vm = mm->pmu.vm;
163 unsigned int i;
164 int err = 0;
165 u8 *ptr;
166
167 nvgpu_log_fn(g, " ");
168
169 /* start with elpg disabled until first enable call */
170 pmu->elpg_refcnt = 0;
171
172 /* Create thread to handle PMU state machine */
173 nvgpu_init_task_pg_init(g);
174
175 if (pmu->sw_ready) {
176 for (i = 0; i < pmu->mutex_cnt; i++) {
177 pmu->mutex[i].id = i;
178 pmu->mutex[i].index = i;
179 }
180 nvgpu_pmu_seq_init(pmu);
181
182 nvgpu_log_fn(g, "skip init");
183 goto skip_init;
184 }
185
186 /* no infoRom script from vbios? */
187
188 /* TBD: sysmon subtask */
189
190 if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON))
191 pmu->perfmon_sampling_enabled = true;
192
193 pmu->mutex_cnt = g->ops.pmu.pmu_mutex_size();
194 pmu->mutex = nvgpu_kzalloc(g, pmu->mutex_cnt *
195 sizeof(struct pmu_mutex));
196 if (!pmu->mutex) {
197 err = -ENOMEM;
198 goto err;
199 }
200
201 for (i = 0; i < pmu->mutex_cnt; i++) {
202 pmu->mutex[i].id = i;
203 pmu->mutex[i].index = i;
204 }
205
206 pmu->seq = nvgpu_kzalloc(g, PMU_MAX_NUM_SEQUENCES *
207 sizeof(struct pmu_sequence));
208 if (!pmu->seq) {
209 err = -ENOMEM;
210 goto err_free_mutex;
211 }
212
213 nvgpu_pmu_seq_init(pmu);
214
215 err = nvgpu_dma_alloc_map_sys(vm, GK20A_PMU_SEQ_BUF_SIZE,
216 &pmu->seq_buf);
217 if (err) {
218 nvgpu_err(g, "failed to allocate memory");
219 goto err_free_seq;
220 }
221
222 ptr = (u8 *)pmu->seq_buf.cpu_va;
223
224 /* TBD: remove this if ZBC save/restore is handled by PMU
 225 * send an empty ZBC sequence for now
226 */
227 ptr[0] = 0x16; /* opcode EXIT */
228 ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
229 ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
230
231 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
232
233 err = nvgpu_dma_alloc_map(vm, GK20A_PMU_TRACE_BUFSIZE,
234 &pmu->trace_buf);
235 if (err) {
236 nvgpu_err(g, "failed to allocate pmu trace buffer\n");
237 goto err_free_seq_buf;
238 }
239
240 pmu->sw_ready = true;
241
242skip_init:
243 nvgpu_log_fn(g, "done");
244 return 0;
245 err_free_seq_buf:
246 nvgpu_dma_unmap_free(vm, &pmu->seq_buf);
247 err_free_seq:
248 nvgpu_kfree(g, pmu->seq);
249 err_free_mutex:
250 nvgpu_kfree(g, pmu->mutex);
251 err:
252 nvgpu_log_fn(g, "fail");
253 return err;
254}
255
256int nvgpu_init_pmu_support(struct gk20a *g)
257{
258 struct nvgpu_pmu *pmu = &g->pmu;
259 u32 err;
260
261 nvgpu_log_fn(g, " ");
262
263 if (pmu->initialized)
264 return 0;
265
266 err = pmu_enable_hw(pmu, true);
267 if (err)
268 return err;
269
270 if (g->support_pmu) {
271 err = nvgpu_init_pmu_setup_sw(g);
272 if (err)
273 return err;
274 err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g);
275 if (err)
276 return err;
277
278 nvgpu_pmu_state_change(g, PMU_STATE_STARTING, false);
279 }
280
281 return err;
282}
283
284int nvgpu_pmu_process_init_msg(struct nvgpu_pmu *pmu,
285 struct pmu_msg *msg)
286{
287 struct gk20a *g = gk20a_from_pmu(pmu);
288 struct pmu_v *pv = &g->ops.pmu_ver;
289 union pmu_init_msg_pmu *init;
290 struct pmu_sha1_gid_data gid_data;
291 u32 i, tail = 0;
292
293 nvgpu_log_fn(g, " ");
294
295 nvgpu_pmu_dbg(g, "init received\n");
296
297 g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_GET);
298
299 nvgpu_flcn_copy_from_dmem(pmu->flcn, tail,
300 (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
301 if (msg->hdr.unit_id != PMU_UNIT_INIT) {
302 nvgpu_err(g, "expecting init msg");
303 return -EINVAL;
304 }
305
306 nvgpu_flcn_copy_from_dmem(pmu->flcn, tail + PMU_MSG_HDR_SIZE,
307 (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
308
309 if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
310 nvgpu_err(g, "expecting init msg");
311 return -EINVAL;
312 }
313
314 tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
315 g->ops.pmu.pmu_msgq_tail(pmu, &tail, QUEUE_SET);
316
317 init = pv->get_pmu_msg_pmu_init_msg_ptr(&(msg->msg.init));
318 if (!pmu->gid_info.valid) {
319
320 nvgpu_flcn_copy_from_dmem(pmu->flcn,
321 pv->get_pmu_init_msg_pmu_sw_mg_off(init),
322 (u8 *)&gid_data,
323 sizeof(struct pmu_sha1_gid_data), 0);
324
325 pmu->gid_info.valid =
326 (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
327
328 if (pmu->gid_info.valid) {
329
330 BUG_ON(sizeof(pmu->gid_info.gid) !=
331 sizeof(gid_data.gid));
332
333 memcpy(pmu->gid_info.gid, gid_data.gid,
334 sizeof(pmu->gid_info.gid));
335 }
336 }
337
338 for (i = 0; i < PMU_QUEUE_COUNT; i++)
339 nvgpu_pmu_queue_init(pmu, i, init);
340
341 if (!nvgpu_alloc_initialized(&pmu->dmem)) {
342 /* Align start and end addresses */
343 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
344 PMU_DMEM_ALLOC_ALIGNMENT);
345 u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) +
346 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
347 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
348 u32 size = end - start;
349
350 nvgpu_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem",
351 start, size, PMU_DMEM_ALLOC_ALIGNMENT, 0);
352 }
353
354 pmu->pmu_ready = true;
355
356 nvgpu_pmu_state_change(g, PMU_STATE_INIT_RECEIVED, true);
357
358 nvgpu_pmu_dbg(g, "init received end\n");
359
360 return 0;
361}
362
363static void pmu_setup_hw_enable_elpg(struct gk20a *g)
364{
365 struct nvgpu_pmu *pmu = &g->pmu;
366
367 nvgpu_log_fn(g, " ");
368
369 pmu->initialized = true;
370 nvgpu_pmu_state_change(g, PMU_STATE_STARTED, false);
371
372 if (nvgpu_is_enabled(g, NVGPU_PMU_ZBC_SAVE)) {
373 /* Save zbc table after PMU is initialized. */
374 pmu->zbc_ready = true;
375 gk20a_pmu_save_zbc(g, 0xf);
376 }
377
378 if (g->elpg_enabled) {
379 /* Init reg with prod values*/
380 if (g->ops.pmu.pmu_setup_elpg)
381 g->ops.pmu.pmu_setup_elpg(g);
382 nvgpu_pmu_enable_elpg(g);
383 }
384
385 nvgpu_udelay(50);
386
387 /* Enable AELPG */
388 if (g->aelpg_enabled) {
389 nvgpu_aelpg_init(g);
390 nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS);
391 }
392}
393
394void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state,
395 bool post_change_event)
396{
397 struct nvgpu_pmu *pmu = &g->pmu;
398
399 nvgpu_pmu_dbg(g, "pmu_state - %d", pmu_state);
400
401 pmu->pmu_state = pmu_state;
402
403 if (post_change_event) {
404 pmu->pg_init.state_change = true;
405 nvgpu_cond_signal(&pmu->pg_init.wq);
406 }
407
408 /* make status visible */
409 nvgpu_smp_mb();
410}
411
412static int nvgpu_pg_init_task(void *arg)
413{
414 struct gk20a *g = (struct gk20a *)arg;
415 struct nvgpu_pmu *pmu = &g->pmu;
416 struct nvgpu_pg_init *pg_init = &pmu->pg_init;
417 u32 pmu_state = 0;
418
419 nvgpu_log_fn(g, "thread start");
420
421 while (true) {
422
423 NVGPU_COND_WAIT_INTERRUPTIBLE(&pg_init->wq,
424 (pg_init->state_change == true), 0);
425
426 pmu->pg_init.state_change = false;
427 pmu_state = NV_ACCESS_ONCE(pmu->pmu_state);
428
429 if (pmu_state == PMU_STATE_EXIT) {
430 nvgpu_pmu_dbg(g, "pmu state exit");
431 break;
432 }
433
434 switch (pmu_state) {
435 case PMU_STATE_INIT_RECEIVED:
436 nvgpu_pmu_dbg(g, "pmu starting");
437 if (g->can_elpg)
438 nvgpu_pmu_init_powergating(g);
439 break;
440 case PMU_STATE_ELPG_BOOTED:
441 nvgpu_pmu_dbg(g, "elpg booted");
442 nvgpu_pmu_init_bind_fecs(g);
443 break;
444 case PMU_STATE_LOADING_PG_BUF:
445 nvgpu_pmu_dbg(g, "loaded pg buf");
446 nvgpu_pmu_setup_hw_load_zbc(g);
447 break;
448 case PMU_STATE_LOADING_ZBC:
449 nvgpu_pmu_dbg(g, "loaded zbc");
450 pmu_setup_hw_enable_elpg(g);
451 nvgpu_pmu_dbg(g, "PMU booted, thread exiting");
452 return 0;
453 default:
454 nvgpu_pmu_dbg(g, "invalid state");
455 break;
456 }
457
458 }
459
460 while (!nvgpu_thread_should_stop(&pg_init->state_task))
461 nvgpu_usleep_range(5000, 5100);
462
463 nvgpu_log_fn(g, "thread exit");
464
465 return 0;
466}
467
468int nvgpu_pmu_destroy(struct gk20a *g)
469{
470 struct nvgpu_pmu *pmu = &g->pmu;
471 struct pmu_pg_stats_data pg_stat_data = { 0 };
472 struct nvgpu_timeout timeout;
473 int i;
474
475 nvgpu_log_fn(g, " ");
476
477 if (!g->support_pmu)
478 return 0;
479
480 /* make sure the pending operations are finished before we continue */
481 if (nvgpu_thread_is_running(&pmu->pg_init.state_task)) {
482
483 /* post PMU_STATE_EXIT to exit PMU state machine loop */
484 nvgpu_pmu_state_change(g, PMU_STATE_EXIT, true);
485
486 /* Make thread stop*/
487 nvgpu_thread_stop(&pmu->pg_init.state_task);
488
489 /* wait to confirm thread stopped */
490 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
491 do {
492 if (!nvgpu_thread_is_running(&pmu->pg_init.state_task))
493 break;
494 nvgpu_udelay(2);
495 } while (!nvgpu_timeout_expired_msg(&timeout,
496 "timeout - waiting PMU state machine thread stop"));
497 }
498
499 nvgpu_pmu_get_pg_stats(g,
500 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
501
502 if (nvgpu_pmu_disable_elpg(g))
503 nvgpu_err(g, "failed to set disable elpg");
504 pmu->initialized = false;
505
506 /* update the s/w ELPG residency counters */
507 g->pg_ingating_time_us += (u64)pg_stat_data.ingating_time;
508 g->pg_ungating_time_us += (u64)pg_stat_data.ungating_time;
509 g->pg_gating_cnt += pg_stat_data.gating_cnt;
510
511 nvgpu_mutex_acquire(&pmu->isr_mutex);
512 pmu->isr_enabled = false;
513 nvgpu_mutex_release(&pmu->isr_mutex);
514
515 for (i = 0; i < PMU_QUEUE_COUNT; i++)
516 nvgpu_mutex_destroy(&pmu->queue[i].mutex);
517
518 nvgpu_pmu_state_change(g, PMU_STATE_OFF, false);
519 pmu->pmu_ready = false;
520 pmu->perfmon_ready = false;
521 pmu->zbc_ready = false;
522 g->pmu_lsf_pmu_wpr_init_done = false;
523 __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
524
525 nvgpu_log_fn(g, "done");
526 return 0;
527}
528
529void nvgpu_pmu_surface_describe(struct gk20a *g, struct nvgpu_mem *mem,
530 struct flcn_mem_desc_v0 *fb)
531{
532 fb->address.lo = u64_lo32(mem->gpu_va);
533 fb->address.hi = u64_hi32(mem->gpu_va);
534 fb->params = ((u32)mem->size & 0xFFFFFF);
535 fb->params |= (GK20A_PMU_DMAIDX_VIRT << 24);
536}
537
538int nvgpu_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
539 u32 size)
540{
541 struct mm_gk20a *mm = &g->mm;
542 struct vm_gk20a *vm = mm->pmu.vm;
543 int err;
544
545 err = nvgpu_dma_alloc_map_vid(vm, size, mem);
546 if (err) {
547 nvgpu_err(g, "memory allocation failed");
548 return -ENOMEM;
549 }
550
551 return 0;
552}
553
554int nvgpu_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
555 u32 size)
556{
557 struct mm_gk20a *mm = &g->mm;
558 struct vm_gk20a *vm = mm->pmu.vm;
559 int err;
560
561 err = nvgpu_dma_alloc_map_sys(vm, size, mem);
562 if (err) {
563 nvgpu_err(g, "failed to allocate memory\n");
564 return -ENOMEM;
565 }
566
567 return 0;
568}
569
570void nvgpu_pmu_surface_free(struct gk20a *g, struct nvgpu_mem *mem)
571{
572 nvgpu_dma_free(g, mem);
573 memset(mem, 0, sizeof(struct nvgpu_mem));
574}
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_debug.c b/drivers/gpu/nvgpu/common/pmu/pmu_debug.c
new file mode 100644
index 00000000..6ad82ca8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_debug.c
@@ -0,0 +1,57 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/pmu.h>
24#include <nvgpu/log.h>
25#include <nvgpu/timers.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/dma.h>
28#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
29
30#include "gk20a/gk20a.h"
31
32void nvgpu_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu)
33{
34 struct gk20a *g = pmu->g;
35
36 /* Print PG stats */
37 nvgpu_err(g, "Print PG stats");
38 nvgpu_flcn_print_dmem(pmu->flcn,
39 pmu->stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_GRAPHICS],
40 sizeof(struct pmu_pg_stats_v2));
41
42 gk20a_pmu_dump_elpg_stats(pmu);
43}
44
45void nvgpu_pmu_dump_falcon_stats(struct nvgpu_pmu *pmu)
46{
47 struct gk20a *g = pmu->g;
48
49 nvgpu_flcn_dump_stats(pmu->flcn);
50 gk20a_pmu_dump_falcon_stats(pmu);
51
52 nvgpu_err(g, "pmu state: %d", pmu->pmu_state);
53 nvgpu_err(g, "elpg state: %d", pmu->elpg_stat);
54
55 /* PMU may crash due to FECS crash. Dump FECS status */
56 gk20a_fecs_dump_falcon_stats(g);
57}
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c
new file mode 100644
index 00000000..654fde21
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c
@@ -0,0 +1,2368 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/pmu.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/log.h>
26#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
27#include <nvgpu/firmware.h>
28#include <nvgpu/enabled.h>
29
30#include "gk20a/gk20a.h"
31
32#include "boardobj/boardobj.h"
33#include "boardobj/boardobjgrp.h"
34
35/* PMU NS UCODE IMG */
36#define NVGPU_PMU_NS_UCODE_IMAGE "gpmu_ucode.bin"
37
38/* PMU F/W version */
39#define APP_VERSION_BIGGPU 22836594
40#define APP_VERSION_NC_3 22204331
41#define APP_VERSION_NC_2 20429989
42#define APP_VERSION_NC_1 20313802
43#define APP_VERSION_NC_0 20360931
44#define APP_VERSION_GM206 20652057
45#define APP_VERSION_NV_GPU 21307569
46#define APP_VERSION_NV_GPU_1 21308030
47#define APP_VERSION_GM20B_5 20490253
48#define APP_VERSION_GM20B_4 19008461
49#define APP_VERSION_GM20B_3 18935575
50#define APP_VERSION_GM20B_2 18694072
51#define APP_VERSION_GM20B_1 18547257
52#define APP_VERSION_GM20B 17615280
53#define APP_VERSION_3 18357968
54#define APP_VERSION_2 18542378
55#define APP_VERSION_1 17997577 /*Obsolete this once 18357968 gets in*/
56#define APP_VERSION_0 16856675
57
58/* PMU version specific functions */
59static u32 pmu_perfmon_cntr_sz_v0(struct nvgpu_pmu *pmu)
60{
61 return sizeof(struct pmu_perfmon_counter_v0);
62}
63
64static u32 pmu_perfmon_cntr_sz_v2(struct nvgpu_pmu *pmu)
65{
66 return sizeof(struct pmu_perfmon_counter_v2);
67}
68
69static void *get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu)
70{
71 return (void *)(&pmu->perfmon_counter_v2);
72}
73
74static void *get_perfmon_cntr_ptr_v0(struct nvgpu_pmu *pmu)
75{
76 return (void *)(&pmu->perfmon_counter_v0);
77}
78
79static void set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut)
80{
81 pmu->perfmon_counter_v2.upper_threshold = ut;
82}
83
84static void set_perfmon_cntr_ut_v0(struct nvgpu_pmu *pmu, u16 ut)
85{
86 pmu->perfmon_counter_v0.upper_threshold = ut;
87}
88
89static void set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt)
90{
91 pmu->perfmon_counter_v2.lower_threshold = lt;
92}
93
94static void set_perfmon_cntr_lt_v0(struct nvgpu_pmu *pmu, u16 lt)
95{
96 pmu->perfmon_counter_v0.lower_threshold = lt;
97}
98
99static void set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid)
100{
101 pmu->perfmon_counter_v2.valid = valid;
102}
103
104static void set_perfmon_cntr_valid_v0(struct nvgpu_pmu *pmu, u8 valid)
105{
106 pmu->perfmon_counter_v0.valid = valid;
107}
108
109static void set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index)
110{
111 pmu->perfmon_counter_v2.index = index;
112}
113
114static void set_perfmon_cntr_index_v0(struct nvgpu_pmu *pmu, u8 index)
115{
116 pmu->perfmon_counter_v0.index = index;
117}
118
119static void set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid)
120{
121 pmu->perfmon_counter_v2.group_id = gid;
122}
123
124static void set_perfmon_cntr_group_id_v0(struct nvgpu_pmu *pmu, u8 gid)
125{
126 pmu->perfmon_counter_v0.group_id = gid;
127}
128
129static u32 pmu_cmdline_size_v0(struct nvgpu_pmu *pmu)
130{
131 return sizeof(struct pmu_cmdline_args_v0);
132}
133
134static u32 pmu_cmdline_size_v1(struct nvgpu_pmu *pmu)
135{
136 return sizeof(struct pmu_cmdline_args_v1);
137}
138
139static u32 pmu_cmdline_size_v2(struct nvgpu_pmu *pmu)
140{
141 return sizeof(struct pmu_cmdline_args_v2);
142}
143
144static void set_pmu_cmdline_args_cpufreq_v2(struct nvgpu_pmu *pmu, u32 freq)
145{
146 pmu->args_v2.cpu_freq_hz = freq;
147}
148static void set_pmu_cmdline_args_secure_mode_v2(struct nvgpu_pmu *pmu, u32 val)
149{
150 pmu->args_v2.secure_mode = val;
151}
152
153static void set_pmu_cmdline_args_falctracesize_v2(
154 struct nvgpu_pmu *pmu, u32 size)
155{
156 pmu->args_v2.falc_trace_size = size;
157}
158
159static void set_pmu_cmdline_args_falctracedmabase_v2(struct nvgpu_pmu *pmu)
160{
161 pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
162}
163
164static void set_pmu_cmdline_args_falctracedmaidx_v2(
165 struct nvgpu_pmu *pmu, u32 idx)
166{
167 pmu->args_v2.falc_trace_dma_idx = idx;
168}
169
170
171static void set_pmu_cmdline_args_falctracedmabase_v4(struct nvgpu_pmu *pmu)
172{
173 pmu->args_v4.dma_addr.dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
174 pmu->args_v4.dma_addr.dma_base1 = 0;
175 pmu->args_v4.dma_addr.dma_offset = 0;
176}
177
178static u32 pmu_cmdline_size_v4(struct nvgpu_pmu *pmu)
179{
180 return sizeof(struct pmu_cmdline_args_v4);
181}
182
183static void set_pmu_cmdline_args_cpufreq_v4(struct nvgpu_pmu *pmu, u32 freq)
184{
185 pmu->args_v4.cpu_freq_hz = freq;
186}
187static void set_pmu_cmdline_args_secure_mode_v4(struct nvgpu_pmu *pmu, u32 val)
188{
189 pmu->args_v4.secure_mode = val;
190}
191
192static void set_pmu_cmdline_args_falctracesize_v4(
193 struct nvgpu_pmu *pmu, u32 size)
194{
195 pmu->args_v4.falc_trace_size = size;
196}
197static void set_pmu_cmdline_args_falctracedmaidx_v4(
198 struct nvgpu_pmu *pmu, u32 idx)
199{
200 pmu->args_v4.falc_trace_dma_idx = idx;
201}
202
203static u32 pmu_cmdline_size_v5(struct nvgpu_pmu *pmu)
204{
205 return sizeof(struct pmu_cmdline_args_v5);
206}
207
208static u32 pmu_cmdline_size_v6(struct nvgpu_pmu *pmu)
209{
210 return sizeof(struct pmu_cmdline_args_v6);
211}
212
213static void set_pmu_cmdline_args_cpufreq_v5(struct nvgpu_pmu *pmu, u32 freq)
214{
215 pmu->args_v5.cpu_freq_hz = 204000000;
216}
217static void set_pmu_cmdline_args_secure_mode_v5(struct nvgpu_pmu *pmu, u32 val)
218{
219 pmu->args_v5.secure_mode = val;
220}
221
222static void set_pmu_cmdline_args_falctracesize_v5(
223 struct nvgpu_pmu *pmu, u32 size)
224{
225 /* set by surface describe */
226}
227
228static void set_pmu_cmdline_args_falctracedmabase_v5(struct nvgpu_pmu *pmu)
229{
230 struct gk20a *g = gk20a_from_pmu(pmu);
231
232 nvgpu_pmu_surface_describe(g, &pmu->trace_buf, &pmu->args_v5.trace_buf);
233}
234
235static void set_pmu_cmdline_args_falctracedmaidx_v5(
236 struct nvgpu_pmu *pmu, u32 idx)
237{
238 /* set by surface describe */
239}
240
241static u32 pmu_cmdline_size_v3(struct nvgpu_pmu *pmu)
242{
243 return sizeof(struct pmu_cmdline_args_v3);
244}
245
246static void set_pmu_cmdline_args_cpufreq_v3(struct nvgpu_pmu *pmu, u32 freq)
247{
248 pmu->args_v3.cpu_freq_hz = freq;
249}
250static void set_pmu_cmdline_args_secure_mode_v3(struct nvgpu_pmu *pmu, u32 val)
251{
252 pmu->args_v3.secure_mode = val;
253}
254
255static void set_pmu_cmdline_args_falctracesize_v3(
256 struct nvgpu_pmu *pmu, u32 size)
257{
258 pmu->args_v3.falc_trace_size = size;
259}
260
261static void set_pmu_cmdline_args_falctracedmabase_v3(struct nvgpu_pmu *pmu)
262{
263 pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
264}
265
266static void set_pmu_cmdline_args_falctracedmaidx_v3(
267 struct nvgpu_pmu *pmu, u32 idx)
268{
269 pmu->args_v3.falc_trace_dma_idx = idx;
270}
271
272static void set_pmu_cmdline_args_cpufreq_v1(struct nvgpu_pmu *pmu, u32 freq)
273{
274 pmu->args_v1.cpu_freq_hz = freq;
275}
276static void set_pmu_cmdline_args_secure_mode_v1(struct nvgpu_pmu *pmu, u32 val)
277{
278 pmu->args_v1.secure_mode = val;
279}
280
281static void set_pmu_cmdline_args_falctracesize_v1(
282 struct nvgpu_pmu *pmu, u32 size)
283{
284 pmu->args_v1.falc_trace_size = size;
285}
286
287static void set_pmu_cmdline_args_falctracedmabase_v1(struct nvgpu_pmu *pmu)
288{
289 pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
290}
291
292static void set_pmu_cmdline_args_falctracedmaidx_v1(
293 struct nvgpu_pmu *pmu, u32 idx)
294{
295 pmu->args_v1.falc_trace_dma_idx = idx;
296}
297
298static void set_pmu_cmdline_args_cpufreq_v0(struct nvgpu_pmu *pmu, u32 freq)
299{
300 pmu->args_v0.cpu_freq_hz = freq;
301}
302
303static void *get_pmu_cmdline_args_ptr_v4(struct nvgpu_pmu *pmu)
304{
305 return (void *)(&pmu->args_v4);
306}
307
308static void *get_pmu_cmdline_args_ptr_v3(struct nvgpu_pmu *pmu)
309{
310 return (void *)(&pmu->args_v3);
311}
312
313static void *get_pmu_cmdline_args_ptr_v2(struct nvgpu_pmu *pmu)
314{
315 return (void *)(&pmu->args_v2);
316}
317
318static void *get_pmu_cmdline_args_ptr_v5(struct nvgpu_pmu *pmu)
319{
320 return (void *)(&pmu->args_v5);
321}
322static void *get_pmu_cmdline_args_ptr_v1(struct nvgpu_pmu *pmu)
323{
324 return (void *)(&pmu->args_v1);
325}
326
327static void *get_pmu_cmdline_args_ptr_v0(struct nvgpu_pmu *pmu)
328{
329 return (void *)(&pmu->args_v0);
330}
331
332static u32 get_pmu_allocation_size_v3(struct nvgpu_pmu *pmu)
333{
334 return sizeof(struct pmu_allocation_v3);
335}
336
337static u32 get_pmu_allocation_size_v2(struct nvgpu_pmu *pmu)
338{
339 return sizeof(struct pmu_allocation_v2);
340}
341
342static u32 get_pmu_allocation_size_v1(struct nvgpu_pmu *pmu)
343{
344 return sizeof(struct pmu_allocation_v1);
345}
346
347static u32 get_pmu_allocation_size_v0(struct nvgpu_pmu *pmu)
348{
349 return sizeof(struct pmu_allocation_v0);
350}
351
352static void set_pmu_allocation_ptr_v3(struct nvgpu_pmu *pmu,
353 void **pmu_alloc_ptr, void *assign_ptr)
354{
355 struct pmu_allocation_v3 **pmu_a_ptr =
356 (struct pmu_allocation_v3 **)pmu_alloc_ptr;
357
358 *pmu_a_ptr = (struct pmu_allocation_v3 *)assign_ptr;
359}
360
361static void set_pmu_allocation_ptr_v2(struct nvgpu_pmu *pmu,
362 void **pmu_alloc_ptr, void *assign_ptr)
363{
364 struct pmu_allocation_v2 **pmu_a_ptr =
365 (struct pmu_allocation_v2 **)pmu_alloc_ptr;
366
367 *pmu_a_ptr = (struct pmu_allocation_v2 *)assign_ptr;
368}
369
370static void set_pmu_allocation_ptr_v1(struct nvgpu_pmu *pmu,
371 void **pmu_alloc_ptr, void *assign_ptr)
372{
373 struct pmu_allocation_v1 **pmu_a_ptr =
374 (struct pmu_allocation_v1 **)pmu_alloc_ptr;
375
376 *pmu_a_ptr = (struct pmu_allocation_v1 *)assign_ptr;
377}
378
379static void set_pmu_allocation_ptr_v0(struct nvgpu_pmu *pmu,
380 void **pmu_alloc_ptr, void *assign_ptr)
381{
382 struct pmu_allocation_v0 **pmu_a_ptr =
383 (struct pmu_allocation_v0 **)pmu_alloc_ptr;
384
385 *pmu_a_ptr = (struct pmu_allocation_v0 *)assign_ptr;
386}
387
388static void pmu_allocation_set_dmem_size_v3(struct nvgpu_pmu *pmu,
389 void *pmu_alloc_ptr, u16 size)
390{
391 struct pmu_allocation_v3 *pmu_a_ptr =
392 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
393
394 pmu_a_ptr->alloc.dmem.size = size;
395}
396
397static void pmu_allocation_set_dmem_size_v2(struct nvgpu_pmu *pmu,
398 void *pmu_alloc_ptr, u16 size)
399{
400 struct pmu_allocation_v2 *pmu_a_ptr =
401 (struct pmu_allocation_v2 *)pmu_alloc_ptr;
402
403 pmu_a_ptr->alloc.dmem.size = size;
404}
405
406static void pmu_allocation_set_dmem_size_v1(struct nvgpu_pmu *pmu,
407 void *pmu_alloc_ptr, u16 size)
408{
409 struct pmu_allocation_v1 *pmu_a_ptr =
410 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
411
412 pmu_a_ptr->alloc.dmem.size = size;
413}
414
415static void pmu_allocation_set_dmem_size_v0(struct nvgpu_pmu *pmu,
416 void *pmu_alloc_ptr, u16 size)
417{
418 struct pmu_allocation_v0 *pmu_a_ptr =
419 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
420
421 pmu_a_ptr->alloc.dmem.size = size;
422}
423
424static u16 pmu_allocation_get_dmem_size_v3(struct nvgpu_pmu *pmu,
425 void *pmu_alloc_ptr)
426{
427 struct pmu_allocation_v3 *pmu_a_ptr =
428 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
429
430 return pmu_a_ptr->alloc.dmem.size;
431}
432
433static u16 pmu_allocation_get_dmem_size_v2(struct nvgpu_pmu *pmu,
434 void *pmu_alloc_ptr)
435{
436 struct pmu_allocation_v2 *pmu_a_ptr =
437 (struct pmu_allocation_v2 *)pmu_alloc_ptr;
438
439 return pmu_a_ptr->alloc.dmem.size;
440}
441
442static u16 pmu_allocation_get_dmem_size_v1(struct nvgpu_pmu *pmu,
443 void *pmu_alloc_ptr)
444{
445 struct pmu_allocation_v1 *pmu_a_ptr =
446 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
447
448 return pmu_a_ptr->alloc.dmem.size;
449}
450
451static u16 pmu_allocation_get_dmem_size_v0(struct nvgpu_pmu *pmu,
452 void *pmu_alloc_ptr)
453{
454 struct pmu_allocation_v0 *pmu_a_ptr =
455 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
456
457 return pmu_a_ptr->alloc.dmem.size;
458}
459
460static u32 pmu_allocation_get_dmem_offset_v3(struct nvgpu_pmu *pmu,
461 void *pmu_alloc_ptr)
462{
463 struct pmu_allocation_v3 *pmu_a_ptr =
464 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
465
466 return pmu_a_ptr->alloc.dmem.offset;
467}
468
469static u32 pmu_allocation_get_dmem_offset_v2(struct nvgpu_pmu *pmu,
470 void *pmu_alloc_ptr)
471{
472 struct pmu_allocation_v2 *pmu_a_ptr =
473 (struct pmu_allocation_v2 *)pmu_alloc_ptr;
474
475 return pmu_a_ptr->alloc.dmem.offset;
476}
477
478static u32 pmu_allocation_get_dmem_offset_v1(struct nvgpu_pmu *pmu,
479 void *pmu_alloc_ptr)
480{
481 struct pmu_allocation_v1 *pmu_a_ptr =
482 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
483
484 return pmu_a_ptr->alloc.dmem.offset;
485}
486
487static u32 pmu_allocation_get_dmem_offset_v0(struct nvgpu_pmu *pmu,
488 void *pmu_alloc_ptr)
489{
490 struct pmu_allocation_v0 *pmu_a_ptr =
491 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
492
493 return pmu_a_ptr->alloc.dmem.offset;
494}
495
496static u32 *pmu_allocation_get_dmem_offset_addr_v3(struct nvgpu_pmu *pmu,
497 void *pmu_alloc_ptr)
498{
499 struct pmu_allocation_v3 *pmu_a_ptr =
500 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
501
502 return &pmu_a_ptr->alloc.dmem.offset;
503}
504
505static void *pmu_allocation_get_fb_addr_v3(
506 struct nvgpu_pmu *pmu, void *pmu_alloc_ptr)
507{
508 struct pmu_allocation_v3 *pmu_a_ptr =
509 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
510
511 return (void *)&pmu_a_ptr->alloc.fb;
512}
513
514static u32 pmu_allocation_get_fb_size_v3(
515 struct nvgpu_pmu *pmu, void *pmu_alloc_ptr)
516{
517 struct pmu_allocation_v3 *pmu_a_ptr =
518 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
519
520 return sizeof(pmu_a_ptr->alloc.fb);
521}
522
523static u32 *pmu_allocation_get_dmem_offset_addr_v2(struct nvgpu_pmu *pmu,
524 void *pmu_alloc_ptr)
525{
526 struct pmu_allocation_v2 *pmu_a_ptr =
527 (struct pmu_allocation_v2 *)pmu_alloc_ptr;
528
529 return &pmu_a_ptr->alloc.dmem.offset;
530}
531
532static u32 *pmu_allocation_get_dmem_offset_addr_v1(struct nvgpu_pmu *pmu,
533 void *pmu_alloc_ptr)
534{
535 struct pmu_allocation_v1 *pmu_a_ptr =
536 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
537
538 return &pmu_a_ptr->alloc.dmem.offset;
539}
540
541static u32 *pmu_allocation_get_dmem_offset_addr_v0(struct nvgpu_pmu *pmu,
542 void *pmu_alloc_ptr)
543{
544 struct pmu_allocation_v0 *pmu_a_ptr =
545 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
546
547 return &pmu_a_ptr->alloc.dmem.offset;
548}
549
550static void pmu_allocation_set_dmem_offset_v3(struct nvgpu_pmu *pmu,
551 void *pmu_alloc_ptr, u32 offset)
552{
553 struct pmu_allocation_v3 *pmu_a_ptr =
554 (struct pmu_allocation_v3 *)pmu_alloc_ptr;
555
556 pmu_a_ptr->alloc.dmem.offset = offset;
557}
558
559static void pmu_allocation_set_dmem_offset_v2(struct nvgpu_pmu *pmu,
560 void *pmu_alloc_ptr, u32 offset)
561{
562 struct pmu_allocation_v2 *pmu_a_ptr =
563 (struct pmu_allocation_v2 *)pmu_alloc_ptr;
564
565 pmu_a_ptr->alloc.dmem.offset = offset;
566}
567
568static void pmu_allocation_set_dmem_offset_v1(struct nvgpu_pmu *pmu,
569 void *pmu_alloc_ptr, u32 offset)
570{
571 struct pmu_allocation_v1 *pmu_a_ptr =
572 (struct pmu_allocation_v1 *)pmu_alloc_ptr;
573
574 pmu_a_ptr->alloc.dmem.offset = offset;
575}
576
577static void pmu_allocation_set_dmem_offset_v0(struct nvgpu_pmu *pmu,
578 void *pmu_alloc_ptr, u32 offset)
579{
580 struct pmu_allocation_v0 *pmu_a_ptr =
581 (struct pmu_allocation_v0 *)pmu_alloc_ptr;
582
583 pmu_a_ptr->alloc.dmem.offset = offset;
584}
585
586static void *get_pmu_msg_pmu_init_msg_ptr_v4(struct pmu_init_msg *init)
587{
588 return (void *)(&(init->pmu_init_v4));
589}
590
591static void *get_pmu_msg_pmu_init_msg_ptr_v3(struct pmu_init_msg *init)
592{
593 return (void *)(&(init->pmu_init_v3));
594}
595
596static u16 get_pmu_init_msg_pmu_sw_mg_off_v4(union pmu_init_msg_pmu *init_msg)
597{
598 struct pmu_init_msg_pmu_v4 *init =
599 (struct pmu_init_msg_pmu_v4 *)(&init_msg->v4);
600
601 return init->sw_managed_area_offset;
602}
603
604static u16 get_pmu_init_msg_pmu_sw_mg_off_v3(union pmu_init_msg_pmu *init_msg)
605{
606 struct pmu_init_msg_pmu_v3 *init =
607 (struct pmu_init_msg_pmu_v3 *)(&init_msg->v3);
608
609 return init->sw_managed_area_offset;
610}
611
612static u16 get_pmu_init_msg_pmu_sw_mg_size_v4(union pmu_init_msg_pmu *init_msg)
613{
614 struct pmu_init_msg_pmu_v4 *init =
615 (struct pmu_init_msg_pmu_v4 *)(&init_msg->v4);
616
617 return init->sw_managed_area_size;
618}
619
620static u16 get_pmu_init_msg_pmu_sw_mg_size_v3(union pmu_init_msg_pmu *init_msg)
621{
622 struct pmu_init_msg_pmu_v3 *init =
623 (struct pmu_init_msg_pmu_v3 *)(&init_msg->v3);
624
625 return init->sw_managed_area_size;
626}
627
628static void *get_pmu_msg_pmu_init_msg_ptr_v2(struct pmu_init_msg *init)
629{
630 return (void *)(&(init->pmu_init_v2));
631}
632
633static u16 get_pmu_init_msg_pmu_sw_mg_off_v2(union pmu_init_msg_pmu *init_msg)
634{
635 struct pmu_init_msg_pmu_v2 *init =
636 (struct pmu_init_msg_pmu_v2 *)(&init_msg->v1);
637
638 return init->sw_managed_area_offset;
639}
640
641static u16 get_pmu_init_msg_pmu_sw_mg_size_v2(union pmu_init_msg_pmu *init_msg)
642{
643 struct pmu_init_msg_pmu_v2 *init =
644 (struct pmu_init_msg_pmu_v2 *)(&init_msg->v1);
645
646 return init->sw_managed_area_size;
647}
648
649static void *get_pmu_msg_pmu_init_msg_ptr_v1(struct pmu_init_msg *init)
650{
651 return (void *)(&(init->pmu_init_v1));
652}
653
654static u16 get_pmu_init_msg_pmu_sw_mg_off_v1(union pmu_init_msg_pmu *init_msg)
655{
656 struct pmu_init_msg_pmu_v1 *init =
657 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
658
659 return init->sw_managed_area_offset;
660}
661
662static u16 get_pmu_init_msg_pmu_sw_mg_size_v1(union pmu_init_msg_pmu *init_msg)
663{
664 struct pmu_init_msg_pmu_v1 *init =
665 (struct pmu_init_msg_pmu_v1 *)(&init_msg->v1);
666
667 return init->sw_managed_area_size;
668}
669
670static void *get_pmu_msg_pmu_init_msg_ptr_v0(struct pmu_init_msg *init)
671{
672 return (void *)(&(init->pmu_init_v0));
673}
674
675static u16 get_pmu_init_msg_pmu_sw_mg_off_v0(union pmu_init_msg_pmu *init_msg)
676{
677 struct pmu_init_msg_pmu_v0 *init =
678 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
679
680 return init->sw_managed_area_offset;
681}
682
683static u16 get_pmu_init_msg_pmu_sw_mg_size_v0(union pmu_init_msg_pmu *init_msg)
684{
685 struct pmu_init_msg_pmu_v0 *init =
686 (struct pmu_init_msg_pmu_v0 *)(&init_msg->v0);
687
688 return init->sw_managed_area_size;
689}
690
691static u32 get_pmu_perfmon_cmd_start_size_v3(void)
692{
693 return sizeof(struct pmu_perfmon_cmd_start_v3);
694}
695
696static u32 get_pmu_perfmon_cmd_start_size_v2(void)
697{
698 return sizeof(struct pmu_perfmon_cmd_start_v2);
699}
700
701static u32 get_pmu_perfmon_cmd_start_size_v1(void)
702{
703 return sizeof(struct pmu_perfmon_cmd_start_v1);
704}
705
706static u32 get_pmu_perfmon_cmd_start_size_v0(void)
707{
708 return sizeof(struct pmu_perfmon_cmd_start_v0);
709}
710
711static int get_perfmon_cmd_start_offsetofvar_v3(
712 enum pmu_perfmon_cmd_start_fields field)
713{
714 switch (field) {
715 case COUNTER_ALLOC:
716 return offsetof(struct pmu_perfmon_cmd_start_v3,
717 counter_alloc);
718 default:
719 return -EINVAL;
720 }
721
722 return 0;
723}
724
725static int get_perfmon_cmd_start_offsetofvar_v2(
726 enum pmu_perfmon_cmd_start_fields field)
727{
728 switch (field) {
729 case COUNTER_ALLOC:
730 return offsetof(struct pmu_perfmon_cmd_start_v2,
731 counter_alloc);
732 default:
733 return -EINVAL;
734 }
735
736 return 0;
737}
738
739static int get_perfmon_cmd_start_offsetofvar_v1(
740 enum pmu_perfmon_cmd_start_fields field)
741{
742 switch (field) {
743 case COUNTER_ALLOC:
744 return offsetof(struct pmu_perfmon_cmd_start_v1,
745 counter_alloc);
746 default:
747 return -EINVAL;
748 }
749
750 return 0;
751}
752
753static int get_perfmon_cmd_start_offsetofvar_v0(
754 enum pmu_perfmon_cmd_start_fields field)
755{
756 switch (field) {
757 case COUNTER_ALLOC:
758 return offsetof(struct pmu_perfmon_cmd_start_v0,
759 counter_alloc);
760 default:
761 return -EINVAL;
762 }
763
764 return 0;
765}
766
767static u32 get_pmu_perfmon_cmd_init_size_v3(void)
768{
769 return sizeof(struct pmu_perfmon_cmd_init_v3);
770}
771
772static u32 get_pmu_perfmon_cmd_init_size_v2(void)
773{
774 return sizeof(struct pmu_perfmon_cmd_init_v2);
775}
776
777static u32 get_pmu_perfmon_cmd_init_size_v1(void)
778{
779 return sizeof(struct pmu_perfmon_cmd_init_v1);
780}
781
782static u32 get_pmu_perfmon_cmd_init_size_v0(void)
783{
784 return sizeof(struct pmu_perfmon_cmd_init_v0);
785}
786
787static int get_perfmon_cmd_init_offsetofvar_v3(
788 enum pmu_perfmon_cmd_start_fields field)
789{
790 switch (field) {
791 case COUNTER_ALLOC:
792 return offsetof(struct pmu_perfmon_cmd_init_v3,
793 counter_alloc);
794 default:
795 return -EINVAL;
796 }
797
798 return 0;
799}
800
801static int get_perfmon_cmd_init_offsetofvar_v2(
802 enum pmu_perfmon_cmd_start_fields field)
803{
804 switch (field) {
805 case COUNTER_ALLOC:
806 return offsetof(struct pmu_perfmon_cmd_init_v2,
807 counter_alloc);
808 default:
809 return -EINVAL;
810 }
811
812 return 0;
813}
814
815static int get_perfmon_cmd_init_offsetofvar_v1(
816 enum pmu_perfmon_cmd_start_fields field)
817{
818 switch (field) {
819 case COUNTER_ALLOC:
820 return offsetof(struct pmu_perfmon_cmd_init_v1,
821 counter_alloc);
822 default:
823 return -EINVAL;
824 }
825
826 return 0;
827}
828
829static int get_perfmon_cmd_init_offsetofvar_v0(
830 enum pmu_perfmon_cmd_start_fields field)
831{
832 switch (field) {
833 case COUNTER_ALLOC:
834 return offsetof(struct pmu_perfmon_cmd_init_v0,
835 counter_alloc);
836 default:
837 return -EINVAL;
838 }
839
840 return 0;
841}
842
843static void perfmon_start_set_cmd_type_v3(struct pmu_perfmon_cmd *pc, u8 value)
844{
845 struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;
846
847 start->cmd_type = value;
848}
849
850static void perfmon_start_set_cmd_type_v2(struct pmu_perfmon_cmd *pc, u8 value)
851{
852 struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;
853
854 start->cmd_type = value;
855}
856
857static void perfmon_start_set_cmd_type_v1(struct pmu_perfmon_cmd *pc, u8 value)
858{
859 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
860
861 start->cmd_type = value;
862}
863
864static void perfmon_start_set_cmd_type_v0(struct pmu_perfmon_cmd *pc, u8 value)
865{
866 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
867
868 start->cmd_type = value;
869}
870
871static void perfmon_start_set_group_id_v3(struct pmu_perfmon_cmd *pc, u8 value)
872{
873 struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;
874
875 start->group_id = value;
876}
877
878static void perfmon_start_set_group_id_v2(struct pmu_perfmon_cmd *pc, u8 value)
879{
880 struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;
881
882 start->group_id = value;
883}
884
885static void perfmon_start_set_group_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
886{
887 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
888
889 start->group_id = value;
890}
891
892static void perfmon_start_set_group_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
893{
894 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
895
896 start->group_id = value;
897}
898
899static void perfmon_start_set_state_id_v3(struct pmu_perfmon_cmd *pc, u8 value)
900{
901 struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;
902
903 start->state_id = value;
904}
905
906static void perfmon_start_set_state_id_v2(struct pmu_perfmon_cmd *pc, u8 value)
907{
908 struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;
909
910 start->state_id = value;
911}
912
913static void perfmon_start_set_state_id_v1(struct pmu_perfmon_cmd *pc, u8 value)
914{
915 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
916
917 start->state_id = value;
918}
919
920static void perfmon_start_set_state_id_v0(struct pmu_perfmon_cmd *pc, u8 value)
921{
922 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
923
924 start->state_id = value;
925}
926
927static void perfmon_start_set_flags_v3(struct pmu_perfmon_cmd *pc, u8 value)
928{
929 struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;
930
931 start->flags = value;
932}
933
934static void perfmon_start_set_flags_v2(struct pmu_perfmon_cmd *pc, u8 value)
935{
936 struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;
937
938 start->flags = value;
939}
940
941static void perfmon_start_set_flags_v1(struct pmu_perfmon_cmd *pc, u8 value)
942{
943 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
944
945 start->flags = value;
946}
947
948static void perfmon_start_set_flags_v0(struct pmu_perfmon_cmd *pc, u8 value)
949{
950 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
951
952 start->flags = value;
953}
954
955static u8 perfmon_start_get_flags_v3(struct pmu_perfmon_cmd *pc)
956{
957 struct pmu_perfmon_cmd_start_v3 *start = &pc->start_v3;
958
959 return start->flags;
960}
961
962static u8 perfmon_start_get_flags_v2(struct pmu_perfmon_cmd *pc)
963{
964 struct pmu_perfmon_cmd_start_v2 *start = &pc->start_v2;
965
966 return start->flags;
967}
968
969static u8 perfmon_start_get_flags_v1(struct pmu_perfmon_cmd *pc)
970{
971 struct pmu_perfmon_cmd_start_v1 *start = &pc->start_v1;
972
973 return start->flags;
974}
975
976static u8 perfmon_start_get_flags_v0(struct pmu_perfmon_cmd *pc)
977{
978 struct pmu_perfmon_cmd_start_v0 *start = &pc->start_v0;
979
980 return start->flags;
981}
982
983static void perfmon_cmd_init_set_sample_buffer_v3(struct pmu_perfmon_cmd *pc,
984 u16 value)
985{
986 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
987
988 init->sample_buffer = value;
989}
990
991static void perfmon_cmd_init_set_sample_buffer_v2(struct pmu_perfmon_cmd *pc,
992 u16 value)
993{
994 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
995
996 init->sample_buffer = value;
997}
998
999
1000static void perfmon_cmd_init_set_sample_buffer_v1(struct pmu_perfmon_cmd *pc,
1001 u16 value)
1002{
1003 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1004
1005 init->sample_buffer = value;
1006}
1007
1008static void perfmon_cmd_init_set_sample_buffer_v0(struct pmu_perfmon_cmd *pc,
1009 u16 value)
1010{
1011 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1012
1013 init->sample_buffer = value;
1014}
1015
1016static void perfmon_cmd_init_set_dec_cnt_v3(struct pmu_perfmon_cmd *pc,
1017 u8 value)
1018{
1019 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
1020
1021 init->to_decrease_count = value;
1022}
1023
1024static void perfmon_cmd_init_set_dec_cnt_v2(struct pmu_perfmon_cmd *pc,
1025 u8 value)
1026{
1027 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
1028
1029 init->to_decrease_count = value;
1030}
1031
1032static void perfmon_cmd_init_set_dec_cnt_v1(struct pmu_perfmon_cmd *pc,
1033 u8 value)
1034{
1035 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1036
1037 init->to_decrease_count = value;
1038}
1039
1040static void perfmon_cmd_init_set_dec_cnt_v0(struct pmu_perfmon_cmd *pc,
1041 u8 value)
1042{
1043 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1044
1045 init->to_decrease_count = value;
1046}
1047
1048static void perfmon_cmd_init_set_base_cnt_id_v3(struct pmu_perfmon_cmd *pc,
1049 u8 value)
1050{
1051 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
1052
1053 init->base_counter_id = value;
1054}
1055
1056static void perfmon_cmd_init_set_base_cnt_id_v2(struct pmu_perfmon_cmd *pc,
1057 u8 value)
1058{
1059 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
1060
1061 init->base_counter_id = value;
1062}
1063
1064static void perfmon_cmd_init_set_base_cnt_id_v1(struct pmu_perfmon_cmd *pc,
1065 u8 value)
1066{
1067 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1068
1069 init->base_counter_id = value;
1070}
1071
1072static void perfmon_cmd_init_set_base_cnt_id_v0(struct pmu_perfmon_cmd *pc,
1073 u8 value)
1074{
1075 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1076
1077 init->base_counter_id = value;
1078}
1079
1080static void perfmon_cmd_init_set_samp_period_us_v3(struct pmu_perfmon_cmd *pc,
1081 u32 value)
1082{
1083 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
1084
1085 init->sample_period_us = value;
1086}
1087
1088static void perfmon_cmd_init_set_samp_period_us_v2(struct pmu_perfmon_cmd *pc,
1089 u32 value)
1090{
1091 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
1092
1093 init->sample_period_us = value;
1094}
1095
1096static void perfmon_cmd_init_set_samp_period_us_v1(struct pmu_perfmon_cmd *pc,
1097 u32 value)
1098{
1099 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1100
1101 init->sample_period_us = value;
1102}
1103
1104static void perfmon_cmd_init_set_samp_period_us_v0(struct pmu_perfmon_cmd *pc,
1105 u32 value)
1106{
1107 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1108
1109 init->sample_period_us = value;
1110}
1111
1112static void perfmon_cmd_init_set_num_cnt_v3(struct pmu_perfmon_cmd *pc,
1113 u8 value)
1114{
1115 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
1116
1117 init->num_counters = value;
1118}
1119
1120static void perfmon_cmd_init_set_num_cnt_v2(struct pmu_perfmon_cmd *pc,
1121 u8 value)
1122{
1123 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
1124
1125 init->num_counters = value;
1126}
1127
1128static void perfmon_cmd_init_set_num_cnt_v1(struct pmu_perfmon_cmd *pc,
1129 u8 value)
1130{
1131 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1132
1133 init->num_counters = value;
1134}
1135
1136static void perfmon_cmd_init_set_num_cnt_v0(struct pmu_perfmon_cmd *pc,
1137 u8 value)
1138{
1139 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1140
1141 init->num_counters = value;
1142}
1143
1144static void perfmon_cmd_init_set_mov_avg_v3(struct pmu_perfmon_cmd *pc,
1145 u8 value)
1146{
1147 struct pmu_perfmon_cmd_init_v3 *init = &pc->init_v3;
1148
1149 init->samples_in_moving_avg = value;
1150}
1151
1152static void perfmon_cmd_init_set_mov_avg_v2(struct pmu_perfmon_cmd *pc,
1153 u8 value)
1154{
1155 struct pmu_perfmon_cmd_init_v2 *init = &pc->init_v2;
1156
1157 init->samples_in_moving_avg = value;
1158}
1159
1160static void perfmon_cmd_init_set_mov_avg_v1(struct pmu_perfmon_cmd *pc,
1161 u8 value)
1162{
1163 struct pmu_perfmon_cmd_init_v1 *init = &pc->init_v1;
1164
1165 init->samples_in_moving_avg = value;
1166}
1167
1168static void perfmon_cmd_init_set_mov_avg_v0(struct pmu_perfmon_cmd *pc,
1169 u8 value)
1170{
1171 struct pmu_perfmon_cmd_init_v0 *init = &pc->init_v0;
1172
1173 init->samples_in_moving_avg = value;
1174}
1175
1176static void get_pmu_init_msg_pmu_queue_params_v0(struct pmu_queue *queue,
1177 u32 id, void *pmu_init_msg)
1178{
1179 struct pmu_init_msg_pmu_v0 *init =
1180 (struct pmu_init_msg_pmu_v0 *)pmu_init_msg;
1181
1182 queue->index = init->queue_info[id].index;
1183 queue->offset = init->queue_info[id].offset;
1184 queue->size = init->queue_info[id].size;
1185}
1186
1187static void get_pmu_init_msg_pmu_queue_params_v1(struct pmu_queue *queue,
1188 u32 id, void *pmu_init_msg)
1189{
1190 struct pmu_init_msg_pmu_v1 *init =
1191 (struct pmu_init_msg_pmu_v1 *)pmu_init_msg;
1192
1193 queue->index = init->queue_info[id].index;
1194 queue->offset = init->queue_info[id].offset;
1195 queue->size = init->queue_info[id].size;
1196}
1197
1198static void get_pmu_init_msg_pmu_queue_params_v2(struct pmu_queue *queue,
1199 u32 id, void *pmu_init_msg)
1200{
1201 struct pmu_init_msg_pmu_v2 *init =
1202 (struct pmu_init_msg_pmu_v2 *)pmu_init_msg;
1203
1204 queue->index = init->queue_info[id].index;
1205 queue->offset = init->queue_info[id].offset;
1206 queue->size = init->queue_info[id].size;
1207}
1208
1209static void get_pmu_init_msg_pmu_queue_params_v4(struct pmu_queue *queue,
1210 u32 id, void *pmu_init_msg)
1211{
1212 struct pmu_init_msg_pmu_v4 *init = pmu_init_msg;
1213 u32 current_ptr = 0;
1214 u8 i;
1215 u8 tmp_id = id;
1216
1217 if (tmp_id == PMU_COMMAND_QUEUE_HPQ)
1218 tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
1219 else if (tmp_id == PMU_COMMAND_QUEUE_LPQ)
1220 tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
1221 else if (tmp_id == PMU_MESSAGE_QUEUE)
1222 tmp_id = PMU_QUEUE_MSG_IDX_FOR_V3;
1223 else
1224 return;
1225
1226 queue->index = init->queue_index[tmp_id];
1227 queue->size = init->queue_size[tmp_id];
1228 if (tmp_id != 0) {
1229 for (i = 0 ; i < tmp_id; i++)
1230 current_ptr += init->queue_size[i];
1231 }
1232 queue->offset = init->queue_offset + current_ptr;
1233}
1234
1235static void get_pmu_init_msg_pmu_queue_params_v5(struct pmu_queue *queue,
1236 u32 id, void *pmu_init_msg)
1237{
1238 struct pmu_init_msg_pmu_v4 *init = pmu_init_msg;
1239 u32 current_ptr = 0;
1240 u8 i;
1241 u8 tmp_id = id;
1242
1243 if (tmp_id == PMU_COMMAND_QUEUE_HPQ)
1244 tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
1245 else if (tmp_id == PMU_COMMAND_QUEUE_LPQ)
1246 tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
1247 else if (tmp_id == PMU_MESSAGE_QUEUE)
1248 tmp_id = PMU_QUEUE_MSG_IDX_FOR_V4;
1249 else
1250 return;
1251
1252 queue->index = init->queue_index[tmp_id];
1253 queue->size = init->queue_size[tmp_id];
1254 if (tmp_id != 0) {
1255 for (i = 0 ; i < tmp_id; i++)
1256 current_ptr += init->queue_size[i];
1257 }
1258 queue->offset = init->queue_offset + current_ptr;
1259}
1260
1261static void get_pmu_init_msg_pmu_queue_params_v3(struct pmu_queue *queue,
1262 u32 id, void *pmu_init_msg)
1263{
1264 struct pmu_init_msg_pmu_v3 *init =
1265 (struct pmu_init_msg_pmu_v3 *)pmu_init_msg;
1266 u32 current_ptr = 0;
1267 u8 i;
1268 u8 tmp_id = id;
1269
1270 if (tmp_id == PMU_COMMAND_QUEUE_HPQ)
1271 tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
1272 else if (tmp_id == PMU_COMMAND_QUEUE_LPQ)
1273 tmp_id = PMU_QUEUE_LPQ_IDX_FOR_V3;
1274 else if (tmp_id == PMU_MESSAGE_QUEUE)
1275 tmp_id = PMU_QUEUE_MSG_IDX_FOR_V3;
1276 else
1277 return;
1278 queue->index = init->queue_index[tmp_id];
1279 queue->size = init->queue_size[tmp_id];
1280 if (tmp_id != 0) {
1281 for (i = 0 ; i < tmp_id; i++)
1282 current_ptr += init->queue_size[i];
1283 }
1284 queue->offset = init->queue_offset + current_ptr;
1285}
1286
1287static void *get_pmu_sequence_in_alloc_ptr_v3(struct pmu_sequence *seq)
1288{
1289 return (void *)(&seq->in_v3);
1290}
1291
1292static void *get_pmu_sequence_in_alloc_ptr_v1(struct pmu_sequence *seq)
1293{
1294 return (void *)(&seq->in_v1);
1295}
1296
1297static void *get_pmu_sequence_in_alloc_ptr_v0(struct pmu_sequence *seq)
1298{
1299 return (void *)(&seq->in_v0);
1300}
1301
1302static void *get_pmu_sequence_out_alloc_ptr_v3(struct pmu_sequence *seq)
1303{
1304 return (void *)(&seq->out_v3);
1305}
1306
1307static void *get_pmu_sequence_out_alloc_ptr_v1(struct pmu_sequence *seq)
1308{
1309 return (void *)(&seq->out_v1);
1310}
1311
1312static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
1313{
1314 return (void *)(&seq->out_v0);
1315}
1316
1317static u8 pg_cmd_eng_buf_load_size_v0(struct pmu_pg_cmd *pg)
1318{
1319 return sizeof(pg->eng_buf_load_v0);
1320}
1321
1322static u8 pg_cmd_eng_buf_load_size_v1(struct pmu_pg_cmd *pg)
1323{
1324 return sizeof(pg->eng_buf_load_v1);
1325}
1326
1327static u8 pg_cmd_eng_buf_load_size_v2(struct pmu_pg_cmd *pg)
1328{
1329 return sizeof(pg->eng_buf_load_v2);
1330}
1331
1332static void pg_cmd_eng_buf_load_set_cmd_type_v0(struct pmu_pg_cmd *pg,
1333 u8 value)
1334{
1335 pg->eng_buf_load_v0.cmd_type = value;
1336}
1337
1338static void pg_cmd_eng_buf_load_set_cmd_type_v1(struct pmu_pg_cmd *pg,
1339 u8 value)
1340{
1341 pg->eng_buf_load_v1.cmd_type = value;
1342}
1343
1344static void pg_cmd_eng_buf_load_set_cmd_type_v2(struct pmu_pg_cmd *pg,
1345 u8 value)
1346{
1347 pg->eng_buf_load_v2.cmd_type = value;
1348}
1349
1350static void pg_cmd_eng_buf_load_set_engine_id_v0(struct pmu_pg_cmd *pg,
1351 u8 value)
1352{
1353 pg->eng_buf_load_v0.engine_id = value;
1354}
1355static void pg_cmd_eng_buf_load_set_engine_id_v1(struct pmu_pg_cmd *pg,
1356 u8 value)
1357{
1358 pg->eng_buf_load_v1.engine_id = value;
1359}
1360static void pg_cmd_eng_buf_load_set_engine_id_v2(struct pmu_pg_cmd *pg,
1361 u8 value)
1362{
1363 pg->eng_buf_load_v2.engine_id = value;
1364}
1365static void pg_cmd_eng_buf_load_set_buf_idx_v0(struct pmu_pg_cmd *pg,
1366 u8 value)
1367{
1368 pg->eng_buf_load_v0.buf_idx = value;
1369}
1370static void pg_cmd_eng_buf_load_set_buf_idx_v1(struct pmu_pg_cmd *pg,
1371 u8 value)
1372{
1373 pg->eng_buf_load_v1.buf_idx = value;
1374}
1375static void pg_cmd_eng_buf_load_set_buf_idx_v2(struct pmu_pg_cmd *pg,
1376 u8 value)
1377{
1378 pg->eng_buf_load_v2.buf_idx = value;
1379}
1380
1381static void pg_cmd_eng_buf_load_set_pad_v0(struct pmu_pg_cmd *pg,
1382 u8 value)
1383{
1384 pg->eng_buf_load_v0.pad = value;
1385}
1386static void pg_cmd_eng_buf_load_set_pad_v1(struct pmu_pg_cmd *pg,
1387 u8 value)
1388{
1389 pg->eng_buf_load_v1.pad = value;
1390}
1391static void pg_cmd_eng_buf_load_set_pad_v2(struct pmu_pg_cmd *pg,
1392 u8 value)
1393{
1394 pg->eng_buf_load_v2.pad = value;
1395}
1396
1397static void pg_cmd_eng_buf_load_set_buf_size_v0(struct pmu_pg_cmd *pg,
1398 u16 value)
1399{
1400 pg->eng_buf_load_v0.buf_size = value;
1401}
1402static void pg_cmd_eng_buf_load_set_buf_size_v1(struct pmu_pg_cmd *pg,
1403 u16 value)
1404{
1405 pg->eng_buf_load_v1.dma_desc.dma_size = value;
1406}
1407static void pg_cmd_eng_buf_load_set_buf_size_v2(struct pmu_pg_cmd *pg,
1408 u16 value)
1409{
1410 pg->eng_buf_load_v2.dma_desc.params = value;
1411}
1412
1413static void pg_cmd_eng_buf_load_set_dma_base_v0(struct pmu_pg_cmd *pg,
1414 u32 value)
1415{
1416 pg->eng_buf_load_v0.dma_base = (value >> 8);
1417}
1418static void pg_cmd_eng_buf_load_set_dma_base_v1(struct pmu_pg_cmd *pg,
1419 u32 value)
1420{
1421 pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= u64_lo32(value);
1422 pg->eng_buf_load_v1.dma_desc.dma_addr.hi |= u64_hi32(value);
1423}
1424static void pg_cmd_eng_buf_load_set_dma_base_v2(struct pmu_pg_cmd *pg,
1425 u32 value)
1426{
1427 pg->eng_buf_load_v2.dma_desc.address.lo = u64_lo32(value);
1428 pg->eng_buf_load_v2.dma_desc.address.hi = u64_hi32(value);
1429}
1430
1431static void pg_cmd_eng_buf_load_set_dma_offset_v0(struct pmu_pg_cmd *pg,
1432 u8 value)
1433{
1434 pg->eng_buf_load_v0.dma_offset = value;
1435}
1436static void pg_cmd_eng_buf_load_set_dma_offset_v1(struct pmu_pg_cmd *pg,
1437 u8 value)
1438{
1439 pg->eng_buf_load_v1.dma_desc.dma_addr.lo |= value;
1440}
1441static void pg_cmd_eng_buf_load_set_dma_offset_v2(struct pmu_pg_cmd *pg,
1442 u8 value)
1443{
1444 pg->eng_buf_load_v2.dma_desc.address.lo |= u64_lo32(value);
1445 pg->eng_buf_load_v2.dma_desc.address.hi |= u64_lo32(value);
1446}
1447
1448static void pg_cmd_eng_buf_load_set_dma_idx_v0(struct pmu_pg_cmd *pg,
1449 u8 value)
1450{
1451 pg->eng_buf_load_v0.dma_idx = value;
1452}
1453
1454static void pg_cmd_eng_buf_load_set_dma_idx_v1(struct pmu_pg_cmd *pg,
1455 u8 value)
1456{
1457 pg->eng_buf_load_v1.dma_desc.dma_idx = value;
1458}
1459
1460static void pg_cmd_eng_buf_load_set_dma_idx_v2(struct pmu_pg_cmd *pg,
1461 u8 value)
1462{
1463 pg->eng_buf_load_v2.dma_desc.params |= (value << 24);
1464}
1465
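/*
 * Bind the app_version-specific accessor implementations (the v0/v1/v2/v3
 * helpers above) into g->ops.pmu_ver, so the rest of the driver never has
 * to know which versioned command/allocation struct layout the PMU ucode
 * uses.
 */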
1466static int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu)
1467{
1468 struct gk20a *g = gk20a_from_pmu(pmu);
1469 struct pmu_v *pv = &g->ops.pmu_ver;
1470 int err = 0;
1471
1472 nvgpu_log_fn(g, " ");
1473
1474 switch (pmu->desc->app_version) {
1475 case APP_VERSION_NC_2:
1476 case APP_VERSION_NC_1:
1477 case APP_VERSION_NC_0:
1478 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
1479 pg_cmd_eng_buf_load_size_v1;
1480 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
1481 pg_cmd_eng_buf_load_set_cmd_type_v1;
1482 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
1483 pg_cmd_eng_buf_load_set_engine_id_v1;
1484 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
1485 pg_cmd_eng_buf_load_set_buf_idx_v1;
1486 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
1487 pg_cmd_eng_buf_load_set_pad_v1;
1488 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
1489 pg_cmd_eng_buf_load_set_buf_size_v1;
1490 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
1491 pg_cmd_eng_buf_load_set_dma_base_v1;
1492 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
1493 pg_cmd_eng_buf_load_set_dma_offset_v1;
1494 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
1495 pg_cmd_eng_buf_load_set_dma_idx_v1;
1496 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
1497 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
1498 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
1499 g->ops.pmu_ver.set_perfmon_cntr_valid =
1500 set_perfmon_cntr_valid_v2;
1501 g->ops.pmu_ver.set_perfmon_cntr_index =
1502 set_perfmon_cntr_index_v2;
1503 g->ops.pmu_ver.set_perfmon_cntr_group_id =
1504 set_perfmon_cntr_group_id_v2;
1505 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
1506 g->pmu_ver_cmd_id_zbc_table_update = 16;
1507 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
1508 g->ops.pmu_ver.get_pmu_cmdline_args_size =
1509 pmu_cmdline_size_v4;
1510 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
1511 set_pmu_cmdline_args_cpufreq_v4;
1512 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
1513 set_pmu_cmdline_args_secure_mode_v4;
1514 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
1515 set_pmu_cmdline_args_falctracesize_v4;
1516 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
1517 set_pmu_cmdline_args_falctracedmabase_v4;
1518 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
1519 set_pmu_cmdline_args_falctracedmaidx_v4;
1520 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
1521 get_pmu_cmdline_args_ptr_v4;
1522 g->ops.pmu_ver.get_pmu_allocation_struct_size =
1523 get_pmu_allocation_size_v2;
1524 g->ops.pmu_ver.set_pmu_allocation_ptr =
1525 set_pmu_allocation_ptr_v2;
1526 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
1527 pmu_allocation_set_dmem_size_v2;
1528 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
1529 pmu_allocation_get_dmem_size_v2;
1530 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
1531 pmu_allocation_get_dmem_offset_v2;
1532 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
1533 pmu_allocation_get_dmem_offset_addr_v2;
1534 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
1535 pmu_allocation_set_dmem_offset_v2;
1536 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1537 get_pmu_init_msg_pmu_queue_params_v1;
1538 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1539 get_pmu_msg_pmu_init_msg_ptr_v1;
1540 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1541 get_pmu_init_msg_pmu_sw_mg_off_v1;
1542 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1543 get_pmu_init_msg_pmu_sw_mg_size_v1;
1544 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
1545 get_pmu_perfmon_cmd_start_size_v2;
1546 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
1547 get_perfmon_cmd_start_offsetofvar_v2;
1548 g->ops.pmu_ver.perfmon_start_set_cmd_type =
1549 perfmon_start_set_cmd_type_v2;
1550 g->ops.pmu_ver.perfmon_start_set_group_id =
1551 perfmon_start_set_group_id_v2;
1552 g->ops.pmu_ver.perfmon_start_set_state_id =
1553 perfmon_start_set_state_id_v2;
1554 g->ops.pmu_ver.perfmon_start_set_flags =
1555 perfmon_start_set_flags_v2;
1556 g->ops.pmu_ver.perfmon_start_get_flags =
1557 perfmon_start_get_flags_v2;
1558 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
1559 get_pmu_perfmon_cmd_init_size_v2;
1560 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
1561 get_perfmon_cmd_init_offsetofvar_v2;
1562 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
1563 perfmon_cmd_init_set_sample_buffer_v2;
1564 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
1565 perfmon_cmd_init_set_dec_cnt_v2;
1566 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
1567 perfmon_cmd_init_set_base_cnt_id_v2;
1568 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
1569 perfmon_cmd_init_set_samp_period_us_v2;
1570 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
1571 perfmon_cmd_init_set_num_cnt_v2;
1572 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
1573 perfmon_cmd_init_set_mov_avg_v2;
1574 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
1575 get_pmu_sequence_in_alloc_ptr_v1;
1576 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
1577 get_pmu_sequence_out_alloc_ptr_v1;
1578 break;
1579 case APP_VERSION_NC_3:
1580 case APP_VERSION_BIGGPU:
1581 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
1582 pg_cmd_eng_buf_load_size_v2;
1583 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
1584 pg_cmd_eng_buf_load_set_cmd_type_v2;
1585 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
1586 pg_cmd_eng_buf_load_set_engine_id_v2;
1587 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
1588 pg_cmd_eng_buf_load_set_buf_idx_v2;
1589 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
1590 pg_cmd_eng_buf_load_set_pad_v2;
1591 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
1592 pg_cmd_eng_buf_load_set_buf_size_v2;
1593 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
1594 pg_cmd_eng_buf_load_set_dma_base_v2;
1595 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
1596 pg_cmd_eng_buf_load_set_dma_offset_v2;
1597 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
1598 pg_cmd_eng_buf_load_set_dma_idx_v2;
1599 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
1600 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
1601 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
1602 g->ops.pmu_ver.set_perfmon_cntr_valid =
1603 set_perfmon_cntr_valid_v2;
1604 g->ops.pmu_ver.set_perfmon_cntr_index =
1605 set_perfmon_cntr_index_v2;
1606 g->ops.pmu_ver.set_perfmon_cntr_group_id =
1607 set_perfmon_cntr_group_id_v2;
1608 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
1609 g->pmu_ver_cmd_id_zbc_table_update = 16;
1610 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, false);
1611 g->ops.pmu_ver.get_pmu_cmdline_args_size =
1612 pmu_cmdline_size_v6;
1613 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
1614 set_pmu_cmdline_args_cpufreq_v5;
1615 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
1616 set_pmu_cmdline_args_secure_mode_v5;
1617 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
1618 set_pmu_cmdline_args_falctracesize_v5;
1619 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
1620 set_pmu_cmdline_args_falctracedmabase_v5;
1621 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
1622 set_pmu_cmdline_args_falctracedmaidx_v5;
1623 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
1624 get_pmu_cmdline_args_ptr_v5;
1625 g->ops.pmu_ver.get_pmu_allocation_struct_size =
1626 get_pmu_allocation_size_v3;
1627 g->ops.pmu_ver.set_pmu_allocation_ptr =
1628 set_pmu_allocation_ptr_v3;
1629 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
1630 pmu_allocation_set_dmem_size_v3;
1631 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
1632 pmu_allocation_get_dmem_size_v3;
1633 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
1634 pmu_allocation_get_dmem_offset_v3;
1635 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
1636 pmu_allocation_get_dmem_offset_addr_v3;
1637 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
1638 pmu_allocation_set_dmem_offset_v3;
1639 g->ops.pmu_ver.pmu_allocation_get_fb_addr =
1640 pmu_allocation_get_fb_addr_v3;
1641 g->ops.pmu_ver.pmu_allocation_get_fb_size =
1642 pmu_allocation_get_fb_size_v3;
1643 if (pmu->desc->app_version == APP_VERSION_BIGGPU)
1644 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1645 get_pmu_init_msg_pmu_queue_params_v5;
1646 else
1647 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1648 get_pmu_init_msg_pmu_queue_params_v4;
1649 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1650 get_pmu_msg_pmu_init_msg_ptr_v4;
1651 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1652 get_pmu_init_msg_pmu_sw_mg_off_v4;
1653 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1654 get_pmu_init_msg_pmu_sw_mg_size_v4;
1655 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
1656 get_pmu_perfmon_cmd_start_size_v3;
1657 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
1658 get_perfmon_cmd_start_offsetofvar_v3;
1659 g->ops.pmu_ver.perfmon_start_set_cmd_type =
1660 perfmon_start_set_cmd_type_v3;
1661 g->ops.pmu_ver.perfmon_start_set_group_id =
1662 perfmon_start_set_group_id_v3;
1663 g->ops.pmu_ver.perfmon_start_set_state_id =
1664 perfmon_start_set_state_id_v3;
1665 g->ops.pmu_ver.perfmon_start_set_flags =
1666 perfmon_start_set_flags_v3;
1667 g->ops.pmu_ver.perfmon_start_get_flags =
1668 perfmon_start_get_flags_v3;
1669 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
1670 get_pmu_perfmon_cmd_init_size_v3;
1671 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
1672 get_perfmon_cmd_init_offsetofvar_v3;
1673 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
1674 perfmon_cmd_init_set_sample_buffer_v3;
1675 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
1676 perfmon_cmd_init_set_dec_cnt_v3;
1677 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
1678 perfmon_cmd_init_set_base_cnt_id_v3;
1679 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
1680 perfmon_cmd_init_set_samp_period_us_v3;
1681 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
1682 perfmon_cmd_init_set_num_cnt_v3;
1683 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
1684 perfmon_cmd_init_set_mov_avg_v3;
1685 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
1686 get_pmu_sequence_in_alloc_ptr_v3;
1687 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
1688 get_pmu_sequence_out_alloc_ptr_v3;
1689 break;
1690 case APP_VERSION_GM206:
1691 case APP_VERSION_NV_GPU:
1692 case APP_VERSION_NV_GPU_1:
1693 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
1694 pg_cmd_eng_buf_load_size_v2;
1695 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
1696 pg_cmd_eng_buf_load_set_cmd_type_v2;
1697 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
1698 pg_cmd_eng_buf_load_set_engine_id_v2;
1699 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
1700 pg_cmd_eng_buf_load_set_buf_idx_v2;
1701 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
1702 pg_cmd_eng_buf_load_set_pad_v2;
1703 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
1704 pg_cmd_eng_buf_load_set_buf_size_v2;
1705 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
1706 pg_cmd_eng_buf_load_set_dma_base_v2;
1707 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
1708 pg_cmd_eng_buf_load_set_dma_offset_v2;
1709 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
1710 pg_cmd_eng_buf_load_set_dma_idx_v2;
1711 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
1712 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
1713 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
1714 g->ops.pmu_ver.set_perfmon_cntr_valid =
1715 set_perfmon_cntr_valid_v2;
1716 g->ops.pmu_ver.set_perfmon_cntr_index =
1717 set_perfmon_cntr_index_v2;
1718 g->ops.pmu_ver.set_perfmon_cntr_group_id =
1719 set_perfmon_cntr_group_id_v2;
1720 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
1721 g->pmu_ver_cmd_id_zbc_table_update = 16;
1722 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
1723 g->ops.pmu_ver.get_pmu_cmdline_args_size =
1724 pmu_cmdline_size_v5;
1725 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
1726 set_pmu_cmdline_args_cpufreq_v5;
1727 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
1728 set_pmu_cmdline_args_secure_mode_v5;
1729 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
1730 set_pmu_cmdline_args_falctracesize_v5;
1731 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
1732 set_pmu_cmdline_args_falctracedmabase_v5;
1733 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
1734 set_pmu_cmdline_args_falctracedmaidx_v5;
1735 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
1736 get_pmu_cmdline_args_ptr_v5;
1737 g->ops.pmu_ver.get_pmu_allocation_struct_size =
1738 get_pmu_allocation_size_v3;
1739 g->ops.pmu_ver.set_pmu_allocation_ptr =
1740 set_pmu_allocation_ptr_v3;
1741 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
1742 pmu_allocation_set_dmem_size_v3;
1743 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
1744 pmu_allocation_get_dmem_size_v3;
1745 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
1746 pmu_allocation_get_dmem_offset_v3;
1747 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
1748 pmu_allocation_get_dmem_offset_addr_v3;
1749 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
1750 pmu_allocation_set_dmem_offset_v3;
1751 g->ops.pmu_ver.pmu_allocation_get_fb_addr =
1752 pmu_allocation_get_fb_addr_v3;
1753 g->ops.pmu_ver.pmu_allocation_get_fb_size =
1754 pmu_allocation_get_fb_size_v3;
1755 if (pmu->desc->app_version != APP_VERSION_NV_GPU &&
1756 pmu->desc->app_version != APP_VERSION_NV_GPU_1) {
1757 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1758 get_pmu_init_msg_pmu_queue_params_v2;
1759 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1760 get_pmu_msg_pmu_init_msg_ptr_v2;
1761 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1762 get_pmu_init_msg_pmu_sw_mg_off_v2;
1763 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1764 get_pmu_init_msg_pmu_sw_mg_size_v2;
1765 } else {
1766 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1767 get_pmu_init_msg_pmu_queue_params_v3;
1768 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1769 get_pmu_msg_pmu_init_msg_ptr_v3;
1770 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1771 get_pmu_init_msg_pmu_sw_mg_off_v3;
1772 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1773 get_pmu_init_msg_pmu_sw_mg_size_v3;
1774 }
1775 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
1776 get_pmu_perfmon_cmd_start_size_v3;
1777 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
1778 get_perfmon_cmd_start_offsetofvar_v3;
1779 g->ops.pmu_ver.perfmon_start_set_cmd_type =
1780 perfmon_start_set_cmd_type_v3;
1781 g->ops.pmu_ver.perfmon_start_set_group_id =
1782 perfmon_start_set_group_id_v3;
1783 g->ops.pmu_ver.perfmon_start_set_state_id =
1784 perfmon_start_set_state_id_v3;
1785 g->ops.pmu_ver.perfmon_start_set_flags =
1786 perfmon_start_set_flags_v3;
1787 g->ops.pmu_ver.perfmon_start_get_flags =
1788 perfmon_start_get_flags_v3;
1789 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
1790 get_pmu_perfmon_cmd_init_size_v3;
1791 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
1792 get_perfmon_cmd_init_offsetofvar_v3;
1793 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
1794 perfmon_cmd_init_set_sample_buffer_v3;
1795 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
1796 perfmon_cmd_init_set_dec_cnt_v3;
1797 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
1798 perfmon_cmd_init_set_base_cnt_id_v3;
1799 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
1800 perfmon_cmd_init_set_samp_period_us_v3;
1801 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
1802 perfmon_cmd_init_set_num_cnt_v3;
1803 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
1804 perfmon_cmd_init_set_mov_avg_v3;
1805 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
1806 get_pmu_sequence_in_alloc_ptr_v3;
1807 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
1808 get_pmu_sequence_out_alloc_ptr_v3;
1809 break;
1810 case APP_VERSION_GM20B_5:
1811 case APP_VERSION_GM20B_4:
1812 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
1813 pg_cmd_eng_buf_load_size_v0;
1814 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
1815 pg_cmd_eng_buf_load_set_cmd_type_v0;
1816 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
1817 pg_cmd_eng_buf_load_set_engine_id_v0;
1818 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
1819 pg_cmd_eng_buf_load_set_buf_idx_v0;
1820 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
1821 pg_cmd_eng_buf_load_set_pad_v0;
1822 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
1823 pg_cmd_eng_buf_load_set_buf_size_v0;
1824 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
1825 pg_cmd_eng_buf_load_set_dma_base_v0;
1826 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
1827 pg_cmd_eng_buf_load_set_dma_offset_v0;
1828 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
1829 pg_cmd_eng_buf_load_set_dma_idx_v0;
1830 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
1831 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
1832 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
1833 g->ops.pmu_ver.set_perfmon_cntr_valid =
1834 set_perfmon_cntr_valid_v2;
1835 g->ops.pmu_ver.set_perfmon_cntr_index =
1836 set_perfmon_cntr_index_v2;
1837 g->ops.pmu_ver.set_perfmon_cntr_group_id =
1838 set_perfmon_cntr_group_id_v2;
1839 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
1840 g->pmu_ver_cmd_id_zbc_table_update = 16;
1841 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
1842 g->ops.pmu_ver.get_pmu_cmdline_args_size =
1843 pmu_cmdline_size_v3;
1844 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
1845 set_pmu_cmdline_args_cpufreq_v3;
1846 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
1847 set_pmu_cmdline_args_secure_mode_v3;
1848 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
1849 set_pmu_cmdline_args_falctracesize_v3;
1850 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
1851 set_pmu_cmdline_args_falctracedmabase_v3;
1852 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
1853 set_pmu_cmdline_args_falctracedmaidx_v3;
1854 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
1855 get_pmu_cmdline_args_ptr_v3;
1856 g->ops.pmu_ver.get_pmu_allocation_struct_size =
1857 get_pmu_allocation_size_v1;
1858 g->ops.pmu_ver.set_pmu_allocation_ptr =
1859 set_pmu_allocation_ptr_v1;
1860 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
1861 pmu_allocation_set_dmem_size_v1;
1862 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
1863 pmu_allocation_get_dmem_size_v1;
1864 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
1865 pmu_allocation_get_dmem_offset_v1;
1866 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
1867 pmu_allocation_get_dmem_offset_addr_v1;
1868 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
1869 pmu_allocation_set_dmem_offset_v1;
1870 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1871 get_pmu_init_msg_pmu_queue_params_v1;
1872 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1873 get_pmu_msg_pmu_init_msg_ptr_v1;
1874 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1875 get_pmu_init_msg_pmu_sw_mg_off_v1;
1876 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1877 get_pmu_init_msg_pmu_sw_mg_size_v1;
1878 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
1879 get_pmu_perfmon_cmd_start_size_v1;
1880 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
1881 get_perfmon_cmd_start_offsetofvar_v1;
1882 g->ops.pmu_ver.perfmon_start_set_cmd_type =
1883 perfmon_start_set_cmd_type_v1;
1884 g->ops.pmu_ver.perfmon_start_set_group_id =
1885 perfmon_start_set_group_id_v1;
1886 g->ops.pmu_ver.perfmon_start_set_state_id =
1887 perfmon_start_set_state_id_v1;
1888 g->ops.pmu_ver.perfmon_start_set_flags =
1889 perfmon_start_set_flags_v1;
1890 g->ops.pmu_ver.perfmon_start_get_flags =
1891 perfmon_start_get_flags_v1;
1892 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
1893 get_pmu_perfmon_cmd_init_size_v1;
1894 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
1895 get_perfmon_cmd_init_offsetofvar_v1;
1896 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
1897 perfmon_cmd_init_set_sample_buffer_v1;
1898 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
1899 perfmon_cmd_init_set_dec_cnt_v1;
1900 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
1901 perfmon_cmd_init_set_base_cnt_id_v1;
1902 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
1903 perfmon_cmd_init_set_samp_period_us_v1;
1904 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
1905 perfmon_cmd_init_set_num_cnt_v1;
1906 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
1907 perfmon_cmd_init_set_mov_avg_v1;
1908 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
1909 get_pmu_sequence_in_alloc_ptr_v1;
1910 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
1911 get_pmu_sequence_out_alloc_ptr_v1;
1912 break;
1913 case APP_VERSION_GM20B_3:
1914 case APP_VERSION_GM20B_2:
1915 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
1916 pg_cmd_eng_buf_load_size_v0;
1917 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
1918 pg_cmd_eng_buf_load_set_cmd_type_v0;
1919 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
1920 pg_cmd_eng_buf_load_set_engine_id_v0;
1921 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
1922 pg_cmd_eng_buf_load_set_buf_idx_v0;
1923 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
1924 pg_cmd_eng_buf_load_set_pad_v0;
1925 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
1926 pg_cmd_eng_buf_load_set_buf_size_v0;
1927 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
1928 pg_cmd_eng_buf_load_set_dma_base_v0;
1929 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
1930 pg_cmd_eng_buf_load_set_dma_offset_v0;
1931 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
1932 pg_cmd_eng_buf_load_set_dma_idx_v0;
1933 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
1934 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
1935 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
1936 g->ops.pmu_ver.set_perfmon_cntr_valid =
1937 set_perfmon_cntr_valid_v2;
1938 g->ops.pmu_ver.set_perfmon_cntr_index =
1939 set_perfmon_cntr_index_v2;
1940 g->ops.pmu_ver.set_perfmon_cntr_group_id =
1941 set_perfmon_cntr_group_id_v2;
1942 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
1943 g->pmu_ver_cmd_id_zbc_table_update = 16;
1944 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
1945 g->ops.pmu_ver.get_pmu_cmdline_args_size =
1946 pmu_cmdline_size_v2;
1947 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
1948 set_pmu_cmdline_args_cpufreq_v2;
1949 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
1950 set_pmu_cmdline_args_secure_mode_v2;
1951 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
1952 set_pmu_cmdline_args_falctracesize_v2;
1953 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
1954 set_pmu_cmdline_args_falctracedmabase_v2;
1955 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
1956 set_pmu_cmdline_args_falctracedmaidx_v2;
1957 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
1958 get_pmu_cmdline_args_ptr_v2;
1959 g->ops.pmu_ver.get_pmu_allocation_struct_size =
1960 get_pmu_allocation_size_v1;
1961 g->ops.pmu_ver.set_pmu_allocation_ptr =
1962 set_pmu_allocation_ptr_v1;
1963 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
1964 pmu_allocation_set_dmem_size_v1;
1965 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
1966 pmu_allocation_get_dmem_size_v1;
1967 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
1968 pmu_allocation_get_dmem_offset_v1;
1969 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
1970 pmu_allocation_get_dmem_offset_addr_v1;
1971 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
1972 pmu_allocation_set_dmem_offset_v1;
1973 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
1974 get_pmu_init_msg_pmu_queue_params_v1;
1975 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
1976 get_pmu_msg_pmu_init_msg_ptr_v1;
1977 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
1978 get_pmu_init_msg_pmu_sw_mg_off_v1;
1979 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
1980 get_pmu_init_msg_pmu_sw_mg_size_v1;
1981 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
1982 get_pmu_perfmon_cmd_start_size_v1;
1983 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
1984 get_perfmon_cmd_start_offsetofvar_v1;
1985 g->ops.pmu_ver.perfmon_start_set_cmd_type =
1986 perfmon_start_set_cmd_type_v1;
1987 g->ops.pmu_ver.perfmon_start_set_group_id =
1988 perfmon_start_set_group_id_v1;
1989 g->ops.pmu_ver.perfmon_start_set_state_id =
1990 perfmon_start_set_state_id_v1;
1991 g->ops.pmu_ver.perfmon_start_set_flags =
1992 perfmon_start_set_flags_v1;
1993 g->ops.pmu_ver.perfmon_start_get_flags =
1994 perfmon_start_get_flags_v1;
1995 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
1996 get_pmu_perfmon_cmd_init_size_v1;
1997 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
1998 get_perfmon_cmd_init_offsetofvar_v1;
1999 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
2000 perfmon_cmd_init_set_sample_buffer_v1;
2001 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
2002 perfmon_cmd_init_set_dec_cnt_v1;
2003 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
2004 perfmon_cmd_init_set_base_cnt_id_v1;
2005 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
2006 perfmon_cmd_init_set_samp_period_us_v1;
2007 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
2008 perfmon_cmd_init_set_num_cnt_v1;
2009 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
2010 perfmon_cmd_init_set_mov_avg_v1;
2011 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
2012 get_pmu_sequence_in_alloc_ptr_v1;
2013 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
2014 get_pmu_sequence_out_alloc_ptr_v1;
2015 break;
2016 case APP_VERSION_GM20B_1:
2017 case APP_VERSION_GM20B:
2018 case APP_VERSION_1:
2019 case APP_VERSION_2:
2020 case APP_VERSION_3:
2021 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
2022 pg_cmd_eng_buf_load_size_v0;
2023 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
2024 pg_cmd_eng_buf_load_set_cmd_type_v0;
2025 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
2026 pg_cmd_eng_buf_load_set_engine_id_v0;
2027 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
2028 pg_cmd_eng_buf_load_set_buf_idx_v0;
2029 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
2030 pg_cmd_eng_buf_load_set_pad_v0;
2031 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
2032 pg_cmd_eng_buf_load_set_buf_size_v0;
2033 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
2034 pg_cmd_eng_buf_load_set_dma_base_v0;
2035 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
2036 pg_cmd_eng_buf_load_set_dma_offset_v0;
2037 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
2038 pg_cmd_eng_buf_load_set_dma_idx_v0;
2039 g->pmu_ver_cmd_id_zbc_table_update = 16;
2040 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
2041 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v0;
2042 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v0;
2043 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v0;
2044 g->ops.pmu_ver.set_perfmon_cntr_valid =
2045 set_perfmon_cntr_valid_v0;
2046 g->ops.pmu_ver.set_perfmon_cntr_index =
2047 set_perfmon_cntr_index_v0;
2048 g->ops.pmu_ver.set_perfmon_cntr_group_id =
2049 set_perfmon_cntr_group_id_v0;
2050 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v0;
2051 g->ops.pmu_ver.get_pmu_cmdline_args_size =
2052 pmu_cmdline_size_v1;
2053 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
2054 set_pmu_cmdline_args_cpufreq_v1;
2055 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
2056 set_pmu_cmdline_args_secure_mode_v1;
2057 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
2058 set_pmu_cmdline_args_falctracesize_v1;
2059 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
2060 set_pmu_cmdline_args_falctracedmabase_v1;
2061 g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
2062 set_pmu_cmdline_args_falctracedmaidx_v1;
2063 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
2064 get_pmu_cmdline_args_ptr_v1;
2065 g->ops.pmu_ver.get_pmu_allocation_struct_size =
2066 get_pmu_allocation_size_v1;
2067 g->ops.pmu_ver.set_pmu_allocation_ptr =
2068 set_pmu_allocation_ptr_v1;
2069 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
2070 pmu_allocation_set_dmem_size_v1;
2071 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
2072 pmu_allocation_get_dmem_size_v1;
2073 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
2074 pmu_allocation_get_dmem_offset_v1;
2075 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
2076 pmu_allocation_get_dmem_offset_addr_v1;
2077 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
2078 pmu_allocation_set_dmem_offset_v1;
2079 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
2080 get_pmu_init_msg_pmu_queue_params_v1;
2081 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
2082 get_pmu_msg_pmu_init_msg_ptr_v1;
2083 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
2084 get_pmu_init_msg_pmu_sw_mg_off_v1;
2085 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
2086 get_pmu_init_msg_pmu_sw_mg_size_v1;
2087 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
2088 get_pmu_perfmon_cmd_start_size_v1;
2089 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
2090 get_perfmon_cmd_start_offsetofvar_v1;
2091 g->ops.pmu_ver.perfmon_start_set_cmd_type =
2092 perfmon_start_set_cmd_type_v1;
2093 g->ops.pmu_ver.perfmon_start_set_group_id =
2094 perfmon_start_set_group_id_v1;
2095 g->ops.pmu_ver.perfmon_start_set_state_id =
2096 perfmon_start_set_state_id_v1;
2097 g->ops.pmu_ver.perfmon_start_set_flags =
2098 perfmon_start_set_flags_v1;
2099 g->ops.pmu_ver.perfmon_start_get_flags =
2100 perfmon_start_get_flags_v1;
2101 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
2102 get_pmu_perfmon_cmd_init_size_v1;
2103 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
2104 get_perfmon_cmd_init_offsetofvar_v1;
2105 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
2106 perfmon_cmd_init_set_sample_buffer_v1;
2107 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
2108 perfmon_cmd_init_set_dec_cnt_v1;
2109 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
2110 perfmon_cmd_init_set_base_cnt_id_v1;
2111 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
2112 perfmon_cmd_init_set_samp_period_us_v1;
2113 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
2114 perfmon_cmd_init_set_num_cnt_v1;
2115 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
2116 perfmon_cmd_init_set_mov_avg_v1;
2117 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
2118 get_pmu_sequence_in_alloc_ptr_v1;
2119 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
2120 get_pmu_sequence_out_alloc_ptr_v1;
2121 break;
2122 case APP_VERSION_0:
2123 g->ops.pmu_ver.pg_cmd_eng_buf_load_size =
2124 pg_cmd_eng_buf_load_size_v0;
2125 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type =
2126 pg_cmd_eng_buf_load_set_cmd_type_v0;
2127 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id =
2128 pg_cmd_eng_buf_load_set_engine_id_v0;
2129 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx =
2130 pg_cmd_eng_buf_load_set_buf_idx_v0;
2131 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_pad =
2132 pg_cmd_eng_buf_load_set_pad_v0;
2133 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size =
2134 pg_cmd_eng_buf_load_set_buf_size_v0;
2135 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base =
2136 pg_cmd_eng_buf_load_set_dma_base_v0;
2137 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset =
2138 pg_cmd_eng_buf_load_set_dma_offset_v0;
2139 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx =
2140 pg_cmd_eng_buf_load_set_dma_idx_v0;
2141 g->pmu_ver_cmd_id_zbc_table_update = 14;
2142 __nvgpu_set_enabled(g, NVGPU_PMU_ZBC_SAVE, true);
2143 g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v0;
2144 g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v0;
2145 g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v0;
2146 g->ops.pmu_ver.set_perfmon_cntr_valid =
2147 set_perfmon_cntr_valid_v0;
2148 g->ops.pmu_ver.set_perfmon_cntr_index =
2149 set_perfmon_cntr_index_v0;
2150 g->ops.pmu_ver.set_perfmon_cntr_group_id =
2151 set_perfmon_cntr_group_id_v0;
2152 g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v0;
2153 g->ops.pmu_ver.get_pmu_cmdline_args_size =
2154 pmu_cmdline_size_v0;
2155 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
2156 set_pmu_cmdline_args_cpufreq_v0;
2157 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
2158 NULL;
2159 g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
2160 get_pmu_cmdline_args_ptr_v0;
2161 g->ops.pmu_ver.get_pmu_allocation_struct_size =
2162 get_pmu_allocation_size_v0;
2163 g->ops.pmu_ver.set_pmu_allocation_ptr =
2164 set_pmu_allocation_ptr_v0;
2165 g->ops.pmu_ver.pmu_allocation_set_dmem_size =
2166 pmu_allocation_set_dmem_size_v0;
2167 g->ops.pmu_ver.pmu_allocation_get_dmem_size =
2168 pmu_allocation_get_dmem_size_v0;
2169 g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
2170 pmu_allocation_get_dmem_offset_v0;
2171 g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
2172 pmu_allocation_get_dmem_offset_addr_v0;
2173 g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
2174 pmu_allocation_set_dmem_offset_v0;
2175 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
2176 get_pmu_init_msg_pmu_queue_params_v0;
2177 g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
2178 get_pmu_msg_pmu_init_msg_ptr_v0;
2179 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
2180 get_pmu_init_msg_pmu_sw_mg_off_v0;
2181 g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
2182 get_pmu_init_msg_pmu_sw_mg_size_v0;
2183 g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
2184 get_pmu_perfmon_cmd_start_size_v0;
2185 g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
2186 get_perfmon_cmd_start_offsetofvar_v0;
2187 g->ops.pmu_ver.perfmon_start_set_cmd_type =
2188 perfmon_start_set_cmd_type_v0;
2189 g->ops.pmu_ver.perfmon_start_set_group_id =
2190 perfmon_start_set_group_id_v0;
2191 g->ops.pmu_ver.perfmon_start_set_state_id =
2192 perfmon_start_set_state_id_v0;
2193 g->ops.pmu_ver.perfmon_start_set_flags =
2194 perfmon_start_set_flags_v0;
2195 g->ops.pmu_ver.perfmon_start_get_flags =
2196 perfmon_start_get_flags_v0;
2197 g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
2198 get_pmu_perfmon_cmd_init_size_v0;
2199 g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
2200 get_perfmon_cmd_init_offsetofvar_v0;
2201 g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
2202 perfmon_cmd_init_set_sample_buffer_v0;
2203 g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
2204 perfmon_cmd_init_set_dec_cnt_v0;
2205 g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
2206 perfmon_cmd_init_set_base_cnt_id_v0;
2207 g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
2208 perfmon_cmd_init_set_samp_period_us_v0;
2209 g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
2210 perfmon_cmd_init_set_num_cnt_v0;
2211 g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
2212 perfmon_cmd_init_set_mov_avg_v0;
2213 g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
2214 get_pmu_sequence_in_alloc_ptr_v0;
2215 g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
2216 get_pmu_sequence_out_alloc_ptr_v0;
2217 break;
2218 default:
2219 nvgpu_err(g, "PMU code version not supported: %d\n",
2220 pmu->desc->app_version);
2221 return -EINVAL;
2222 }
2223 pv->set_perfmon_cntr_index(pmu, 3); /* GR & CE2 */
2224 pv->set_perfmon_cntr_group_id(pmu, PMU_DOMAIN_GROUP_PSTATE);
2225
2226 return err;
2227}
2228
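For orientation, here is a minimal sketch of how the accessors selected above are meant to be consumed; the helper name pmu_pg_fill_eng_buf_load_cmd() is hypothetical, while the g->ops.pmu_ver hooks, PMU_UNIT_PG, PMU_CMD_HDR_SIZE and PMU_PG_CMD_ID_ENG_BUF_LOAD come from the nvgpu PMU interface headers:

/* Hypothetical illustration: build an engine-buffer-load PG command using
 * only the g->ops.pmu_ver accessors, so the same caller works with the
 * v0/v1/v2 command layouts selected in nvgpu_init_pmu_fw_ver_ops().
 */
static void pmu_pg_fill_eng_buf_load_cmd(struct gk20a *g, struct pmu_cmd *cmd,
		u8 engine_id, u8 buf_idx, u16 size, u32 dma_base, u8 dma_idx)
{
	struct pmu_v *pv = &g->ops.pmu_ver;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.unit_id = PMU_UNIT_PG;
	cmd->hdr.size = PMU_CMD_HDR_SIZE +
		pv->pg_cmd_eng_buf_load_size(&cmd->cmd.pg);
	pv->pg_cmd_eng_buf_load_set_cmd_type(&cmd->cmd.pg,
		PMU_PG_CMD_ID_ENG_BUF_LOAD);
	pv->pg_cmd_eng_buf_load_set_engine_id(&cmd->cmd.pg, engine_id);
	pv->pg_cmd_eng_buf_load_set_buf_idx(&cmd->cmd.pg, buf_idx);
	pv->pg_cmd_eng_buf_load_set_buf_size(&cmd->cmd.pg, size);
	pv->pg_cmd_eng_buf_load_set_dma_base(&cmd->cmd.pg, dma_base);
	pv->pg_cmd_eng_buf_load_set_dma_offset(&cmd->cmd.pg, 0);
	pv->pg_cmd_eng_buf_load_set_dma_idx(&cmd->cmd.pg, dma_idx);
}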
2229static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu)
2230{
2231 struct gk20a *g = gk20a_from_pmu(pmu);
2232 struct mm_gk20a *mm = &g->mm;
2233 struct vm_gk20a *vm = mm->pmu.vm;
2234 struct boardobj *pboardobj, *pboardobj_tmp;
2235 struct boardobjgrp *pboardobjgrp, *pboardobjgrp_tmp;
2236
2237 nvgpu_log_fn(g, " ");
2238
2239 if (nvgpu_alloc_initialized(&pmu->dmem))
2240 nvgpu_alloc_destroy(&pmu->dmem);
2241
2242 nvgpu_list_for_each_entry_safe(pboardobjgrp, pboardobjgrp_tmp,
2243 &g->boardobjgrp_head, boardobjgrp, node) {
2244 pboardobjgrp->destruct(pboardobjgrp);
2245 }
2246
2247 nvgpu_list_for_each_entry_safe(pboardobj, pboardobj_tmp,
2248 &g->boardobj_head, boardobj, node) {
2249 pboardobj->destruct(pboardobj);
2250 }
2251
2252 if (pmu->fw)
2253 nvgpu_release_firmware(g, pmu->fw);
2254
2255 if (g->acr.pmu_fw)
2256 nvgpu_release_firmware(g, g->acr.pmu_fw);
2257
2258 if (g->acr.pmu_desc)
2259 nvgpu_release_firmware(g, g->acr.pmu_desc);
2260
2261 if (g->acr.acr_fw)
2262 nvgpu_release_firmware(g, g->acr.acr_fw);
2263
2264 if (g->acr.hsbl_fw)
2265 nvgpu_release_firmware(g, g->acr.hsbl_fw);
2266
2267 nvgpu_dma_unmap_free(vm, &g->acr.acr_ucode);
2268 nvgpu_dma_unmap_free(vm, &g->acr.hsbl_ucode);
2269
2270 nvgpu_dma_unmap_free(vm, &pmu->seq_buf);
2271
2272 nvgpu_mutex_destroy(&pmu->elpg_mutex);
2273 nvgpu_mutex_destroy(&pmu->pg_mutex);
2274 nvgpu_mutex_destroy(&pmu->isr_mutex);
2275 nvgpu_mutex_destroy(&pmu->pmu_copy_lock);
2276 nvgpu_mutex_destroy(&pmu->pmu_seq_lock);
2277}
2278
2279int nvgpu_init_pmu_fw_support(struct nvgpu_pmu *pmu)
2280{
2281 struct gk20a *g = gk20a_from_pmu(pmu);
2282 int err = 0;
2283
2284 nvgpu_log_fn(g, " ");
2285
2286 err = nvgpu_mutex_init(&pmu->elpg_mutex);
2287 if (err)
2288 return err;
2289
2290 err = nvgpu_mutex_init(&pmu->pg_mutex);
2291 if (err)
2292 goto fail_elpg;
2293
2294 err = nvgpu_mutex_init(&pmu->isr_mutex);
2295 if (err)
2296 goto fail_pg;
2297
2298 err = nvgpu_mutex_init(&pmu->pmu_copy_lock);
2299 if (err)
2300 goto fail_isr;
2301
2302 err = nvgpu_mutex_init(&pmu->pmu_seq_lock);
2303 if (err)
2304 goto fail_pmu_copy;
2305
2306 pmu->remove_support = nvgpu_remove_pmu_support;
2307
2308 err = nvgpu_init_pmu_fw_ver_ops(pmu);
2309 if (err)
2310 goto fail_pmu_seq;
2311
2312 goto exit;
2313
2314fail_pmu_seq:
2315 nvgpu_mutex_destroy(&pmu->pmu_seq_lock);
2316fail_pmu_copy:
2317 nvgpu_mutex_destroy(&pmu->pmu_copy_lock);
2318fail_isr:
2319 nvgpu_mutex_destroy(&pmu->isr_mutex);
2320fail_pg:
2321 nvgpu_mutex_destroy(&pmu->pg_mutex);
2322fail_elpg:
2323 nvgpu_mutex_destroy(&pmu->elpg_mutex);
2324exit:
2325 return err;
2326}
2327
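/*
 * Load the non-secure PMU ucode image, copy it into a DMA-mapped sysmem
 * buffer in the PMU VM and wire up the version-specific firmware ops.
 */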
2328int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g)
2329{
2330 struct nvgpu_pmu *pmu = &g->pmu;
2331 int err = 0;
2332 struct mm_gk20a *mm = &g->mm;
2333 struct vm_gk20a *vm = mm->pmu.vm;
2334
2335 nvgpu_log_fn(g, " ");
2336
2337 if (pmu->fw)
2338 return nvgpu_init_pmu_fw_support(pmu);
2339
2340 pmu->fw = nvgpu_request_firmware(g, NVGPU_PMU_NS_UCODE_IMAGE, 0);
2341 if (!pmu->fw) {
2342 nvgpu_err(g, "failed to load pmu ucode");
2343 return -ENOENT;
2344 }
2345
2346 nvgpu_log_fn(g, "firmware loaded");
2347
2348 pmu->desc = (struct pmu_ucode_desc *)pmu->fw->data;
2349 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
2350 pmu->desc->descriptor_size);
2351
2352 err = nvgpu_dma_alloc_map_sys(vm, GK20A_PMU_UCODE_SIZE_MAX,
2353 &pmu->ucode);
2354 if (err)
2355 goto err_release_fw;
2356
2357 nvgpu_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image,
2358 pmu->desc->app_start_offset + pmu->desc->app_size);
2359
2360 return nvgpu_init_pmu_fw_support(pmu);
2361
2362 err_release_fw:
2363 nvgpu_release_firmware(g, pmu->fw);
2364 pmu->fw = NULL;
2365
2366 return err;
2367}
2368
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
new file mode 100644
index 00000000..4c706e57
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
@@ -0,0 +1,907 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/enabled.h>
24#include <nvgpu/pmu.h>
25#include <nvgpu/log.h>
26#include <nvgpu/timers.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
29
30#include "gk20a/gk20a.h"
31
32void nvgpu_pmu_seq_init(struct nvgpu_pmu *pmu)
33{
34 u32 i;
35
36 memset(pmu->seq, 0,
37 sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
38 memset(pmu->pmu_seq_tbl, 0,
39 sizeof(pmu->pmu_seq_tbl));
40
41 for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
42 pmu->seq[i].id = i;
43}
44
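/*
 * Reserve a free sequence slot: scan the pmu_seq_tbl bitmap under
 * pmu_seq_lock, mark the slot busy and return the corresponding
 * pmu_sequence in PENDING state.
 */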
45static int pmu_seq_acquire(struct nvgpu_pmu *pmu,
46 struct pmu_sequence **pseq)
47{
48 struct gk20a *g = gk20a_from_pmu(pmu);
49 struct pmu_sequence *seq;
50 u32 index;
51
52 nvgpu_mutex_acquire(&pmu->pmu_seq_lock);
53 index = find_first_zero_bit(pmu->pmu_seq_tbl,
54 sizeof(pmu->pmu_seq_tbl));
55 if (index >= sizeof(pmu->pmu_seq_tbl)) {
56 nvgpu_err(g, "no free sequence available");
57 nvgpu_mutex_release(&pmu->pmu_seq_lock);
58 return -EAGAIN;
59 }
60 set_bit(index, pmu->pmu_seq_tbl);
61 nvgpu_mutex_release(&pmu->pmu_seq_lock);
62
63 seq = &pmu->seq[index];
64 seq->state = PMU_SEQ_STATE_PENDING;
65
66 *pseq = seq;
67 return 0;
68}
69
70static void pmu_seq_release(struct nvgpu_pmu *pmu,
71 struct pmu_sequence *seq)
72{
73 struct gk20a *g = gk20a_from_pmu(pmu);
74
75 seq->state = PMU_SEQ_STATE_FREE;
76 seq->desc = PMU_INVALID_SEQ_DESC;
77 seq->callback = NULL;
78 seq->cb_params = NULL;
79 seq->msg = NULL;
80 seq->out_payload = NULL;
81 g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
82 g->ops.pmu_ver.get_pmu_seq_in_a_ptr(seq), 0);
83 g->ops.pmu_ver.pmu_allocation_set_dmem_size(pmu,
84 g->ops.pmu_ver.get_pmu_seq_out_a_ptr(seq), 0);
85
86 clear_bit(seq->id, pmu->pmu_seq_tbl);
87}
88/* mutex */
89int nvgpu_pmu_mutex_acquire(struct nvgpu_pmu *pmu, u32 id, u32 *token)
90{
91 struct gk20a *g = gk20a_from_pmu(pmu);
92
93 return g->ops.pmu.pmu_mutex_acquire(pmu, id, token);
94}
95
96int nvgpu_pmu_mutex_release(struct nvgpu_pmu *pmu, u32 id, u32 *token)
97{
98 struct gk20a *g = gk20a_from_pmu(pmu);
99
100 return g->ops.pmu.pmu_mutex_release(pmu, id, token);
101}
102
103/* queue */
104int nvgpu_pmu_queue_init(struct nvgpu_pmu *pmu,
105 u32 id, union pmu_init_msg_pmu *init)
106{
107 struct gk20a *g = gk20a_from_pmu(pmu);
108 struct pmu_queue *queue = &pmu->queue[id];
109 int err;
110
111 err = nvgpu_mutex_init(&queue->mutex);
112 if (err)
113 return err;
114
115 queue->id = id;
116 g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params(queue, id, init);
117 queue->mutex_id = id;
118
119 nvgpu_pmu_dbg(g, "queue %d: index %d, offset 0x%08x, size 0x%08x",
120 id, queue->index, queue->offset, queue->size);
121
122 return 0;
123}
124
125static int pmu_queue_head(struct nvgpu_pmu *pmu, struct pmu_queue *queue,
126 u32 *head, bool set)
127{
128 struct gk20a *g = gk20a_from_pmu(pmu);
129
130 return g->ops.pmu.pmu_queue_head(pmu, queue, head, set);
131}
132
133static int pmu_queue_tail(struct nvgpu_pmu *pmu, struct pmu_queue *queue,
134 u32 *tail, bool set)
135{
136 struct gk20a *g = gk20a_from_pmu(pmu);
137
138 return g->ops.pmu.pmu_queue_tail(pmu, queue, tail, set);
139}
140
141static inline void pmu_queue_read(struct nvgpu_pmu *pmu,
142 u32 offset, u8 *dst, u32 size)
143{
144 nvgpu_flcn_copy_from_dmem(pmu->flcn, offset, dst, size, 0);
145}
146
147static inline void pmu_queue_write(struct nvgpu_pmu *pmu,
148 u32 offset, u8 *src, u32 size)
149{
150 nvgpu_flcn_copy_to_dmem(pmu->flcn, offset, src, size, 0);
151}
152
153
154static int pmu_queue_lock(struct nvgpu_pmu *pmu,
155 struct pmu_queue *queue)
156{
157 int err;
158
159 if (PMU_IS_MESSAGE_QUEUE(queue->id))
160 return 0;
161
162 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
163 nvgpu_mutex_acquire(&queue->mutex);
164 return 0;
165 }
166
167 err = nvgpu_pmu_mutex_acquire(pmu, queue->mutex_id, &queue->mutex_lock);
168 return err;
169}
170
171static int pmu_queue_unlock(struct nvgpu_pmu *pmu,
172 struct pmu_queue *queue)
173{
174 int err;
175
176 if (PMU_IS_MESSAGE_QUEUE(queue->id))
177 return 0;
178
179 if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
180 nvgpu_mutex_release(&queue->mutex);
181 return 0;
182 }
183
184 err = nvgpu_pmu_mutex_release(pmu, queue->mutex_id, &queue->mutex_lock);
185 return err;
186}
187
188/* called by pmu_read_message, no lock */
189bool nvgpu_pmu_queue_is_empty(struct nvgpu_pmu *pmu,
190 struct pmu_queue *queue)
191{
192 u32 head, tail;
193
194 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
195 if (queue->opened && queue->oflag == OFLAG_READ)
196 tail = queue->position;
197 else
198 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
199
200 return head == tail;
201}
202
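/*
 * Check whether 'size' bytes (rounded up to QUEUE_ALIGNMENT) fit in the
 * circular command queue. If the contiguous space between the head and
 * the end of the queue (minus one command header reserved for the rewind
 * marker) is too small, report that the writer must rewind to the queue
 * start first.
 */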
203static bool pmu_queue_has_room(struct nvgpu_pmu *pmu,
204 struct pmu_queue *queue, u32 size, bool *need_rewind)
205{
206 u32 head, tail;
207 bool rewind = false;
208 unsigned int free;
209
210 size = ALIGN(size, QUEUE_ALIGNMENT);
211
212 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
213 pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
214 if (head >= tail) {
215 free = queue->offset + queue->size - head;
216 free -= PMU_CMD_HDR_SIZE;
217
218 if (size > free) {
219 rewind = true;
220 head = queue->offset;
221 }
222 }
223
224 if (head < tail)
225 free = tail - head - 1;
226
227 if (need_rewind)
228 *need_rewind = rewind;
229
230 return size <= free;
231}
232
233static int pmu_queue_push(struct nvgpu_pmu *pmu,
234 struct pmu_queue *queue, void *data, u32 size)
235{
236 struct gk20a *g = pmu->g;
237
238 nvgpu_log_fn(g, " ");
239
240 if (!queue->opened && queue->oflag == OFLAG_WRITE) {
241 nvgpu_err(gk20a_from_pmu(pmu), "queue not opened for write");
242 return -EINVAL;
243 }
244
245 pmu_queue_write(pmu, queue->position, data, size);
246 queue->position += ALIGN(size, QUEUE_ALIGNMENT);
247 return 0;
248}
249
250static int pmu_queue_pop(struct nvgpu_pmu *pmu,
251 struct pmu_queue *queue, void *data, u32 size,
252 u32 *bytes_read)
253{
254 u32 head, tail, used;
255
256 *bytes_read = 0;
257
258 if (!queue->opened && queue->oflag == OFLAG_READ) {
259 nvgpu_err(gk20a_from_pmu(pmu), "queue not opened for read");
260 return -EINVAL;
261 }
262
263 pmu_queue_head(pmu, queue, &head, QUEUE_GET);
264 tail = queue->position;
265
266 if (head == tail)
267 return 0;
268
269 if (head > tail)
270 used = head - tail;
271 else
272 used = queue->offset + queue->size - tail;
273
274 if (size > used) {
275 nvgpu_warn(gk20a_from_pmu(pmu),
276 "queue size smaller than requested read");
277 size = used;
278 }
279
280 pmu_queue_read(pmu, tail, data, size);
281 queue->position += ALIGN(size, QUEUE_ALIGNMENT);
282 *bytes_read = size;
283 return 0;
284}
285
286static void pmu_queue_rewind(struct nvgpu_pmu *pmu,
287 struct pmu_queue *queue)
288{
289 struct gk20a *g = gk20a_from_pmu(pmu);
290 struct pmu_cmd cmd;
291
292 nvgpu_log_fn(g, " ");
293
294 if (!queue->opened) {
295 nvgpu_err(gk20a_from_pmu(pmu), "queue not opened");
296 return;
297 }
298
299 if (queue->oflag == OFLAG_WRITE) {
300 cmd.hdr.unit_id = PMU_UNIT_REWIND;
301 cmd.hdr.size = PMU_CMD_HDR_SIZE;
302 pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
303 nvgpu_pmu_dbg(g, "queue %d rewound", queue->id);
304 }
305
306 queue->position = queue->offset;
307}
308
309/* open for read and lock the queue */
310static int pmu_queue_open_read(struct nvgpu_pmu *pmu,
311 struct pmu_queue *queue)
312{
313 int err;
314
315 err = pmu_queue_lock(pmu, queue);
316 if (err)
317 return err;
318
319 if (queue->opened)
320 BUG();
321
322 pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
323 queue->oflag = OFLAG_READ;
324 queue->opened = true;
325
326 return 0;
327}
328
329/* open for write and lock the queue
330 * make sure there's enough free space for the write
331 */
332static int pmu_queue_open_write(struct nvgpu_pmu *pmu,
333 struct pmu_queue *queue, u32 size)
334{
335 struct gk20a *g = gk20a_from_pmu(pmu);
336 bool rewind = false;
337 int err;
338
339 err = pmu_queue_lock(pmu, queue);
340 if (err)
341 return err;
342
343 if (queue->opened)
344 BUG();
345
346 if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
347 nvgpu_pmu_dbg(g, "queue full: queue-id %d: index %d",
348 queue->id, queue->index);
349 pmu_queue_unlock(pmu, queue);
350 return -EAGAIN;
351 }
352
353 pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
354 queue->oflag = OFLAG_WRITE;
355 queue->opened = true;
356
357 if (rewind)
358 pmu_queue_rewind(pmu, queue);
359
360 return 0;
361}
362
363/* close and unlock the queue */
364static int pmu_queue_close(struct nvgpu_pmu *pmu,
365 struct pmu_queue *queue, bool commit)
366{
367 if (!queue->opened)
368 return 0;
369
370 if (commit) {
371 if (queue->oflag == OFLAG_READ)
372 pmu_queue_tail(pmu, queue,
373 &queue->position, QUEUE_SET);
374 else
375 pmu_queue_head(pmu, queue,
376 &queue->position, QUEUE_SET);
377 }
378
379 queue->opened = false;
380
381 pmu_queue_unlock(pmu, queue);
382
383 return 0;
384}
385
386static bool pmu_validate_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
387 struct pmu_msg *msg, struct pmu_payload *payload,
388 u32 queue_id)
389{
390 struct gk20a *g = gk20a_from_pmu(pmu);
391 struct pmu_queue *queue;
392 u32 in_size, out_size;
393
394 if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
395 goto invalid_cmd;
396
397 queue = &pmu->queue[queue_id];
398 if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
399 goto invalid_cmd;
400
401 if (cmd->hdr.size > (queue->size >> 1))
402 goto invalid_cmd;
403
404 if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
405 goto invalid_cmd;
406
407 if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
408 goto invalid_cmd;
409
410 if (payload == NULL)
411 return true;
412
413 if (payload->in.buf == NULL && payload->out.buf == NULL)
414 goto invalid_cmd;
415
416 if ((payload->in.buf != NULL && payload->in.size == 0) ||
417 (payload->out.buf != NULL && payload->out.size == 0))
418 goto invalid_cmd;
419
420 in_size = PMU_CMD_HDR_SIZE;
421 if (payload->in.buf) {
422 in_size += payload->in.offset;
423 in_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
424 }
425
426 out_size = PMU_CMD_HDR_SIZE;
427 if (payload->out.buf) {
428 out_size += payload->out.offset;
429 out_size += g->ops.pmu_ver.get_pmu_allocation_struct_size(pmu);
430 }
431
432 if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
433 goto invalid_cmd;
434
435
436 if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
437 (payload->out.offset != 0 && payload->out.buf == NULL))
438 goto invalid_cmd;
439
440 return true;
441
442invalid_cmd:
443 nvgpu_err(g, "invalid pmu cmd :\n"
444 "queue_id=%d,\n"
445 "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
446 "payload in=%p, in_size=%d, in_offset=%d,\n"
447 "payload out=%p, out_size=%d, out_offset=%d",
448 queue_id, cmd->hdr.size, cmd->hdr.unit_id,
449 msg, msg ? msg->hdr.size : ~0,
450 &payload->in, payload->in.size, payload->in.offset,
451 &payload->out, payload->out.size, payload->out.offset);
452
453 return false;
454}
455
456static int pmu_write_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
457 u32 queue_id, unsigned long timeout_ms)
458{
459 struct gk20a *g = gk20a_from_pmu(pmu);
460 struct pmu_queue *queue;
461 struct nvgpu_timeout timeout;
462 int err;
463
464 nvgpu_log_fn(g, " ");
465
466 queue = &pmu->queue[queue_id];
467 nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
468
469 do {
470 err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
471 if (err == -EAGAIN && !nvgpu_timeout_expired(&timeout))
472 nvgpu_usleep_range(1000, 2000);
473 else
474 break;
475 } while (1);
476
477 if (err)
478 goto clean_up;
479
480 pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
481
482
483 err = pmu_queue_close(pmu, queue, true);
484
485clean_up:
486 if (err)
487 nvgpu_err(g, "fail to write cmd to queue %d", queue_id);
488 else
489 nvgpu_log_fn(g, "done");
490
491 return err;
492}
493
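/*
 * Post a command to the PMU: validate it, reserve a sequence, stage any
 * in/out payloads in PMU DMEM (or in a vidmem surface when fb_size is
 * set) and push the command into the requested queue. The caller's
 * callback is invoked from pmu_response_handle() once the PMU replies.
 */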
494int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
495 struct pmu_msg *msg, struct pmu_payload *payload,
496 u32 queue_id, pmu_callback callback, void *cb_param,
497 u32 *seq_desc, unsigned long timeout)
498{
499 struct nvgpu_pmu *pmu = &g->pmu;
500 struct pmu_v *pv = &g->ops.pmu_ver;
501 struct pmu_sequence *seq;
502 void *in = NULL, *out = NULL;
503 int err;
504
505 nvgpu_log_fn(g, " ");
506
507 if ((!cmd) || (!seq_desc) || (!pmu->pmu_ready)) {
508 if (!cmd)
509 nvgpu_warn(g, "%s(): PMU cmd buffer is NULL", __func__);
510 else if (!seq_desc)
511 nvgpu_warn(g, "%s(): Seq descriptor is NULL", __func__);
512 else
513 nvgpu_warn(g, "%s(): PMU is not ready", __func__);
514
515 WARN_ON(1);
516 return -EINVAL;
517 }
518
519 if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
520 return -EINVAL;
521
522 err = pmu_seq_acquire(pmu, &seq);
523 if (err)
524 return err;
525
526 cmd->hdr.seq_id = seq->id;
527
528 cmd->hdr.ctrl_flags = 0;
529 cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
530 cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
531
532 seq->callback = callback;
533 seq->cb_params = cb_param;
534 seq->msg = msg;
535 seq->out_payload = NULL;
536 seq->desc = pmu->next_seq_desc++;
537
538 if (payload)
539 seq->out_payload = payload->out.buf;
540
541 *seq_desc = seq->desc;
542
543 if (payload && payload->in.offset != 0) {
544 pv->set_pmu_allocation_ptr(pmu, &in,
545 ((u8 *)&cmd->cmd + payload->in.offset));
546
547 if (payload->in.buf != payload->out.buf)
548 pv->pmu_allocation_set_dmem_size(pmu, in,
549 (u16)payload->in.size);
550 else
551 pv->pmu_allocation_set_dmem_size(pmu, in,
552 (u16)max(payload->in.size, payload->out.size));
553
554 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
555 nvgpu_alloc(&pmu->dmem,
556 pv->pmu_allocation_get_dmem_size(pmu, in));
557 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
558 goto clean_up;
559
560 if (payload->in.fb_size != 0x0) {
561 seq->in_mem = nvgpu_kzalloc(g,
562 sizeof(struct nvgpu_mem));
563 if (!seq->in_mem) {
564 err = -ENOMEM;
565 goto clean_up;
566 }
567
568 nvgpu_pmu_vidmem_surface_alloc(g, seq->in_mem,
569 payload->in.fb_size);
570 nvgpu_pmu_surface_describe(g, seq->in_mem,
571 (struct flcn_mem_desc_v0 *)
572 pv->pmu_allocation_get_fb_addr(pmu, in));
573
574 nvgpu_mem_wr_n(g, seq->in_mem, 0,
575 payload->in.buf, payload->in.fb_size);
576
577 } else {
578 nvgpu_flcn_copy_to_dmem(pmu->flcn,
579 (pv->pmu_allocation_get_dmem_offset(pmu, in)),
580 payload->in.buf, payload->in.size, 0);
581 }
582 pv->pmu_allocation_set_dmem_size(pmu,
583 pv->get_pmu_seq_in_a_ptr(seq),
584 pv->pmu_allocation_get_dmem_size(pmu, in));
585 pv->pmu_allocation_set_dmem_offset(pmu,
586 pv->get_pmu_seq_in_a_ptr(seq),
587 pv->pmu_allocation_get_dmem_offset(pmu, in));
588 }
589
590 if (payload && payload->out.offset != 0) {
591 pv->set_pmu_allocation_ptr(pmu, &out,
592 ((u8 *)&cmd->cmd + payload->out.offset));
593 pv->pmu_allocation_set_dmem_size(pmu, out,
594 (u16)payload->out.size);
595
596 if (payload->in.buf != payload->out.buf) {
597 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
598 nvgpu_alloc(&pmu->dmem,
599 pv->pmu_allocation_get_dmem_size(pmu, out));
600 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
601 out)))
602 goto clean_up;
603
604 if (payload->out.fb_size != 0x0) {
605 seq->out_mem = nvgpu_kzalloc(g,
606 sizeof(struct nvgpu_mem));
607 if (!seq->out_mem) {
608 err = -ENOMEM;
609 goto clean_up;
610 }
611 nvgpu_pmu_vidmem_surface_alloc(g, seq->out_mem,
612 payload->out.fb_size);
613 nvgpu_pmu_surface_describe(g, seq->out_mem,
614 (struct flcn_mem_desc_v0 *)
615 pv->pmu_allocation_get_fb_addr(pmu,
616 out));
617 }
618 } else {
619 BUG_ON(in == NULL);
620 seq->out_mem = seq->in_mem;
621 pv->pmu_allocation_set_dmem_offset(pmu, out,
622 pv->pmu_allocation_get_dmem_offset(pmu, in));
623 }
624 pv->pmu_allocation_set_dmem_size(pmu,
625 pv->get_pmu_seq_out_a_ptr(seq),
626 pv->pmu_allocation_get_dmem_size(pmu, out));
627 pv->pmu_allocation_set_dmem_offset(pmu,
628 pv->get_pmu_seq_out_a_ptr(seq),
629 pv->pmu_allocation_get_dmem_offset(pmu, out));
630
631 }
632
633
634
635 seq->state = PMU_SEQ_STATE_USED;
636
637 err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
638 if (err)
639 seq->state = PMU_SEQ_STATE_PENDING;
640
641 nvgpu_log_fn(g, "done");
642
643 return err;
644
645clean_up:
646 nvgpu_log_fn(g, "fail");
647 if (in)
648 nvgpu_free(&pmu->dmem,
649 pv->pmu_allocation_get_dmem_offset(pmu, in));
650 if (out)
651 nvgpu_free(&pmu->dmem,
652 pv->pmu_allocation_get_dmem_offset(pmu, out));
653
654 pmu_seq_release(pmu, seq);
655 return err;
656}
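As a rough usage sketch (the unit ID, command size and handler below are placeholders rather than values taken from this file), a caller fills a struct pmu_cmd, provides a callback and lets this IPC layer manage the sequence and queue bookkeeping:

/* Hypothetical example of posting a simple command without a payload;
 * error handling and the real unit/command IDs depend on the PMU task
 * being driven.
 */
static void example_done_cb(struct gk20a *g, struct pmu_msg *msg,
		void *param, u32 handle, u32 status)
{
	nvgpu_pmu_dbg(g, "example cmd acked, status=%d", status);
}

static int example_post_cmd(struct gk20a *g)
{
	struct pmu_cmd cmd;
	u32 seq;

	memset(&cmd, 0, sizeof(cmd));
	cmd.hdr.unit_id = PMU_UNIT_PERFMON;	/* placeholder unit */
	cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(cmd.cmd.perfmon);

	return nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
			example_done_cb, NULL, &seq, ~0);
}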
657
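/*
 * Consume a reply for a previously posted command: copy the message and
 * any DMEM output payload back to the caller, free the DMEM/vidmem
 * staging buffers, run the sequence callback and release the sequence
 * slot.
 */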
658static int pmu_response_handle(struct nvgpu_pmu *pmu,
659 struct pmu_msg *msg)
660{
661 struct gk20a *g = gk20a_from_pmu(pmu);
662 struct pmu_sequence *seq;
663 struct pmu_v *pv = &g->ops.pmu_ver;
664 int ret = 0;
665
666 nvgpu_log_fn(g, " ");
667
668 seq = &pmu->seq[msg->hdr.seq_id];
669 if (seq->state != PMU_SEQ_STATE_USED &&
670 seq->state != PMU_SEQ_STATE_CANCELLED) {
671 nvgpu_err(g, "msg for an unknown sequence %d", seq->id);
672 return -EINVAL;
673 }
674
675 if (msg->hdr.unit_id == PMU_UNIT_RC &&
676 msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
677 nvgpu_err(g, "unhandled cmd: seq %d", seq->id);
678 } else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
679 if (seq->msg) {
680 if (seq->msg->hdr.size >= msg->hdr.size) {
681 memcpy(seq->msg, msg, msg->hdr.size);
682 } else {
683 nvgpu_err(g, "sequence %d msg buffer too small",
684 seq->id);
685 }
686 }
687 if (pv->pmu_allocation_get_dmem_size(pmu,
688 pv->get_pmu_seq_out_a_ptr(seq)) != 0) {
689 nvgpu_flcn_copy_from_dmem(pmu->flcn,
690 pv->pmu_allocation_get_dmem_offset(pmu,
691 pv->get_pmu_seq_out_a_ptr(seq)),
692 seq->out_payload,
693 pv->pmu_allocation_get_dmem_size(pmu,
694 pv->get_pmu_seq_out_a_ptr(seq)), 0);
695 }
696 } else
697 seq->callback = NULL;
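	/* release the DMEM staged for this sequence's in/out payloads */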
698 if (pv->pmu_allocation_get_dmem_size(pmu,
699 pv->get_pmu_seq_in_a_ptr(seq)) != 0)
700 nvgpu_free(&pmu->dmem,
701 pv->pmu_allocation_get_dmem_offset(pmu,
702 pv->get_pmu_seq_in_a_ptr(seq)));
703 if (pv->pmu_allocation_get_dmem_size(pmu,
704 pv->get_pmu_seq_out_a_ptr(seq)) != 0)
705 nvgpu_free(&pmu->dmem,
706 pv->pmu_allocation_get_dmem_offset(pmu,
707 pv->get_pmu_seq_out_a_ptr(seq)));
708
709 if (seq->out_mem != NULL) {
710 memset(pv->pmu_allocation_get_fb_addr(pmu,
711 pv->get_pmu_seq_out_a_ptr(seq)), 0x0,
712 pv->pmu_allocation_get_fb_size(pmu,
713 pv->get_pmu_seq_out_a_ptr(seq)));
714
715 nvgpu_pmu_surface_free(g, seq->out_mem);
716 if (seq->out_mem != seq->in_mem)
717 nvgpu_kfree(g, seq->out_mem);
718 else
719 seq->out_mem = NULL;
720 }
721
722 if (seq->in_mem != NULL) {
723 memset(pv->pmu_allocation_get_fb_addr(pmu,
724 pv->get_pmu_seq_in_a_ptr(seq)), 0x0,
725 pv->pmu_allocation_get_fb_size(pmu,
726 pv->get_pmu_seq_in_a_ptr(seq)));
727
728 nvgpu_pmu_surface_free(g, seq->in_mem);
729 nvgpu_kfree(g, seq->in_mem);
730 seq->in_mem = NULL;
731 }
732
733 if (seq->callback)
734 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
735
736 pmu_seq_release(pmu, seq);
737
738 /* TBD: notify client waiting for available dmem */
739
740 nvgpu_log_fn(g, "done");
741
742 return 0;
743}
744
745static int pmu_handle_event(struct nvgpu_pmu *pmu, struct pmu_msg *msg)
746{
747 int err = 0;
748 struct gk20a *g = gk20a_from_pmu(pmu);
749
750 nvgpu_log_fn(g, " ");
751 switch (msg->hdr.unit_id) {
752 case PMU_UNIT_PERFMON:
753 case PMU_UNIT_PERFMON_T18X:
754 err = nvgpu_pmu_handle_perfmon_event(pmu, &msg->msg.perfmon);
755 break;
756 case PMU_UNIT_PERF:
757 if (g->ops.perf.handle_pmu_perf_event != NULL) {
758 err = g->ops.perf.handle_pmu_perf_event(g,
759 (void *)&msg->msg.perf);
760 } else {
761 WARN_ON(1);
762 }
763 break;
764 case PMU_UNIT_THERM:
765 err = nvgpu_pmu_handle_therm_event(pmu, &msg->msg.therm);
766 break;
767 default:
768 break;
769 }
770
771 return err;
772}
773
774static bool pmu_read_message(struct nvgpu_pmu *pmu, struct pmu_queue *queue,
775 struct pmu_msg *msg, int *status)
776{
777 struct gk20a *g = gk20a_from_pmu(pmu);
778 u32 read_size, bytes_read;
779 int err;
780
781 *status = 0;
782
783 if (nvgpu_pmu_queue_is_empty(pmu, queue))
784 return false;
785
786 err = pmu_queue_open_read(pmu, queue);
787 if (err) {
788 nvgpu_err(g, "fail to open queue %d for read", queue->id);
789 *status = err;
790 return false;
791 }
792
793 err = pmu_queue_pop(pmu, queue, &msg->hdr,
794 PMU_MSG_HDR_SIZE, &bytes_read);
795 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
796 nvgpu_err(g, "fail to read msg from queue %d", queue->id);
797 *status = err | -EINVAL;
798 goto clean_up;
799 }
800
801 if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
802 pmu_queue_rewind(pmu, queue);
803 /* read again after rewind */
804 err = pmu_queue_pop(pmu, queue, &msg->hdr,
805 PMU_MSG_HDR_SIZE, &bytes_read);
806 if (err || bytes_read != PMU_MSG_HDR_SIZE) {
807 nvgpu_err(g,
808 "fail to read msg from queue %d", queue->id);
809 *status = err | -EINVAL;
810 goto clean_up;
811 }
812 }
813
814 if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
815 nvgpu_err(g, "read invalid unit_id %d from queue %d",
816 msg->hdr.unit_id, queue->id);
817 *status = -EINVAL;
818 goto clean_up;
819 }
820
821 if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
822 read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
823 err = pmu_queue_pop(pmu, queue, &msg->msg,
824 read_size, &bytes_read);
825 if (err || bytes_read != read_size) {
826 nvgpu_err(g,
827 "fail to read msg from queue %d", queue->id);
828 *status = err;
829 goto clean_up;
830 }
831 }
832
833 err = pmu_queue_close(pmu, queue, true);
834 if (err) {
835 nvgpu_err(g, "fail to close queue %d", queue->id);
836 *status = err;
837 return false;
838 }
839
840 return true;
841
842clean_up:
843 err = pmu_queue_close(pmu, queue, false);
844 if (err)
845 nvgpu_err(g, "fail to close queue %d", queue->id);
846 return false;
847}
848
849int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
850{
851 struct pmu_msg msg;
852 int status;
853 struct gk20a *g = gk20a_from_pmu(pmu);
854
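	/*
	 * Until pmu_ready is set, the only expected traffic is the PMU INIT
	 * message: consume it, then set up the WPR region and perfmon when
	 * they are supported/enabled.
	 */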
855 if (unlikely(!pmu->pmu_ready)) {
856 nvgpu_pmu_process_init_msg(pmu, &msg);
857 if (g->ops.pmu.init_wpr_region != NULL)
858 g->ops.pmu.init_wpr_region(g);
859 if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
860 nvgpu_pmu_init_perfmon(pmu);
861
862 return 0;
863 }
864
865 while (pmu_read_message(pmu,
866 &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
867
868 nvgpu_pmu_dbg(g, "read msg hdr: ");
869 nvgpu_pmu_dbg(g, "unit_id = 0x%08x, size = 0x%08x",
870 msg.hdr.unit_id, msg.hdr.size);
871 nvgpu_pmu_dbg(g, "ctrl_flags = 0x%08x, seq_id = 0x%08x",
872 msg.hdr.ctrl_flags, msg.hdr.seq_id);
873
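		/*
		 * Clear the PMU-internal flag bits, then route unsolicited
		 * events to the event handler and command responses to the
		 * sequence/response handler.
		 */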
874 msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
875
876 if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT)
877 pmu_handle_event(pmu, &msg);
878 else
879 pmu_response_handle(pmu, &msg);
880 }
881
882 return 0;
883}
884
885int pmu_wait_message_cond(struct nvgpu_pmu *pmu, u32 timeout_ms,
886 u32 *var, u32 val)
887{
888 struct gk20a *g = gk20a_from_pmu(pmu);
889 struct nvgpu_timeout timeout;
890 unsigned long delay = GR_IDLE_CHECK_DEFAULT;
891
892 nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
893
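	/*
	 * Poll *var with exponential backoff (capped at GR_IDLE_CHECK_MAX),
	 * servicing any pending PMU interrupt so the message that updates
	 * *var can actually be processed while we wait.
	 */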
894 do {
895 if (*var == val)
896 return 0;
897
898 if (gk20a_pmu_is_interrupted(pmu))
899 gk20a_pmu_isr(g);
900
901 nvgpu_usleep_range(delay, delay * 2);
902 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
903 } while (!nvgpu_timeout_expired(&timeout));
904
905 return -ETIMEDOUT;
906}
907
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
new file mode 100644
index 00000000..2b952868
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c
@@ -0,0 +1,293 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/enabled.h>
24#include <nvgpu/pmu.h>
25#include <nvgpu/log.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
28
29#include "gk20a/gk20a.h"
30
31#ifdef CONFIG_TEGRA_19x_GPU
32#include "nvgpu_gpuid_t19x.h"
33#endif
34
35static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
36{
37 struct gk20a *g = gk20a_from_pmu(pmu);
38 u32 ver = g->params.gpu_arch + g->params.gpu_impl;
39 u8 unit_id;
40
41 switch (ver) {
42 case GK20A_GPUID_GK20A:
43 case GK20A_GPUID_GM20B:
44 case GK20A_GPUID_GM20B_B:
45 unit_id = PMU_UNIT_PERFMON;
46 break;
47 case NVGPU_GPUID_GP10B:
48 case NVGPU_GPUID_GP104:
49 case NVGPU_GPUID_GP106:
50 unit_id = PMU_UNIT_PERFMON_T18X;
51 break;
52#if defined(CONFIG_TEGRA_19x_GPU)
53 case TEGRA_19x_GPUID:
54 unit_id = PMU_UNIT_PERFMON_T18X;
55 break;
56#endif
57 default:
58 unit_id = PMU_UNIT_INVALID;
59 nvgpu_err(g, "no support for %x", ver);
60 WARN_ON(1);
61 }
62
63 return unit_id;
64}
65
66int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
67{
68 struct gk20a *g = gk20a_from_pmu(pmu);
69 struct pmu_v *pv = &g->ops.pmu_ver;
70 struct pmu_cmd cmd;
71 struct pmu_payload payload;
72 u32 seq;
73
74 if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
75 return 0;
76
77 nvgpu_log_fn(g, " ");
78
79 pmu->perfmon_ready = 0;
80
81 gk20a_pmu_init_perfmon_counter(g);
82
83 if (!pmu->sample_buffer)
84 pmu->sample_buffer = nvgpu_alloc(&pmu->dmem,
85 2 * sizeof(u16));
86 if (!pmu->sample_buffer) {
87 nvgpu_err(g, "failed to allocate perfmon sample buffer");
88 return -ENOMEM;
89 }
90
91 /* init PERFMON */
92 memset(&cmd, 0, sizeof(struct pmu_cmd));
93
94 cmd.hdr.unit_id = get_perfmon_id(pmu);
95 if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
96 nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
97 return -EINVAL;
98 }
99
100 cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_init_size();
101 cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
102 /* buffer to save counter values for pmu perfmon */
103 pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
104 (u16)pmu->sample_buffer);
105 /* number of sample periods below lower threshold
106 * before pmu triggers perfmon decrease event
107 * TBD: = 15
108 */
109 pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
110 /* index of base counter, aka. always ticking counter */
111 pv->perfmon_cmd_init_set_base_cnt_id(&cmd.cmd.perfmon, 6);
112	/* interval, in microseconds, between PMU polls of the perf counters */
113 pv->perfmon_cmd_init_set_samp_period_us(&cmd.cmd.perfmon, 16700);
114 /* number of perfmon counters
115 * counter #3 (GR and CE2) for gk20a
116 */
117 pv->perfmon_cmd_init_set_num_cnt(&cmd.cmd.perfmon, 1);
118 /* moving average window for sample periods
119 * TBD: = 3000000 / sample_period_us = 17
120 */
121 pv->perfmon_cmd_init_set_mov_avg(&cmd.cmd.perfmon, 17);
122
123 memset(&payload, 0, sizeof(struct pmu_payload));
124 payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
125 payload.in.size = pv->get_perfmon_cntr_sz(pmu);
126 payload.in.offset = pv->get_perfmon_cmd_init_offsetofvar(COUNTER_ALLOC);
127
128 nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_INIT");
129 nvgpu_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
130 NULL, NULL, &seq, ~0);
131
132 return 0;
133}
134
135int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
136{
137 struct gk20a *g = gk20a_from_pmu(pmu);
138 struct pmu_v *pv = &g->ops.pmu_ver;
139 struct pmu_cmd cmd;
140 struct pmu_payload payload;
141 u32 seq;
142
143 if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
144 return 0;
145
146 /* PERFMON Start */
147 memset(&cmd, 0, sizeof(struct pmu_cmd));
148 cmd.hdr.unit_id = get_perfmon_id(pmu);
149 if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
150 nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
151 return -EINVAL;
152 }
153 cmd.hdr.size = PMU_CMD_HDR_SIZE + pv->get_pmu_perfmon_cmd_start_size();
154 pv->perfmon_start_set_cmd_type(&cmd.cmd.perfmon,
155 PMU_PERFMON_CMD_ID_START);
156 pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
157 PMU_DOMAIN_GROUP_PSTATE);
158 pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
159 pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
160
161 pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
162 PMU_PERFMON_FLAG_ENABLE_INCREASE |
163 PMU_PERFMON_FLAG_ENABLE_DECREASE |
164 PMU_PERFMON_FLAG_CLEAR_PREV);
165
166 memset(&payload, 0, sizeof(struct pmu_payload));
167
168 /* TBD: PMU_PERFMON_PCT_TO_INC * 100 */
169 pv->set_perfmon_cntr_ut(pmu, 3000); /* 30% */
170 /* TBD: PMU_PERFMON_PCT_TO_DEC * 100 */
171 pv->set_perfmon_cntr_lt(pmu, 1000); /* 10% */
172 pv->set_perfmon_cntr_valid(pmu, true);
173
174 payload.in.buf = pv->get_perfmon_cntr_ptr(pmu);
175 payload.in.size = pv->get_perfmon_cntr_sz(pmu);
176 payload.in.offset =
177 pv->get_perfmon_cmd_start_offsetofvar(COUNTER_ALLOC);
178
179 nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_START");
180 nvgpu_pmu_cmd_post(g, &cmd, NULL, &payload, PMU_COMMAND_QUEUE_LPQ,
181 NULL, NULL, &seq, ~0);
182
183 return 0;
184}
185
186int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
187{
188 struct gk20a *g = gk20a_from_pmu(pmu);
189 struct pmu_cmd cmd;
190 u32 seq;
191
192 if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
193 return 0;
194
195 /* PERFMON Stop */
196 memset(&cmd, 0, sizeof(struct pmu_cmd));
197 cmd.hdr.unit_id = get_perfmon_id(pmu);
198 if (cmd.hdr.unit_id == PMU_UNIT_INVALID) {
199 nvgpu_err(g, "failed to get perfmon UNIT ID, command skipped");
200 return -EINVAL;
201 }
202 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_perfmon_cmd_stop);
203 cmd.cmd.perfmon.stop.cmd_type = PMU_PERFMON_CMD_ID_STOP;
204
205 nvgpu_pmu_dbg(g, "cmd post PMU_PERFMON_CMD_ID_STOP");
206 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
207 NULL, NULL, &seq, ~0);
208 return 0;
209}
210
211int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load)
212{
213 *load = g->pmu.load_shadow;
214 return 0;
215}
216
217int nvgpu_pmu_load_update(struct gk20a *g)
218{
219 struct nvgpu_pmu *pmu = &g->pmu;
220 u16 load = 0;
221
222 if (!pmu->perfmon_ready) {
223 pmu->load_shadow = 0;
224 return 0;
225 }
226
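	/*
	 * Read the 16-bit load sample the PMU leaves in the DMEM sample
	 * buffer; load_shadow keeps that sample scaled down by 10 and
	 * load_avg tracks a 9:1 exponential moving average of load_shadow.
	 */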
227 nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
228 (u8 *)&load, 2, 0);
229 pmu->load_shadow = load / 10;
230 pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
231
232 return 0;
233}
234
235void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
236 u32 *total_cycles)
237{
238 if (!g->power_on || gk20a_busy(g)) {
239 *busy_cycles = 0;
240 *total_cycles = 0;
241 return;
242 }
243
244 *busy_cycles = gk20a_pmu_read_idle_counter(g, 1);
245 *total_cycles = gk20a_pmu_read_idle_counter(g, 2);
246
247 gk20a_idle(g);
248}
249
250void nvgpu_pmu_reset_load_counters(struct gk20a *g)
251{
252 if (!g->power_on || gk20a_busy(g))
253 return;
254
255 gk20a_pmu_reset_idle_counter(g, 2);
256 gk20a_pmu_reset_idle_counter(g, 1);
257
258 gk20a_idle(g);
259}
260
261int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
262 struct pmu_perfmon_msg *msg)
263{
264 struct gk20a *g = gk20a_from_pmu(pmu);
265
266 nvgpu_log_fn(g, " ");
267
268 switch (msg->msg_type) {
269 case PMU_PERFMON_MSG_ID_INCREASE_EVENT:
270 nvgpu_pmu_dbg(g, "perfmon increase event: ");
271		nvgpu_pmu_dbg(g, "state_id %d, group_id %d, pct %d",
272 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
273 (pmu->perfmon_events_cnt)++;
274 break;
275 case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
276 nvgpu_pmu_dbg(g, "perfmon decrease event: ");
277		nvgpu_pmu_dbg(g, "state_id %d, group_id %d, pct %d",
278 msg->gen.state_id, msg->gen.group_id, msg->gen.data);
279 (pmu->perfmon_events_cnt)++;
280 break;
281 case PMU_PERFMON_MSG_ID_INIT_EVENT:
282 pmu->perfmon_ready = 1;
283 nvgpu_pmu_dbg(g, "perfmon init event");
284 break;
285 default:
286 break;
287 }
288
289 /* restart sampling */
290 if (pmu->perfmon_sampling_enabled)
291 return nvgpu_pmu_perfmon_start_sampling(pmu);
292 return 0;
293}
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
new file mode 100644
index 00000000..bf39ce19
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_pg.c
@@ -0,0 +1,748 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/pmu.h>
24#include <nvgpu/log.h>
25#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
26#include <nvgpu/barrier.h>
27#include <nvgpu/bug.h>
28
29#include "gk20a/gk20a.h"
30
31/* state transition :
32 * OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
33 * ON => OFF is always synchronized
34 */
35/* elpg is off */
36#define PMU_ELPG_STAT_OFF 0
37/* elpg is on */
38#define PMU_ELPG_STAT_ON 1
39/* elpg is off, ALLOW cmd has been sent, wait for ack */
40#define PMU_ELPG_STAT_ON_PENDING 2
41/* elpg is on, DISALLOW cmd has been sent, wait for ack */
42#define PMU_ELPG_STAT_OFF_PENDING 3
43/* elpg is off, caller has requested on, but ALLOW
44 * cmd hasn't been sent due to ENABLE_ALLOW delay
45 */
46#define PMU_ELPG_STAT_OFF_ON_PENDING 4
47
48#define PMU_PGENG_GR_BUFFER_IDX_INIT (0)
49#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1)
50#define PMU_PGENG_GR_BUFFER_IDX_FECS (2)
51
52static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg,
53 void *param, u32 handle, u32 status)
54{
55 struct nvgpu_pmu *pmu = param;
56 struct pmu_pg_msg_elpg_msg *elpg_msg = &msg->msg.pg.elpg_msg;
57
58 nvgpu_log_fn(g, " ");
59
60 if (status != 0) {
61 nvgpu_err(g, "ELPG cmd aborted");
62 /* TBD: disable ELPG */
63 return;
64 }
65
66 switch (elpg_msg->msg) {
67 case PMU_PG_ELPG_MSG_INIT_ACK:
68		nvgpu_pmu_dbg(g, "INIT_PG is acked by PMU, eng - %d",
69 elpg_msg->engine_id);
70 break;
71 case PMU_PG_ELPG_MSG_ALLOW_ACK:
72		nvgpu_pmu_dbg(g, "ALLOW is acked by PMU, eng - %d",
73 elpg_msg->engine_id);
74 if (elpg_msg->engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
75 pmu->mscg_transition_state = PMU_ELPG_STAT_ON;
76 else
77 pmu->elpg_stat = PMU_ELPG_STAT_ON;
78 break;
79 case PMU_PG_ELPG_MSG_DISALLOW_ACK:
80		nvgpu_pmu_dbg(g, "DISALLOW is acked by PMU, eng - %d",
81 elpg_msg->engine_id);
82
83 if (elpg_msg->engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
84 pmu->mscg_transition_state = PMU_ELPG_STAT_OFF;
85 else
86 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
87
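		/*
		 * A DISALLOW ack received while booting completes ELPG init:
		 * if the GR engine reports features beyond plain GR power
		 * gating, the PMU is marked fully started here (with MSCG
		 * disabled); otherwise the state machine moves to ELPG_BOOTED.
		 */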
88 if (pmu->pmu_state == PMU_STATE_ELPG_BOOTING) {
89 if (g->ops.pmu.pmu_pg_engines_feature_list &&
90 g->ops.pmu.pmu_pg_engines_feature_list(g,
91 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
92 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
93 pmu->initialized = true;
94 nvgpu_pmu_state_change(g, PMU_STATE_STARTED,
95 true);
96 WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
97 /* make status visible */
98 nvgpu_smp_mb();
99 } else
100 nvgpu_pmu_state_change(g, PMU_STATE_ELPG_BOOTED,
101 true);
102 }
103 break;
104 default:
105 nvgpu_err(g,
106 "unsupported ELPG message : 0x%04x", elpg_msg->msg);
107 }
108}
109
110/* PG enable/disable */
111int nvgpu_pmu_pg_global_enable(struct gk20a *g, u32 enable_pg)
112{
113 u32 status = 0;
114
115 if (enable_pg == true) {
116 if (g->ops.pmu.pmu_pg_engines_feature_list &&
117 g->ops.pmu.pmu_pg_engines_feature_list(g,
118 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
119 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
120 if (g->ops.pmu.pmu_lpwr_enable_pg)
121 status = g->ops.pmu.pmu_lpwr_enable_pg(g,
122 true);
123 } else if (g->support_pmu && g->can_elpg)
124 status = nvgpu_pmu_enable_elpg(g);
125 } else if (enable_pg == false) {
126 if (g->ops.pmu.pmu_pg_engines_feature_list &&
127 g->ops.pmu.pmu_pg_engines_feature_list(g,
128 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
129 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
130 if (g->ops.pmu.pmu_lpwr_disable_pg)
131 status = g->ops.pmu.pmu_lpwr_disable_pg(g,
132 true);
133 } else if (g->support_pmu && g->can_elpg)
134 status = nvgpu_pmu_disable_elpg(g);
135 }
136
137 return status;
138}
139
140static int pmu_enable_elpg_locked(struct gk20a *g, u32 pg_engine_id)
141{
142 struct nvgpu_pmu *pmu = &g->pmu;
143 struct pmu_cmd cmd;
144 u32 seq, status;
145
146 nvgpu_log_fn(g, " ");
147
148 memset(&cmd, 0, sizeof(struct pmu_cmd));
149 cmd.hdr.unit_id = PMU_UNIT_PG;
150 cmd.hdr.size = PMU_CMD_HDR_SIZE +
151 sizeof(struct pmu_pg_cmd_elpg_cmd);
152 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
153 cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id;
154 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_ALLOW;
155
156	/* no need to wait for an ack on ELPG enable, but set the state to
157	 * pending to synchronize with a follow-up ELPG disable
158	 */
159 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
160 pmu->elpg_stat = PMU_ELPG_STAT_ON_PENDING;
161 else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
162 pmu->mscg_transition_state = PMU_ELPG_STAT_ON_PENDING;
163
164 nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_ALLOW");
165 status = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL,
166 PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg,
167 pmu, &seq, ~0);
168 WARN_ON(status != 0);
169
170 nvgpu_log_fn(g, "done");
171 return 0;
172}
173
174int nvgpu_pmu_enable_elpg(struct gk20a *g)
175{
176 struct nvgpu_pmu *pmu = &g->pmu;
177 struct gr_gk20a *gr = &g->gr;
178 u32 pg_engine_id;
179 u32 pg_engine_id_list = 0;
180
181 int ret = 0;
182
183 nvgpu_log_fn(g, " ");
184
185 if (!g->support_pmu)
186 return ret;
187
188 nvgpu_mutex_acquire(&pmu->elpg_mutex);
189
190 pmu->elpg_refcnt++;
191 if (pmu->elpg_refcnt <= 0)
192 goto exit_unlock;
193
194	/* something is not right if we end up in the following code path */
195 if (unlikely(pmu->elpg_refcnt > 1)) {
196 nvgpu_warn(g,
197 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
198 __func__, pmu->elpg_refcnt);
199 WARN_ON(1);
200 }
201
202	/* do NOT enable ELPG until the golden ctx is created,
203	 * since that is the ctx that ELPG saves and restores.
204	 */
205 if (unlikely(!gr->ctx_vars.golden_image_initialized))
206 goto exit_unlock;
207
208 /* return if ELPG is already on or on_pending or off_on_pending */
209 if (pmu->elpg_stat != PMU_ELPG_STAT_OFF)
210 goto exit_unlock;
211
212 if (g->ops.pmu.pmu_pg_supported_engines_list)
213 pg_engine_id_list = g->ops.pmu.pmu_pg_supported_engines_list(g);
214
215 for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS;
216 pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE;
217 pg_engine_id++) {
218
219 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS &&
220 pmu->mscg_stat == PMU_MSCG_DISABLED)
221 continue;
222
223 if (BIT(pg_engine_id) & pg_engine_id_list)
224 ret = pmu_enable_elpg_locked(g, pg_engine_id);
225 }
226
227exit_unlock:
228 nvgpu_mutex_release(&pmu->elpg_mutex);
229 nvgpu_log_fn(g, "done");
230 return ret;
231}
232
233int nvgpu_pmu_disable_elpg(struct gk20a *g)
234{
235 struct nvgpu_pmu *pmu = &g->pmu;
236 struct pmu_cmd cmd;
237 u32 seq;
238 int ret = 0;
239 u32 pg_engine_id;
240 u32 pg_engine_id_list = 0;
241 u32 *ptr = NULL;
242
243 nvgpu_log_fn(g, " ");
244
245 if (g->ops.pmu.pmu_pg_supported_engines_list)
246 pg_engine_id_list = g->ops.pmu.pmu_pg_supported_engines_list(g);
247
248 if (!g->support_pmu)
249 return ret;
250
251 nvgpu_mutex_acquire(&pmu->elpg_mutex);
252
253 pmu->elpg_refcnt--;
254 if (pmu->elpg_refcnt > 0) {
255 nvgpu_warn(g,
256 "%s(): possible elpg refcnt mismatch. elpg refcnt=%d",
257 __func__, pmu->elpg_refcnt);
258 WARN_ON(1);
259 ret = 0;
260 goto exit_unlock;
261 }
262
263 /* cancel off_on_pending and return */
264 if (pmu->elpg_stat == PMU_ELPG_STAT_OFF_ON_PENDING) {
265 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
266 ret = 0;
267 goto exit_reschedule;
268 }
269 /* wait if on_pending */
270 else if (pmu->elpg_stat == PMU_ELPG_STAT_ON_PENDING) {
271
272 pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
273 &pmu->elpg_stat, PMU_ELPG_STAT_ON);
274
275 if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
276 nvgpu_err(g, "ELPG_ALLOW_ACK failed, elpg_stat=%d",
277 pmu->elpg_stat);
278 nvgpu_pmu_dump_elpg_stats(pmu);
279 nvgpu_pmu_dump_falcon_stats(pmu);
280 ret = -EBUSY;
281 goto exit_unlock;
282 }
283 }
284 /* return if ELPG is already off */
285 else if (pmu->elpg_stat != PMU_ELPG_STAT_ON) {
286 ret = 0;
287 goto exit_reschedule;
288 }
289
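	/*
	 * Send DISALLOW to every supported PG engine and wait synchronously
	 * for its ack, i.e. until the per-engine state tracked in elpg_stat
	 * or mscg_transition_state reaches PMU_ELPG_STAT_OFF.
	 */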
290 for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS;
291 pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE;
292 pg_engine_id++) {
293
294 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS &&
295 pmu->mscg_stat == PMU_MSCG_DISABLED)
296 continue;
297
298 if (BIT(pg_engine_id) & pg_engine_id_list) {
299 memset(&cmd, 0, sizeof(struct pmu_cmd));
300 cmd.hdr.unit_id = PMU_UNIT_PG;
301 cmd.hdr.size = PMU_CMD_HDR_SIZE +
302 sizeof(struct pmu_pg_cmd_elpg_cmd);
303 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
304 cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id;
305 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
306
307 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
308 pmu->elpg_stat = PMU_ELPG_STAT_OFF_PENDING;
309 else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
310 pmu->mscg_transition_state =
311 PMU_ELPG_STAT_OFF_PENDING;
312
313 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
314 ptr = &pmu->elpg_stat;
315 else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
316 ptr = &pmu->mscg_transition_state;
317
318 nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_DISALLOW");
319 nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL,
320 PMU_COMMAND_QUEUE_HPQ, pmu_handle_pg_elpg_msg,
321 pmu, &seq, ~0);
322
323 pmu_wait_message_cond(pmu,
324 gk20a_get_gr_idle_timeout(g),
325 ptr, PMU_ELPG_STAT_OFF);
326 if (*ptr != PMU_ELPG_STAT_OFF) {
327 nvgpu_err(g, "ELPG_DISALLOW_ACK failed");
328 nvgpu_pmu_dump_elpg_stats(pmu);
329 nvgpu_pmu_dump_falcon_stats(pmu);
330 ret = -EBUSY;
331 goto exit_unlock;
332 }
333 }
334 }
335
336exit_reschedule:
337exit_unlock:
338 nvgpu_mutex_release(&pmu->elpg_mutex);
339 nvgpu_log_fn(g, "done");
340 return ret;
341}
342
343/* PG init */
344static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg,
345 void *param, u32 handle, u32 status)
346{
347 struct nvgpu_pmu *pmu = param;
348
349 nvgpu_log_fn(g, " ");
350
351 if (status != 0) {
352 nvgpu_err(g, "ELPG cmd aborted");
353 /* TBD: disable ELPG */
354 return;
355 }
356
357 switch (msg->msg.pg.stat.sub_msg_id) {
358 case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET:
359		nvgpu_pmu_dbg(g, "ALLOC_DMEM_OFFSET is acknowledged by PMU");
360 pmu->stat_dmem_offset[msg->msg.pg.stat.engine_id] =
361 msg->msg.pg.stat.data;
362 break;
363 default:
364 break;
365 }
366}
367
368static int pmu_pg_init_send(struct gk20a *g, u32 pg_engine_id)
369{
370 struct nvgpu_pmu *pmu = &g->pmu;
371 struct pmu_cmd cmd;
372 u32 seq;
373 int err = 0;
374
375 nvgpu_log_fn(g, " ");
376
377 if (pmu->pmu_state == PMU_STATE_INIT_RECEIVED)
378 nvgpu_pmu_state_change(g,
379 PMU_STATE_ELPG_BOOTING, false);
380 else
381 nvgpu_err(g, "PMU INIT not received\n");
382
383 gk20a_pmu_pg_idle_counter_config(g, pg_engine_id);
384
385 if (g->ops.pmu.pmu_pg_init_param)
386 g->ops.pmu.pmu_pg_init_param(g, pg_engine_id);
387
388 /* init ELPG */
389 memset(&cmd, 0, sizeof(struct pmu_cmd));
390 cmd.hdr.unit_id = PMU_UNIT_PG;
391 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
392 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
393 cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id;
394 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_INIT;
395
396 nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_INIT");
397 err = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
398 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
399 if (err)
400 nvgpu_err(g, "PMU_PG_ELPG_CMD_INIT cmd failed\n");
401
402 /* alloc dmem for powergating state log */
403 pmu->stat_dmem_offset[pg_engine_id] = 0;
404 memset(&cmd, 0, sizeof(struct pmu_cmd));
405 cmd.hdr.unit_id = PMU_UNIT_PG;
406 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_stat);
407 cmd.cmd.pg.stat.cmd_type = PMU_PG_CMD_ID_PG_STAT;
408 cmd.cmd.pg.stat.engine_id = pg_engine_id;
409 cmd.cmd.pg.stat.sub_cmd_id = PMU_PG_STAT_CMD_ALLOC_DMEM;
410 cmd.cmd.pg.stat.data = 0;
411
412 nvgpu_pmu_dbg(g, "cmd post PMU_PG_STAT_CMD_ALLOC_DMEM");
413 err = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
414 pmu_handle_pg_stat_msg, pmu, &seq, ~0);
415 if (err)
416 nvgpu_err(g, "PMU_PG_STAT_CMD_ALLOC_DMEM cmd failed\n");
417
418 /* disallow ELPG initially
419 * PMU ucode requires a disallow cmd before allow cmd
420 */
421 /* set for wait_event PMU_ELPG_STAT_OFF */
422 if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_GRAPHICS)
423 pmu->elpg_stat = PMU_ELPG_STAT_OFF;
424 else if (pg_engine_id == PMU_PG_ELPG_ENGINE_ID_MS)
425 pmu->mscg_transition_state = PMU_ELPG_STAT_OFF;
426 memset(&cmd, 0, sizeof(struct pmu_cmd));
427 cmd.hdr.unit_id = PMU_UNIT_PG;
428 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd);
429 cmd.cmd.pg.elpg_cmd.cmd_type = PMU_PG_CMD_ID_ELPG_CMD;
430 cmd.cmd.pg.elpg_cmd.engine_id = pg_engine_id;
431 cmd.cmd.pg.elpg_cmd.cmd = PMU_PG_ELPG_CMD_DISALLOW;
432
433 nvgpu_pmu_dbg(g, "cmd post PMU_PG_ELPG_CMD_DISALLOW");
434 err = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
435 pmu_handle_pg_elpg_msg, pmu, &seq, ~0);
436 if (err)
437 nvgpu_err(g, "PMU_PG_ELPG_CMD_DISALLOW cmd failed\n");
438
439 if (g->ops.pmu.pmu_pg_set_sub_feature_mask)
440 g->ops.pmu.pmu_pg_set_sub_feature_mask(g, pg_engine_id);
441
442 return 0;
443}
444
445int nvgpu_pmu_init_powergating(struct gk20a *g)
446{
447 u32 pg_engine_id;
448 u32 pg_engine_id_list = 0;
449
450 nvgpu_log_fn(g, " ");
451
452 if (g->ops.pmu.pmu_pg_supported_engines_list)
453 pg_engine_id_list = g->ops.pmu.pmu_pg_supported_engines_list(g);
454
455 gk20a_gr_wait_initialized(g);
456
457 for (pg_engine_id = PMU_PG_ELPG_ENGINE_ID_GRAPHICS;
458 pg_engine_id < PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE;
459 pg_engine_id++) {
460
461 if (BIT(pg_engine_id) & pg_engine_id_list) {
462 pmu_pg_init_send(g, pg_engine_id);
463 }
464 }
465
466 if (g->ops.pmu.pmu_pg_param_post_init)
467 g->ops.pmu.pmu_pg_param_post_init(g);
468
469 return 0;
470}
471
472static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg,
473 void *param, u32 handle, u32 status)
474{
475 struct nvgpu_pmu *pmu = param;
476 struct pmu_pg_msg_eng_buf_stat *eng_buf_stat =
477 &msg->msg.pg.eng_buf_stat;
478
479 nvgpu_log_fn(g, " ");
480
481 nvgpu_pmu_dbg(g,
482 "reply PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
483 if (status != 0) {
484 nvgpu_err(g, "PGENG cmd aborted");
485 /* TBD: disable ELPG */
486 return;
487 }
488
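	/*
	 * This handler services both the FECS and ZBC buffer load replies;
	 * on success the current PMU state is re-posted so the init state
	 * machine can advance to its next step.
	 */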
489 pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED);
490 if ((!pmu->buf_loaded) &&
491 (pmu->pmu_state == PMU_STATE_LOADING_PG_BUF))
492 nvgpu_err(g, "failed to load PGENG buffer");
493 else {
494 nvgpu_pmu_state_change(g, pmu->pmu_state, true);
495 }
496}
497
498int nvgpu_pmu_init_bind_fecs(struct gk20a *g)
499{
500 struct nvgpu_pmu *pmu = &g->pmu;
501 struct pmu_cmd cmd;
502 u32 desc;
503 int err = 0;
504 u32 gr_engine_id;
505
506 nvgpu_log_fn(g, " ");
507
508 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
509
510 memset(&cmd, 0, sizeof(struct pmu_cmd));
511 cmd.hdr.unit_id = PMU_UNIT_PG;
512 cmd.hdr.size = PMU_CMD_HDR_SIZE +
513 g->ops.pmu_ver.pg_cmd_eng_buf_load_size(&cmd.cmd.pg);
514 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg,
515 PMU_PG_CMD_ID_ENG_BUF_LOAD);
516 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg,
517 gr_engine_id);
518 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg,
519 PMU_PGENG_GR_BUFFER_IDX_FECS);
520 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg,
521 pmu->pg_buf.size);
522 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg,
523 u64_lo32(pmu->pg_buf.gpu_va));
524 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg,
525 (u8)(pmu->pg_buf.gpu_va & 0xFF));
526 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg,
527 PMU_DMAIDX_VIRT);
528
529 pmu->buf_loaded = false;
530 nvgpu_pmu_dbg(g, "cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS");
531 nvgpu_pmu_state_change(g, PMU_STATE_LOADING_PG_BUF, false);
532 err = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
533 pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
534 if (err)
535 nvgpu_err(g, "cmd LOAD PMU_PGENG_GR_BUFFER_IDX_FECS failed\n");
536
537 return err;
538}
539
540void nvgpu_pmu_setup_hw_load_zbc(struct gk20a *g)
541{
542 struct nvgpu_pmu *pmu = &g->pmu;
543 struct pmu_cmd cmd;
544 u32 desc;
545 u32 gr_engine_id;
546 int err = 0;
547
548 gr_engine_id = gk20a_fifo_get_gr_engine_id(g);
549
550 memset(&cmd, 0, sizeof(struct pmu_cmd));
551 cmd.hdr.unit_id = PMU_UNIT_PG;
552 cmd.hdr.size = PMU_CMD_HDR_SIZE +
553 g->ops.pmu_ver.pg_cmd_eng_buf_load_size(&cmd.cmd.pg);
554 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_cmd_type(&cmd.cmd.pg,
555 PMU_PG_CMD_ID_ENG_BUF_LOAD);
556 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_engine_id(&cmd.cmd.pg,
557 gr_engine_id);
558 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_idx(&cmd.cmd.pg,
559 PMU_PGENG_GR_BUFFER_IDX_ZBC);
560 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_buf_size(&cmd.cmd.pg,
561 pmu->seq_buf.size);
562 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_base(&cmd.cmd.pg,
563 u64_lo32(pmu->seq_buf.gpu_va));
564 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_offset(&cmd.cmd.pg,
565 (u8)(pmu->seq_buf.gpu_va & 0xFF));
566 g->ops.pmu_ver.pg_cmd_eng_buf_load_set_dma_idx(&cmd.cmd.pg,
567 PMU_DMAIDX_VIRT);
568
569 pmu->buf_loaded = false;
570 nvgpu_pmu_dbg(g, "cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC");
571 nvgpu_pmu_state_change(g, PMU_STATE_LOADING_ZBC, false);
572 err = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ,
573 pmu_handle_pg_buf_config_msg, pmu, &desc, ~0);
574 if (err)
575 nvgpu_err(g, "CMD LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC failed\n");
576}
577
578/* stats */
579int nvgpu_pmu_get_pg_stats(struct gk20a *g, u32 pg_engine_id,
580 struct pmu_pg_stats_data *pg_stat_data)
581{
582 struct nvgpu_pmu *pmu = &g->pmu;
583 u32 pg_engine_id_list = 0;
584
585 if (!pmu->initialized) {
586 pg_stat_data->ingating_time = 0;
587 pg_stat_data->ungating_time = 0;
588 pg_stat_data->gating_cnt = 0;
589 return 0;
590 }
591
592 if (g->ops.pmu.pmu_pg_supported_engines_list)
593 pg_engine_id_list = g->ops.pmu.pmu_pg_supported_engines_list(g);
594
595 if (BIT(pg_engine_id) & pg_engine_id_list)
596 g->ops.pmu.pmu_elpg_statistics(g, pg_engine_id,
597 pg_stat_data);
598
599 return 0;
600}
601
602/* AELPG */
603static void ap_callback_init_and_enable_ctrl(
604 struct gk20a *g, struct pmu_msg *msg,
605 void *param, u32 seq_desc, u32 status)
606{
607 /* Define p_ap (i.e pointer to pmu_ap structure) */
608 WARN_ON(!msg);
609
610 if (!status) {
611 switch (msg->msg.pg.ap_msg.cmn.msg_id) {
612 case PMU_AP_MSG_ID_INIT_ACK:
613 nvgpu_pmu_dbg(g, "reply PMU_AP_CMD_ID_INIT");
614 break;
615
616 default:
617 nvgpu_pmu_dbg(g,
618 "%s: Invalid Adaptive Power Message: %x\n",
619 __func__, msg->msg.pg.ap_msg.cmn.msg_id);
620 break;
621 }
622 }
623}
624
625/* Send an Adaptive Power (AP) related command to PMU */
626int nvgpu_pmu_ap_send_command(struct gk20a *g,
627 union pmu_ap_cmd *p_ap_cmd, bool b_block)
628{
629 struct nvgpu_pmu *pmu = &g->pmu;
630 /* FIXME: where is the PG structure defined?? */
631 u32 status = 0;
632 struct pmu_cmd cmd;
633 u32 seq;
634 pmu_callback p_callback = NULL;
635
636 memset(&cmd, 0, sizeof(struct pmu_cmd));
637
638 /* Copy common members */
639 cmd.hdr.unit_id = PMU_UNIT_PG;
640 cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(union pmu_ap_cmd);
641
642 cmd.cmd.pg.ap_cmd.cmn.cmd_type = PMU_PG_CMD_ID_AP;
643 cmd.cmd.pg.ap_cmd.cmn.cmd_id = p_ap_cmd->cmn.cmd_id;
644
645 /* Copy other members of command */
646 switch (p_ap_cmd->cmn.cmd_id) {
647 case PMU_AP_CMD_ID_INIT:
648 nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_INIT");
649 cmd.cmd.pg.ap_cmd.init.pg_sampling_period_us =
650 p_ap_cmd->init.pg_sampling_period_us;
651 break;
652
653 case PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL:
654 nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL");
655 cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.ctrl_id =
656 p_ap_cmd->init_and_enable_ctrl.ctrl_id;
657 memcpy(
658 (void *)&(cmd.cmd.pg.ap_cmd.init_and_enable_ctrl.params),
659 (void *)&(p_ap_cmd->init_and_enable_ctrl.params),
660 sizeof(struct pmu_ap_ctrl_init_params));
661
662 p_callback = ap_callback_init_and_enable_ctrl;
663 break;
664
665 case PMU_AP_CMD_ID_ENABLE_CTRL:
666 nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_ENABLE_CTRL");
667 cmd.cmd.pg.ap_cmd.enable_ctrl.ctrl_id =
668 p_ap_cmd->enable_ctrl.ctrl_id;
669 break;
670
671 case PMU_AP_CMD_ID_DISABLE_CTRL:
672 nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_DISABLE_CTRL");
673 cmd.cmd.pg.ap_cmd.disable_ctrl.ctrl_id =
674 p_ap_cmd->disable_ctrl.ctrl_id;
675 break;
676
677 case PMU_AP_CMD_ID_KICK_CTRL:
678 nvgpu_pmu_dbg(g, "cmd post PMU_AP_CMD_ID_KICK_CTRL");
679 cmd.cmd.pg.ap_cmd.kick_ctrl.ctrl_id =
680 p_ap_cmd->kick_ctrl.ctrl_id;
681 cmd.cmd.pg.ap_cmd.kick_ctrl.skip_count =
682 p_ap_cmd->kick_ctrl.skip_count;
683 break;
684
685 default:
686 nvgpu_pmu_dbg(g, "%s: Invalid Adaptive Power command %d\n",
687 __func__, p_ap_cmd->cmn.cmd_id);
688 return 0x2f;
689 }
690
691 status = nvgpu_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_HPQ,
692 p_callback, pmu, &seq, ~0);
693
694 if (status) {
695 nvgpu_pmu_dbg(g,
696 "%s: Unable to submit Adaptive Power Command %d\n",
697 __func__, p_ap_cmd->cmn.cmd_id);
698 goto err_return;
699 }
700
701 /* TODO: Implement blocking calls (b_block) */
702
703err_return:
704 return status;
705}
706
707int nvgpu_aelpg_init(struct gk20a *g)
708{
709 int status = 0;
710
711 /* Remove reliance on app_ctrl field. */
712 union pmu_ap_cmd ap_cmd;
713
714 /* TODO: Check for elpg being ready? */
715 ap_cmd.init.cmd_id = PMU_AP_CMD_ID_INIT;
716 ap_cmd.init.pg_sampling_period_us = g->pmu.aelpg_param[0];
717
718 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
719 return status;
720}
721
722int nvgpu_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
723{
724 int status = 0;
725 union pmu_ap_cmd ap_cmd;
726
727 /* TODO: Probably check if ELPG is ready? */
728 ap_cmd.init_and_enable_ctrl.cmd_id = PMU_AP_CMD_ID_INIT_AND_ENABLE_CTRL;
729 ap_cmd.init_and_enable_ctrl.ctrl_id = ctrl_id;
730 ap_cmd.init_and_enable_ctrl.params.min_idle_filter_us =
731 g->pmu.aelpg_param[1];
732 ap_cmd.init_and_enable_ctrl.params.min_target_saving_us =
733 g->pmu.aelpg_param[2];
734 ap_cmd.init_and_enable_ctrl.params.power_break_even_us =
735 g->pmu.aelpg_param[3];
736 ap_cmd.init_and_enable_ctrl.params.cycles_per_sample_max =
737 g->pmu.aelpg_param[4];
738
739 switch (ctrl_id) {
740 case PMU_AP_CTRL_ID_GRAPHICS:
741 break;
742 default:
743 break;
744 }
745
746 status = nvgpu_pmu_ap_send_command(g, &ap_cmd, true);
747 return status;
748}