From 2a2c16af5f9f1ccfc93a13e820d5381e5c881e92 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 18 Apr 2018 12:59:00 -0700
Subject: gpu: nvgpu: Move Linux files away from common

Move all Linux source code files to drivers/gpu/nvgpu/os/linux from
drivers/gpu/nvgpu/common/linux. This changes the meaning of common to
be OS independent.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile | 213 ++-
 drivers/gpu/nvgpu/common/linux/cde.c | 1786 -----------------
 drivers/gpu/nvgpu/common/linux/cde.h | 326 ----
 drivers/gpu/nvgpu/common/linux/cde_gm20b.c | 64 -
 drivers/gpu/nvgpu/common/linux/cde_gm20b.h | 32 -
 drivers/gpu/nvgpu/common/linux/cde_gp10b.c | 161 --
 drivers/gpu/nvgpu/common/linux/cde_gp10b.h | 32 -
 drivers/gpu/nvgpu/common/linux/ce2.c | 155 --
 drivers/gpu/nvgpu/common/linux/channel.c | 1021 ----------
 drivers/gpu/nvgpu/common/linux/channel.h | 96 -
 drivers/gpu/nvgpu/common/linux/clk.c | 165 --
 drivers/gpu/nvgpu/common/linux/clk.h | 22 -
 drivers/gpu/nvgpu/common/linux/comptags.c | 140 --
 drivers/gpu/nvgpu/common/linux/cond.c | 73 -
 drivers/gpu/nvgpu/common/linux/ctxsw_trace.c | 730 -------
 drivers/gpu/nvgpu/common/linux/ctxsw_trace.h | 39 -
 drivers/gpu/nvgpu/common/linux/debug.c | 452 -----
 drivers/gpu/nvgpu/common/linux/debug_allocator.c | 69 -
 drivers/gpu/nvgpu/common/linux/debug_allocator.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_cde.c | 53 -
 drivers/gpu/nvgpu/common/linux/debug_cde.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_ce.c | 30 -
 drivers/gpu/nvgpu/common/linux/debug_ce.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_clk.c | 271 ---
 drivers/gpu/nvgpu/common/linux/debug_fifo.c | 378 ----
 drivers/gpu/nvgpu/common/linux/debug_fifo.h | 22 -
 drivers/gpu/nvgpu/common/linux/debug_gr.c | 31 -
 drivers/gpu/nvgpu/common/linux/debug_gr.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_hal.c | 95 -
 drivers/gpu/nvgpu/common/linux/debug_hal.h | 22 -
 drivers/gpu/nvgpu/common/linux/debug_kmem.c | 312 ---
 drivers/gpu/nvgpu/common/linux/debug_kmem.h | 23 -
 drivers/gpu/nvgpu/common/linux/debug_pmu.c | 481 -----
 drivers/gpu/nvgpu/common/linux/debug_pmu.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_sched.c | 80 -
 drivers/gpu/nvgpu/common/linux/debug_sched.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_xve.c | 176 --
 drivers/gpu/nvgpu/common/linux/debug_xve.h | 21 -
 drivers/gpu/nvgpu/common/linux/dma.c | 694 ------
 drivers/gpu/nvgpu/common/linux/dmabuf.c | 218 ---
 drivers/gpu/nvgpu/common/linux/dmabuf.h | 62 -
 drivers/gpu/nvgpu/common/linux/driver_common.c | 334 ----
 drivers/gpu/nvgpu/common/linux/driver_common.h | 22 -
 drivers/gpu/nvgpu/common/linux/dt.c | 29 -
 drivers/gpu/nvgpu/common/linux/firmware.c | 117 --
 drivers/gpu/nvgpu/common/linux/fuse.c | 55 -
 drivers/gpu/nvgpu/common/linux/intr.c | 122 --
 drivers/gpu/nvgpu/common/linux/intr.h | 22 -
 drivers/gpu/nvgpu/common/linux/io.c | 118 --
 drivers/gpu/nvgpu/common/linux/io_usermode.c | 29 -
 drivers/gpu/nvgpu/common/linux/ioctl.c | 296 ---
 drivers/gpu/nvgpu/common/linux/ioctl.h | 23 -
 drivers/gpu/nvgpu/common/linux/ioctl_as.c | 423 -----
 drivers/gpu/nvgpu/common/linux/ioctl_as.h | 30 -
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 1388 --------------
 drivers/gpu/nvgpu/common/linux/ioctl_channel.h | 50 -
 drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | 562 ------
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 1962 -------------------
drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h | 23 - drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | 2003 -------------------- drivers/gpu/nvgpu/common/linux/ioctl_dbg.h | 54 - drivers/gpu/nvgpu/common/linux/ioctl_tsg.c | 677 ------- drivers/gpu/nvgpu/common/linux/ioctl_tsg.h | 28 - drivers/gpu/nvgpu/common/linux/kmem.c | 654 ------- drivers/gpu/nvgpu/common/linux/kmem_priv.h | 105 - drivers/gpu/nvgpu/common/linux/log.c | 132 -- drivers/gpu/nvgpu/common/linux/module.c | 1365 ------------- drivers/gpu/nvgpu/common/linux/module.h | 32 - drivers/gpu/nvgpu/common/linux/module_usermode.c | 62 - drivers/gpu/nvgpu/common/linux/module_usermode.h | 27 - drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 613 ------ drivers/gpu/nvgpu/common/linux/nvhost.c | 294 --- drivers/gpu/nvgpu/common/linux/nvhost_priv.h | 24 - drivers/gpu/nvgpu/common/linux/nvlink.c | 106 -- drivers/gpu/nvgpu/common/linux/os_fence_android.c | 79 - .../gpu/nvgpu/common/linux/os_fence_android_sema.c | 111 -- .../nvgpu/common/linux/os_fence_android_syncpt.c | 121 -- drivers/gpu/nvgpu/common/linux/os_linux.h | 166 -- drivers/gpu/nvgpu/common/linux/os_sched.c | 26 - drivers/gpu/nvgpu/common/linux/pci.c | 861 --------- drivers/gpu/nvgpu/common/linux/pci.h | 27 - drivers/gpu/nvgpu/common/linux/pci_usermode.c | 24 - drivers/gpu/nvgpu/common/linux/pci_usermode.h | 23 - .../gpu/nvgpu/common/linux/platform_ecc_sysfs.c | 269 --- .../gpu/nvgpu/common/linux/platform_ecc_sysfs.h | 37 - drivers/gpu/nvgpu/common/linux/platform_gk20a.h | 317 ---- .../gpu/nvgpu/common/linux/platform_gk20a_tegra.c | 957 ---------- .../gpu/nvgpu/common/linux/platform_gk20a_tegra.h | 23 - .../gpu/nvgpu/common/linux/platform_gp10b_tegra.c | 607 ------ .../gpu/nvgpu/common/linux/platform_gp10b_tegra.h | 23 - .../gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 588 ------ drivers/gpu/nvgpu/common/linux/rwsem.c | 39 - drivers/gpu/nvgpu/common/linux/scale.c | 428 ----- drivers/gpu/nvgpu/common/linux/scale.h | 66 - drivers/gpu/nvgpu/common/linux/sched.c | 676 ------- drivers/gpu/nvgpu/common/linux/sched.h | 55 - drivers/gpu/nvgpu/common/linux/sim.c | 95 - drivers/gpu/nvgpu/common/linux/sim_pci.c | 91 - drivers/gpu/nvgpu/common/linux/soc.c | 122 -- drivers/gpu/nvgpu/common/linux/sync_sema_android.c | 419 ---- drivers/gpu/nvgpu/common/linux/sync_sema_android.h | 51 - drivers/gpu/nvgpu/common/linux/sysfs.c | 1205 ------------ drivers/gpu/nvgpu/common/linux/sysfs.h | 24 - drivers/gpu/nvgpu/common/linux/thread.c | 63 - drivers/gpu/nvgpu/common/linux/timers.c | 270 --- drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c | 168 -- drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h | 27 - .../gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c | 224 --- .../linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 97 - .../nvgpu/common/linux/vgpu/platform_vgpu_tegra.c | 69 - drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c | 50 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c | 77 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c | 53 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c | 475 ----- drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h | 57 - drivers/gpu/nvgpu/common/linux/vidmem.c | 262 --- drivers/gpu/nvgpu/common/linux/vm.c | 332 ---- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 2 +- drivers/gpu/nvgpu/gp106/clk_gp106.c | 2 +- drivers/gpu/nvgpu/gp106/mclk_gp106.c | 2 +- drivers/gpu/nvgpu/gp106/therm_gp106.c | 2 +- drivers/gpu/nvgpu/gp10b/platform_gp10b.h | 39 - drivers/gpu/nvgpu/include/nvgpu/nvhost.h | 2 + drivers/gpu/nvgpu/os/linux/cde.c | 1786 +++++++++++++++++ drivers/gpu/nvgpu/os/linux/cde.h | 326 ++++ 
drivers/gpu/nvgpu/os/linux/cde_gm20b.c | 64 + drivers/gpu/nvgpu/os/linux/cde_gm20b.h | 32 + drivers/gpu/nvgpu/os/linux/cde_gp10b.c | 161 ++ drivers/gpu/nvgpu/os/linux/cde_gp10b.h | 32 + drivers/gpu/nvgpu/os/linux/ce2.c | 155 ++ drivers/gpu/nvgpu/os/linux/channel.c | 1021 ++++++++++ drivers/gpu/nvgpu/os/linux/channel.h | 96 + drivers/gpu/nvgpu/os/linux/clk.c | 165 ++ drivers/gpu/nvgpu/os/linux/clk.h | 22 + drivers/gpu/nvgpu/os/linux/comptags.c | 140 ++ drivers/gpu/nvgpu/os/linux/cond.c | 73 + drivers/gpu/nvgpu/os/linux/ctxsw_trace.c | 730 +++++++ drivers/gpu/nvgpu/os/linux/ctxsw_trace.h | 39 + drivers/gpu/nvgpu/os/linux/debug.c | 452 +++++ drivers/gpu/nvgpu/os/linux/debug_allocator.c | 69 + drivers/gpu/nvgpu/os/linux/debug_allocator.h | 21 + drivers/gpu/nvgpu/os/linux/debug_cde.c | 53 + drivers/gpu/nvgpu/os/linux/debug_cde.h | 21 + drivers/gpu/nvgpu/os/linux/debug_ce.c | 30 + drivers/gpu/nvgpu/os/linux/debug_ce.h | 21 + drivers/gpu/nvgpu/os/linux/debug_clk.c | 271 +++ drivers/gpu/nvgpu/os/linux/debug_fifo.c | 378 ++++ drivers/gpu/nvgpu/os/linux/debug_fifo.h | 22 + drivers/gpu/nvgpu/os/linux/debug_gr.c | 31 + drivers/gpu/nvgpu/os/linux/debug_gr.h | 21 + drivers/gpu/nvgpu/os/linux/debug_hal.c | 95 + drivers/gpu/nvgpu/os/linux/debug_hal.h | 22 + drivers/gpu/nvgpu/os/linux/debug_kmem.c | 312 +++ drivers/gpu/nvgpu/os/linux/debug_kmem.h | 23 + drivers/gpu/nvgpu/os/linux/debug_pmu.c | 481 +++++ drivers/gpu/nvgpu/os/linux/debug_pmu.h | 21 + drivers/gpu/nvgpu/os/linux/debug_sched.c | 80 + drivers/gpu/nvgpu/os/linux/debug_sched.h | 21 + drivers/gpu/nvgpu/os/linux/debug_xve.c | 176 ++ drivers/gpu/nvgpu/os/linux/debug_xve.h | 21 + drivers/gpu/nvgpu/os/linux/dma.c | 694 +++++++ drivers/gpu/nvgpu/os/linux/dmabuf.c | 218 +++ drivers/gpu/nvgpu/os/linux/dmabuf.h | 62 + drivers/gpu/nvgpu/os/linux/driver_common.c | 334 ++++ drivers/gpu/nvgpu/os/linux/driver_common.h | 22 + drivers/gpu/nvgpu/os/linux/dt.c | 29 + drivers/gpu/nvgpu/os/linux/firmware.c | 117 ++ drivers/gpu/nvgpu/os/linux/fuse.c | 55 + drivers/gpu/nvgpu/os/linux/intr.c | 122 ++ drivers/gpu/nvgpu/os/linux/intr.h | 22 + drivers/gpu/nvgpu/os/linux/io.c | 118 ++ drivers/gpu/nvgpu/os/linux/io_usermode.c | 29 + drivers/gpu/nvgpu/os/linux/ioctl.c | 296 +++ drivers/gpu/nvgpu/os/linux/ioctl.h | 23 + drivers/gpu/nvgpu/os/linux/ioctl_as.c | 423 +++++ drivers/gpu/nvgpu/os/linux/ioctl_as.h | 30 + drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 1388 ++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_channel.h | 50 + drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | 562 ++++++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 1962 +++++++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h | 23 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 2003 ++++++++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_dbg.h | 54 + drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 677 +++++++ drivers/gpu/nvgpu/os/linux/ioctl_tsg.h | 28 + drivers/gpu/nvgpu/os/linux/kmem.c | 654 +++++++ drivers/gpu/nvgpu/os/linux/kmem_priv.h | 105 + drivers/gpu/nvgpu/os/linux/log.c | 132 ++ drivers/gpu/nvgpu/os/linux/module.c | 1365 +++++++++++++ drivers/gpu/nvgpu/os/linux/module.h | 32 + drivers/gpu/nvgpu/os/linux/module_usermode.c | 62 + drivers/gpu/nvgpu/os/linux/module_usermode.h | 27 + drivers/gpu/nvgpu/os/linux/nvgpu_mem.c | 613 ++++++ drivers/gpu/nvgpu/os/linux/nvhost.c | 294 +++ drivers/gpu/nvgpu/os/linux/nvhost_priv.h | 24 + drivers/gpu/nvgpu/os/linux/nvlink.c | 106 ++ drivers/gpu/nvgpu/os/linux/os_fence_android.c | 79 + drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c | 111 ++ 
.../gpu/nvgpu/os/linux/os_fence_android_syncpt.c | 121 ++ drivers/gpu/nvgpu/os/linux/os_linux.h | 166 ++ drivers/gpu/nvgpu/os/linux/os_sched.c | 26 + drivers/gpu/nvgpu/os/linux/pci.c | 861 +++++++++ drivers/gpu/nvgpu/os/linux/pci.h | 27 + drivers/gpu/nvgpu/os/linux/pci_usermode.c | 24 + drivers/gpu/nvgpu/os/linux/pci_usermode.h | 23 + drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c | 269 +++ drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h | 37 + drivers/gpu/nvgpu/os/linux/platform_gk20a.h | 317 ++++ drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c | 957 ++++++++++ drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h | 23 + drivers/gpu/nvgpu/os/linux/platform_gp10b.h | 39 + drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | 607 ++++++ drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h | 23 + drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 588 ++++++ drivers/gpu/nvgpu/os/linux/rwsem.c | 39 + drivers/gpu/nvgpu/os/linux/scale.c | 428 +++++ drivers/gpu/nvgpu/os/linux/scale.h | 66 + drivers/gpu/nvgpu/os/linux/sched.c | 676 +++++++ drivers/gpu/nvgpu/os/linux/sched.h | 55 + drivers/gpu/nvgpu/os/linux/sim.c | 95 + drivers/gpu/nvgpu/os/linux/sim_pci.c | 91 + drivers/gpu/nvgpu/os/linux/soc.c | 122 ++ drivers/gpu/nvgpu/os/linux/sync_sema_android.c | 419 ++++ drivers/gpu/nvgpu/os/linux/sync_sema_android.h | 51 + drivers/gpu/nvgpu/os/linux/sysfs.c | 1205 ++++++++++++ drivers/gpu/nvgpu/os/linux/sysfs.h | 24 + drivers/gpu/nvgpu/os/linux/thread.c | 63 + drivers/gpu/nvgpu/os/linux/timers.c | 270 +++ drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c | 168 ++ drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h | 27 + drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c | 224 +++ .../linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 97 + .../gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c | 69 + drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c | 50 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c | 77 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c | 53 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c | 475 +++++ drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h | 57 + drivers/gpu/nvgpu/os/linux/vidmem.c | 262 +++ drivers/gpu/nvgpu/os/linux/vm.c | 332 ++++ drivers/gpu/nvgpu/pmgr/pmgr.c | 2 +- 241 files changed, 30151 insertions(+), 30138 deletions(-) delete mode 100644 drivers/gpu/nvgpu/common/linux/cde.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde.h delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gm20b.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gm20b.h delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gp10b.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gp10b.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ce2.c delete mode 100644 drivers/gpu/nvgpu/common/linux/channel.c delete mode 100644 drivers/gpu/nvgpu/common/linux/channel.h delete mode 100644 drivers/gpu/nvgpu/common/linux/clk.c delete mode 100644 drivers/gpu/nvgpu/common/linux/clk.h delete mode 100644 drivers/gpu/nvgpu/common/linux/comptags.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cond.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ctxsw_trace.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ctxsw_trace.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.c delete mode 100644 
drivers/gpu/nvgpu/common/linux/debug_ce.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_clk.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_hal.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_hal.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_xve.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_xve.h delete mode 100644 drivers/gpu/nvgpu/common/linux/dma.c delete mode 100644 drivers/gpu/nvgpu/common/linux/dmabuf.c delete mode 100644 drivers/gpu/nvgpu/common/linux/dmabuf.h delete mode 100644 drivers/gpu/nvgpu/common/linux/driver_common.c delete mode 100644 drivers/gpu/nvgpu/common/linux/driver_common.h delete mode 100644 drivers/gpu/nvgpu/common/linux/dt.c delete mode 100644 drivers/gpu/nvgpu/common/linux/firmware.c delete mode 100644 drivers/gpu/nvgpu/common/linux/fuse.c delete mode 100644 drivers/gpu/nvgpu/common/linux/intr.c delete mode 100644 drivers/gpu/nvgpu/common/linux/intr.h delete mode 100644 drivers/gpu/nvgpu/common/linux/io.c delete mode 100644 drivers/gpu/nvgpu/common/linux/io_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_as.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_as.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_channel.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_channel.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_dbg.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_dbg.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg.h delete mode 100644 drivers/gpu/nvgpu/common/linux/kmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/kmem_priv.h delete mode 100644 drivers/gpu/nvgpu/common/linux/log.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module.h delete mode 100644 drivers/gpu/nvgpu/common/linux/module_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module_usermode.h delete mode 100644 drivers/gpu/nvgpu/common/linux/nvgpu_mem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/nvhost.c delete mode 100644 drivers/gpu/nvgpu/common/linux/nvhost_priv.h delete mode 100644 drivers/gpu/nvgpu/common/linux/nvlink.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_linux.h delete mode 100644 drivers/gpu/nvgpu/common/linux/os_sched.c delete mode 100644 
drivers/gpu/nvgpu/common/linux/pci.c delete mode 100644 drivers/gpu/nvgpu/common/linux/pci.h delete mode 100644 drivers/gpu/nvgpu/common/linux/pci_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/pci_usermode.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/rwsem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/scale.c delete mode 100644 drivers/gpu/nvgpu/common/linux/scale.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sched.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sched.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sim.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sim_pci.c delete mode 100644 drivers/gpu/nvgpu/common/linux/soc.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sysfs.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sysfs.h delete mode 100644 drivers/gpu/nvgpu/common/linux/thread.c delete mode 100644 drivers/gpu/nvgpu/common/linux/timers.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h delete mode 100644 drivers/gpu/nvgpu/common/linux/vidmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vm.c delete mode 100644 drivers/gpu/nvgpu/gp10b/platform_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gm20b.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gm20b.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gp10b.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/ce2.c create mode 100644 drivers/gpu/nvgpu/os/linux/channel.c create mode 100644 drivers/gpu/nvgpu/os/linux/channel.h create mode 100644 drivers/gpu/nvgpu/os/linux/clk.c create mode 100644 drivers/gpu/nvgpu/os/linux/clk.h create mode 100644 drivers/gpu/nvgpu/os/linux/comptags.c create mode 100644 drivers/gpu/nvgpu/os/linux/cond.c create mode 100644 drivers/gpu/nvgpu/os/linux/ctxsw_trace.c create mode 100644 drivers/gpu/nvgpu/os/linux/ctxsw_trace.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_allocator.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_allocator.h create mode 100644 
drivers/gpu/nvgpu/os/linux/debug_cde.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_cde.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_ce.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_ce.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_clk.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_fifo.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_fifo.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_gr.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_gr.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_hal.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_hal.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_kmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_kmem.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_pmu.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_pmu.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_sched.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_xve.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_xve.h create mode 100644 drivers/gpu/nvgpu/os/linux/dma.c create mode 100644 drivers/gpu/nvgpu/os/linux/dmabuf.c create mode 100644 drivers/gpu/nvgpu/os/linux/dmabuf.h create mode 100644 drivers/gpu/nvgpu/os/linux/driver_common.c create mode 100644 drivers/gpu/nvgpu/os/linux/driver_common.h create mode 100644 drivers/gpu/nvgpu/os/linux/dt.c create mode 100644 drivers/gpu/nvgpu/os/linux/firmware.c create mode 100644 drivers/gpu/nvgpu/os/linux/fuse.c create mode 100644 drivers/gpu/nvgpu/os/linux/intr.c create mode 100644 drivers/gpu/nvgpu/os/linux/intr.h create mode 100644 drivers/gpu/nvgpu/os/linux/io.c create mode 100644 drivers/gpu/nvgpu/os/linux/io_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_as.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_as.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_channel.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_channel.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_dbg.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_tsg.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_tsg.h create mode 100644 drivers/gpu/nvgpu/os/linux/kmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/kmem_priv.h create mode 100644 drivers/gpu/nvgpu/os/linux/log.c create mode 100644 drivers/gpu/nvgpu/os/linux/module.c create mode 100644 drivers/gpu/nvgpu/os/linux/module.h create mode 100644 drivers/gpu/nvgpu/os/linux/module_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/module_usermode.h create mode 100644 drivers/gpu/nvgpu/os/linux/nvgpu_mem.c create mode 100644 drivers/gpu/nvgpu/os/linux/nvhost.c create mode 100644 drivers/gpu/nvgpu/os/linux/nvhost_priv.h create mode 100644 drivers/gpu/nvgpu/os/linux/nvlink.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_linux.h create mode 100644 drivers/gpu/nvgpu/os/linux/os_sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/pci.c create mode 100644 
drivers/gpu/nvgpu/os/linux/pci.h create mode 100644 drivers/gpu/nvgpu/os/linux/pci_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/pci_usermode.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/rwsem.c create mode 100644 drivers/gpu/nvgpu/os/linux/scale.c create mode 100644 drivers/gpu/nvgpu/os/linux/scale.h create mode 100644 drivers/gpu/nvgpu/os/linux/sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/sched.h create mode 100644 drivers/gpu/nvgpu/os/linux/sim.c create mode 100644 drivers/gpu/nvgpu/os/linux/sim_pci.c create mode 100644 drivers/gpu/nvgpu/os/linux/soc.c create mode 100644 drivers/gpu/nvgpu/os/linux/sync_sema_android.c create mode 100644 drivers/gpu/nvgpu/os/linux/sync_sema_android.h create mode 100644 drivers/gpu/nvgpu/os/linux/sysfs.c create mode 100644 drivers/gpu/nvgpu/os/linux/sysfs.h create mode 100644 drivers/gpu/nvgpu/os/linux/thread.c create mode 100644 drivers/gpu/nvgpu/os/linux/timers.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h create mode 100644 drivers/gpu/nvgpu/os/linux/vidmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/vm.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 5c7bb767..42d9855f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -30,39 +30,118 @@ nvgpu-y += common/bus/bus_gk20a.o \ common/ptimer/ptimer.o \ common/ptimer/ptimer_gk20a.o +# Linux specific parts of nvgpu. 
+nvgpu-y += \ + os/linux/kmem.o \ + os/linux/timers.o \ + os/linux/ioctl.o \ + os/linux/ioctl_ctrl.o \ + os/linux/ioctl_as.o \ + os/linux/ioctl_channel.o \ + os/linux/ioctl_tsg.o \ + os/linux/ioctl_dbg.o \ + os/linux/ioctl_clk_arb.o \ + os/linux/log.o \ + os/linux/cond.o \ + os/linux/nvgpu_mem.o \ + os/linux/dma.o \ + os/linux/driver_common.o \ + os/linux/firmware.o \ + os/linux/thread.o \ + os/linux/vm.o \ + os/linux/intr.o \ + os/linux/sysfs.o \ + os/linux/io.o \ + os/linux/io_usermode.o \ + os/linux/rwsem.o \ + os/linux/comptags.o \ + os/linux/dmabuf.o \ + os/linux/sched.o \ + os/linux/channel.o \ + os/linux/ce2.o \ + os/linux/sim.o \ + os/linux/sim_pci.o \ + os/linux/os_sched.o \ + os/linux/nvlink.o \ + os/linux/dt.o + +nvgpu-$(CONFIG_GK20A_VIDMEM) += \ + os/linux/vidmem.o + +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug.o \ + os/linux/debug_gr.o \ + os/linux/debug_fifo.o \ + os/linux/debug_ce.o \ + os/linux/debug_pmu.o \ + os/linux/debug_sched.o \ + os/linux/debug_allocator.o \ + os/linux/debug_hal.o \ + os/linux/debug_clk.o \ + os/linux/debug_xve.o + +ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y) +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug_kmem.o +endif + +nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \ + os/linux/ctxsw_trace.o + +nvgpu-$(CONFIG_TEGRA_GK20A) += \ + os/linux/module.o \ + os/linux/module_usermode.o \ + os/linux/soc.o \ + os/linux/fuse.o \ + os/linux/platform_ecc_sysfs.o \ + os/linux/platform_gk20a_tegra.o \ + os/linux/platform_gp10b_tegra.o \ + os/linux/platform_gv11b_tegra.o + +nvgpu-$(CONFIG_SYNC) += \ + os/linux/sync_sema_android.o \ + os/linux/os_fence_android.o \ + os/linux/os_fence_android_sema.o + +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y) +nvgpu-$(CONFIG_SYNC) += \ + os/linux/os_fence_android_syncpt.o +endif + +nvgpu-$(CONFIG_GK20A_PCI) += \ + os/linux/pci.o \ + os/linux/pci_usermode.o + +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/nvhost.o + +nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ + os/linux/vgpu/platform_vgpu_tegra.o \ + os/linux/vgpu/fecs_trace_vgpu.o \ + os/linux/vgpu/clk_vgpu.o \ + os/linux/vgpu/sysfs_vgpu.o \ + os/linux/vgpu/vgpu_ivc.o \ + os/linux/vgpu/vgpu_ivm.o \ + os/linux/vgpu/vgpu_linux.o \ + os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o + +nvgpu-$(CONFIG_COMMON_CLK) += \ + os/linux/clk.o + +nvgpu-$(CONFIG_GK20A_DEVFREQ) += \ + os/linux/scale.o + +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \ + os/linux/cde.o \ + os/linux/cde_gm20b.o \ + os/linux/cde_gp10b.o + +ifeq ($(CONFIG_DEBUG_FS),y) +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \ + os/linux/debug_cde.o +endif + nvgpu-y += \ - common/linux/kmem.o \ - common/linux/timers.o \ - common/linux/ioctl.o \ - common/linux/ioctl_ctrl.o \ - common/linux/ioctl_as.o \ - common/linux/ioctl_channel.o \ - common/linux/ioctl_tsg.o \ - common/linux/ioctl_dbg.o \ - common/linux/ioctl_clk_arb.o \ - common/linux/log.o \ - common/linux/cond.o \ - common/linux/nvgpu_mem.o \ - common/linux/dma.o \ - common/linux/driver_common.o \ - common/linux/firmware.o \ - common/linux/thread.o \ - common/linux/vm.o \ - common/linux/intr.o \ - common/linux/sysfs.o \ - common/linux/io.o \ - common/linux/io_usermode.o \ - common/linux/rwsem.o \ - common/linux/comptags.o \ - common/linux/dmabuf.o \ - common/linux/sched.o \ - common/linux/channel.o \ - common/linux/ce2.o \ - common/linux/sim.o \ - common/linux/sim_pci.o \ - common/linux/os_sched.o \ - common/linux/nvlink.o \ - common/linux/dt.o \ common/mm/nvgpu_allocator.o \ common/mm/bitmap_allocator.o \ common/mm/buddy_allocator.o \ @@ -138,51 +217,7 @@ nvgpu-y += \ boardobj/boardobjgrp_e32.o 
 
 nvgpu-$(CONFIG_GK20A_VIDMEM) += \
-	common/mm/vidmem.o \
-	common/linux/vidmem.o
-
-nvgpu-$(CONFIG_DEBUG_FS) += \
-	common/linux/debug.o \
-	common/linux/debug_gr.o \
-	common/linux/debug_fifo.o \
-	common/linux/debug_ce.o \
-	common/linux/debug_pmu.o \
-	common/linux/debug_sched.o \
-	common/linux/debug_allocator.o \
-	common/linux/debug_hal.o \
-	common/linux/debug_clk.o \
-	common/linux/debug_xve.o
-
-ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y)
-nvgpu-$(CONFIG_DEBUG_FS) += \
-	common/linux/debug_kmem.o
-endif
-
-nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \
-	common/linux/ctxsw_trace.o
-
-nvgpu-$(CONFIG_TEGRA_GK20A) += \
-	common/linux/module.o \
-	common/linux/module_usermode.o \
-	common/linux/soc.o \
-	common/linux/fuse.o \
-	common/linux/platform_ecc_sysfs.o \
-	common/linux/platform_gk20a_tegra.o \
-	common/linux/platform_gp10b_tegra.o \
-	common/linux/platform_gv11b_tegra.o
-
-nvgpu-$(CONFIG_SYNC) += common/linux/sync_sema_android.o \
-	common/linux/os_fence_android.o \
-	common/linux/os_fence_android_sema.o
-
-ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y)
-nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o
-endif
-
-nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
-	common/linux/pci_usermode.o \
-
-nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += common/linux/nvhost.o
+	common/mm/vidmem.o
 
 nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	vgpu/ltc_vgpu.o \
@@ -205,20 +240,6 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	vgpu/gv11b/vgpu_fifo_gv11b.o \
 	vgpu/gv11b/vgpu_subctx_gv11b.o \
 	vgpu/gv11b/vgpu_tsg_gv11b.o \
-	common/linux/vgpu/platform_vgpu_tegra.o \
-	common/linux/vgpu/fecs_trace_vgpu.o \
-	common/linux/vgpu/clk_vgpu.o \
-	common/linux/vgpu/sysfs_vgpu.o \
-	common/linux/vgpu/vgpu_ivc.o \
-	common/linux/vgpu/vgpu_ivm.o \
-	common/linux/vgpu/vgpu_linux.o \
-	common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o
-
-nvgpu-$(CONFIG_COMMON_CLK) += \
-	common/linux/clk.o
-
-nvgpu-$(CONFIG_GK20A_DEVFREQ) += \
-	common/linux/scale.o
 
 nvgpu-$(CONFIG_GK20A_CYCLE_STATS) += \
 	gk20a/css_gr_gk20a.o
@@ -316,13 +337,3 @@ nvgpu-y += \
 	therm/thrmpmu.o \
 	lpwr/rppg.o \
 	lpwr/lpwr.o
-
-nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
-	common/linux/cde.o \
-	common/linux/cde_gm20b.o \
-	common/linux/cde_gp10b.o
-
-ifeq ($(CONFIG_DEBUG_FS),y)
-nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
-	common/linux/debug_cde.o
-endif
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
deleted file mode 100644
index 32b333f1..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-/*
- * Color decompression engine support
- *
- * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/dma-buf.h>
-#include <linux/dma-mapping.h>
-#include <linux/fs.h>
-#include <linux/delay.h>
-
-#include <trace/events/gk20a.h>
-
-#include <nvgpu/dma.h>
-#include <nvgpu/gmmu.h>
-#include <nvgpu/timers.h>
-#include <nvgpu/nvgpu_common.h>
-#include <nvgpu/kmem.h>
-#include <nvgpu/log.h>
-#include <nvgpu/bug.h>
-#include <nvgpu/firmware.h>
-#include <nvgpu/os_sched.h>
-
-#include <nvgpu/linux/vm.h>
-
-#include "gk20a/gk20a.h"
-#include "gk20a/channel_gk20a.h"
-#include "gk20a/mm_gk20a.h"
-#include "gk20a/fence_gk20a.h"
-#include "gk20a/gr_gk20a.h"
-
-#include "cde.h"
-#include "os_linux.h"
-#include "dmabuf.h"
-#include "channel.h"
-#include "cde_gm20b.h"
-#include "cde_gp10b.h"
-
-#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
-
-static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
-static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
-
-#define CTX_DELETE_TIME 1000
-
-#define MAX_CTX_USE_COUNT 42
-#define MAX_CTX_RETRY_TIME 2000
-
-static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
-{
-	struct nvgpu_mapped_buf *buffer;
-	dma_addr_t addr = 0;
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
-	if (buffer)
-		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return addr;
-}
-
-static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
-{
-	unsigned int i;
-
-	for (i = 0; i < cde_ctx->num_bufs; i++) {
-		struct nvgpu_mem *mem = cde_ctx->mem + i;
-		nvgpu_dma_unmap_free(cde_ctx->vm, mem);
-	}
-
-	nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
-
-	cde_ctx->convert_cmd = NULL;
-	cde_ctx->init_convert_cmd = NULL;
-	cde_ctx->num_bufs = 0;
-	cde_ctx->num_params = 0;
-	cde_ctx->init_cmd_num_entries = 0;
-	cde_ctx->convert_cmd_num_entries = 0;
-	cde_ctx->init_cmd_executed = false;
-}
-
-static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
-__must_hold(&cde_app->mutex)
-{
-	struct nvgpu_os_linux *l = cde_ctx->l;
-	struct gk20a *g = &l->g;
-	struct channel_gk20a *ch = cde_ctx->ch;
-	struct vm_gk20a *vm = ch->vm;
-
-	trace_gk20a_cde_remove_ctx(cde_ctx);
-
-	/* release mapped memory */
-	gk20a_deinit_cde_img(cde_ctx);
-	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
-			cde_ctx->backing_store_vaddr);
-
-	/*
-	 * free the channel
-	 * gk20a_channel_close() will also unbind the channel from TSG
-	 */
-	gk20a_channel_close(ch);
-	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
-
-	/* housekeeping on app */
-	nvgpu_list_del(&cde_ctx->list);
-	l->cde_app.ctx_count--;
-	nvgpu_kfree(g, cde_ctx);
-}
-
-static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
-		bool wait_finish)
-__releases(&cde_app->mutex)
-__acquires(&cde_app->mutex)
-{
-	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
-
-	/* permanent contexts do not have deleter works */
-	if (!cde_ctx->is_temporary)
-		return;
-
-	if (wait_finish) {
-		nvgpu_mutex_release(&cde_app->mutex);
-		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
-		nvgpu_mutex_acquire(&cde_app->mutex);
-	} else {
-		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
-	}
-}
-
-static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
-__must_hold(&l->cde_app->mutex)
-{
-	struct gk20a_cde_app *cde_app = &l->cde_app;
-	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
-
-	/* safe to go off the mutex in cancel_deleter since app is
-	 * deinitialised; no new jobs are started.
deleter works may be only at - * waiting for the mutex or before, going to abort */ - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->free_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, true); - gk20a_cde_remove_ctx(cde_ctx); - } - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->used_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, true); - gk20a_cde_remove_ctx(cde_ctx); - } -} - -static void gk20a_cde_stop(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - - /* prevent further conversions and delayed works from working */ - cde_app->initialised = false; - /* free all data, empty the list */ - gk20a_cde_remove_contexts(l); -} - -void gk20a_cde_destroy(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - - if (!cde_app->initialised) - return; - - nvgpu_mutex_acquire(&cde_app->mutex); - gk20a_cde_stop(l); - nvgpu_mutex_release(&cde_app->mutex); - - nvgpu_mutex_destroy(&cde_app->mutex); -} - -void gk20a_cde_suspend(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; - - if (!cde_app->initialised) - return; - - nvgpu_mutex_acquire(&cde_app->mutex); - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->free_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, false); - } - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->used_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, false); - } - - nvgpu_mutex_release(&cde_app->mutex); - -} - -static int gk20a_cde_create_context(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx; - - cde_ctx = gk20a_cde_allocate_context(l); - if (IS_ERR(cde_ctx)) - return PTR_ERR(cde_ctx); - - nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); - cde_app->ctx_count++; - if (cde_app->ctx_count > cde_app->ctx_count_top) - cde_app->ctx_count_top = cde_app->ctx_count; - - return 0; -} - -static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - int err; - int i; - - for (i = 0; i < NUM_CDE_CONTEXTS; i++) { - err = gk20a_cde_create_context(l); - if (err) - goto out; - } - - return 0; -out: - gk20a_cde_remove_contexts(l); - return err; -} - -static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_buf *buf) -{ - struct nvgpu_mem *mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* check that the file can hold the buf */ - if (buf->data_byte_offset != 0 && - buf->data_byte_offset + buf->num_bytes > img->size) { - nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", - cde_ctx->num_bufs); - return -EINVAL; - } - - /* check that we have enough buf elems available */ - if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { - nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", - cde_ctx->num_bufs); - return -ENOMEM; - } - - /* allocate buf */ - mem = cde_ctx->mem + cde_ctx->num_bufs; - err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); - if (err) { - nvgpu_warn(g, "cde: could not allocate device memory. 
buffer idx = %d", - cde_ctx->num_bufs); - return -ENOMEM; - } - - /* copy the content */ - if (buf->data_byte_offset != 0) - memcpy(mem->cpu_va, img->data + buf->data_byte_offset, - buf->num_bytes); - - cde_ctx->num_bufs++; - - return 0; -} - -static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, - int type, s32 shift, u64 mask, u64 value) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - u32 *target_mem_ptr = target; - u64 *target_mem_ptr_u64 = target; - u64 current_value, new_value; - - value = (shift >= 0) ? value << shift : value >> -shift; - value &= mask; - - /* read current data from the location */ - current_value = 0; - if (type == TYPE_PARAM_TYPE_U32) { - if (mask != 0xfffffffful) - current_value = *target_mem_ptr; - } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { - if (mask != ~0ul) - current_value = *target_mem_ptr_u64; - } else if (type == TYPE_PARAM_TYPE_U64_BIG) { - current_value = *target_mem_ptr_u64; - current_value = (u64)(current_value >> 32) | - (u64)(current_value << 32); - } else { - nvgpu_warn(g, "cde: unknown type. type=%d", - type); - return -EINVAL; - } - - current_value &= ~mask; - new_value = current_value | value; - - /* store the element data back */ - if (type == TYPE_PARAM_TYPE_U32) - *target_mem_ptr = (u32)new_value; - else if (type == TYPE_PARAM_TYPE_U64_LITTLE) - *target_mem_ptr_u64 = new_value; - else { - new_value = (u64)(new_value >> 32) | - (u64)(new_value << 32); - *target_mem_ptr_u64 = new_value; - } - - return 0; -} - -static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_replace *replace) -{ - struct nvgpu_mem *source_mem; - struct nvgpu_mem *target_mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - u32 *target_mem_ptr; - u64 vaddr; - int err; - - if (replace->target_buf >= cde_ctx->num_bufs || - replace->source_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", - replace->target_buf, replace->source_buf, - cde_ctx->num_bufs); - return -EINVAL; - } - - source_mem = cde_ctx->mem + replace->source_buf; - target_mem = cde_ctx->mem + replace->target_buf; - target_mem_ptr = target_mem->cpu_va; - - if (source_mem->size < (replace->source_byte_offset + 3) || - target_mem->size < (replace->target_byte_offset + 3)) { - nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", - replace->target_byte_offset, - replace->source_byte_offset, - source_mem->size, - target_mem->size); - return -EINVAL; - } - - /* calculate the target pointer */ - target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); - - /* determine patch value */ - vaddr = source_mem->gpu_va + replace->source_byte_offset; - err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, - replace->shift, replace->mask, - vaddr); - if (err) { - nvgpu_warn(g, "cde: replace failed. 
err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", - err, replace->target_buf, - replace->target_byte_offset, - replace->source_buf, - replace->source_byte_offset); - } - - return err; -} - -static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_mem *target_mem; - u32 *target_mem_ptr; - u64 new_data; - int user_id = 0, err; - unsigned int i; - - for (i = 0; i < cde_ctx->num_params; i++) { - struct gk20a_cde_hdr_param *param = cde_ctx->params + i; - target_mem = cde_ctx->mem + param->target_buf; - target_mem_ptr = target_mem->cpu_va; - target_mem_ptr += (param->target_byte_offset / sizeof(u32)); - - switch (param->id) { - case TYPE_PARAM_COMPTAGS_PER_CACHELINE: - new_data = g->gr.comptags_per_cacheline; - break; - case TYPE_PARAM_GPU_CONFIGURATION: - new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * - g->gr.cacheline_size; - break; - case TYPE_PARAM_FIRSTPAGEOFFSET: - new_data = cde_ctx->surf_param_offset; - break; - case TYPE_PARAM_NUMPAGES: - new_data = cde_ctx->surf_param_lines; - break; - case TYPE_PARAM_BACKINGSTORE: - new_data = cde_ctx->backing_store_vaddr; - break; - case TYPE_PARAM_DESTINATION: - new_data = cde_ctx->compbit_vaddr; - break; - case TYPE_PARAM_DESTINATION_SIZE: - new_data = cde_ctx->compbit_size; - break; - case TYPE_PARAM_BACKINGSTORE_SIZE: - new_data = g->gr.compbit_store.mem.size; - break; - case TYPE_PARAM_SOURCE_SMMU_ADDR: - new_data = gpuva_to_iova_base(cde_ctx->vm, - cde_ctx->surf_vaddr); - if (new_data == 0) { - nvgpu_warn(g, "cde: failed to find 0x%llx", - cde_ctx->surf_vaddr); - return -EINVAL; - } - break; - case TYPE_PARAM_BACKINGSTORE_BASE_HW: - new_data = g->gr.compbit_store.base_hw; - break; - case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: - new_data = g->gr.gobs_per_comptagline_per_slice; - break; - case TYPE_PARAM_SCATTERBUFFER: - new_data = cde_ctx->scatterbuffer_vaddr; - break; - case TYPE_PARAM_SCATTERBUFFER_SIZE: - new_data = cde_ctx->scatterbuffer_size; - break; - default: - user_id = param->id - NUM_RESERVED_PARAMS; - if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) - continue; - new_data = cde_ctx->user_param_values[user_id]; - } - - nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", - i, param->id, param->target_buf, - param->target_byte_offset, new_data, - param->data_offset, param->type, param->shift, - param->mask); - - new_data += param->data_offset; - - err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, - param->shift, param->mask, new_data); - - if (err) { - nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", - err, i, param->id, param->target_buf, - param->target_byte_offset, new_data); - return err; - } - } - - return 0; -} - -static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_param *param) -{ - struct nvgpu_mem *target_mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - - if (param->target_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: invalid buffer parameter. 
param idx = %d, target_buf=%u, num_bufs=%u", - cde_ctx->num_params, param->target_buf, - cde_ctx->num_bufs); - return -EINVAL; - } - - target_mem = cde_ctx->mem + param->target_buf; - if (target_mem->size < (param->target_byte_offset + 3)) { - nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", - cde_ctx->num_params, param->target_byte_offset, - target_mem->size); - return -EINVAL; - } - - /* does this parameter fit into our parameter structure */ - if (cde_ctx->num_params >= MAX_CDE_PARAMS) { - nvgpu_warn(g, "cde: no room for new parameters param idx = %d", - cde_ctx->num_params); - return -ENOMEM; - } - - /* is the given id valid? */ - if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { - nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", - param->id, cde_ctx->num_params, - NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); - return -EINVAL; - } - - cde_ctx->params[cde_ctx->num_params] = *param; - cde_ctx->num_params++; - - return 0; -} - -static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - u32 required_class) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* CDE enabled */ - cde_ctx->ch->cde = true; - - err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); - if (err) { - nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", - err); - return err; - } - - return 0; -} - -static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - u32 op, - struct gk20a_cde_cmd_elem *cmd_elem, - u32 num_elems) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; - u32 *num_entries; - unsigned int i; - - /* check command type */ - if (op == TYPE_BUF_COMMAND_INIT) { - gpfifo = &cde_ctx->init_convert_cmd; - num_entries = &cde_ctx->init_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_CONVERT) { - gpfifo = &cde_ctx->convert_cmd; - num_entries = &cde_ctx->convert_cmd_num_entries; - } else { - nvgpu_warn(g, "cde: unknown command. 
op=%u", - op); - return -EINVAL; - } - - /* allocate gpfifo entries to be pushed */ - *gpfifo = nvgpu_kzalloc(g, - sizeof(struct nvgpu_gpfifo_entry) * num_elems); - if (!*gpfifo) { - nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); - return -ENOMEM; - } - - gpfifo_elem = *gpfifo; - for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { - struct nvgpu_mem *target_mem; - - /* validate the current entry */ - if (cmd_elem->target_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", - cmd_elem->target_buf, cde_ctx->num_bufs); - return -EINVAL; - } - - target_mem = cde_ctx->mem + cmd_elem->target_buf; - if (target_mem->size< - cmd_elem->target_byte_offset + cmd_elem->num_bytes) { - nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", - target_mem->size, - cmd_elem->target_byte_offset, - cmd_elem->num_bytes); - return -EINVAL; - } - - /* store the element into gpfifo */ - gpfifo_elem->entry0 = - u64_lo32(target_mem->gpu_va + - cmd_elem->target_byte_offset); - gpfifo_elem->entry1 = - u64_hi32(target_mem->gpu_va + - cmd_elem->target_byte_offset) | - pbdma_gp_entry1_length_f(cmd_elem->num_bytes / - sizeof(u32)); - } - - *num_entries = num_elems; - return 0; -} - -static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - unsigned long init_bytes = cde_ctx->init_cmd_num_entries * - sizeof(struct nvgpu_gpfifo_entry); - unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * - sizeof(struct nvgpu_gpfifo_entry); - unsigned long total_bytes = init_bytes + conv_bytes; - struct nvgpu_gpfifo_entry *combined_cmd; - - /* allocate buffer that has space for both */ - combined_cmd = nvgpu_kzalloc(g, total_bytes); - if (!combined_cmd) { - nvgpu_warn(g, - "cde: could not allocate memory for gpfifo entries"); - return -ENOMEM; - } - - /* move the original init here and append convert */ - memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); - memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, - cde_ctx->convert_cmd, conv_bytes); - - nvgpu_kfree(g, cde_ctx->init_convert_cmd); - nvgpu_kfree(g, cde_ctx->convert_cmd); - - cde_ctx->init_convert_cmd = combined_cmd; - cde_ctx->convert_cmd = combined_cmd - + cde_ctx->init_cmd_num_entries; - - return 0; -} - -static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - u32 *data = (u32 *)img->data; - u32 num_of_elems; - struct gk20a_cde_hdr_elem *elem; - u32 min_size = 0; - int err = 0; - unsigned int i; - - min_size += 2 * sizeof(u32); - if (img->size < min_size) { - nvgpu_warn(g, "cde: invalid image header"); - return -EINVAL; - } - - cde_app->firmware_version = data[0]; - num_of_elems = data[1]; - - min_size += num_of_elems * sizeof(*elem); - if (img->size < min_size) { - nvgpu_warn(g, "cde: bad image"); - return -EINVAL; - } - - elem = (struct gk20a_cde_hdr_elem *)&data[2]; - for (i = 0; i < num_of_elems; i++) { - int err = 0; - switch (elem->type) { - case TYPE_BUF: - err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); - break; - case TYPE_REPLACE: - err = gk20a_init_cde_replace(cde_ctx, img, - &elem->replace); - break; - case TYPE_PARAM: - err = gk20a_init_cde_param(cde_ctx, img, &elem->param); - break; - case TYPE_REQUIRED_CLASS: - err = gk20a_init_cde_required_class(cde_ctx, img, - 
elem->required_class); - break; - case TYPE_COMMAND: - { - struct gk20a_cde_cmd_elem *cmd = (void *) - &img->data[elem->command.data_byte_offset]; - err = gk20a_init_cde_command(cde_ctx, img, - elem->command.op, cmd, - elem->command.num_entries); - break; - } - case TYPE_ARRAY: - memcpy(&cde_app->arrays[elem->array.id][0], - elem->array.data, - MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); - break; - default: - nvgpu_warn(g, "cde: unknown header element"); - err = -EINVAL; - } - - if (err) - goto deinit_image; - - elem++; - } - - if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { - nvgpu_warn(g, "cde: convert command not defined"); - err = -EINVAL; - goto deinit_image; - } - - if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { - nvgpu_warn(g, "cde: convert command not defined"); - err = -EINVAL; - goto deinit_image; - } - - err = gk20a_cde_pack_cmdbufs(cde_ctx); - if (err) - goto deinit_image; - - return 0; - -deinit_image: - gk20a_deinit_cde_img(cde_ctx); - return err; -} - -static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, - u32 op, struct nvgpu_channel_fence *fence, - u32 flags, struct gk20a_fence **fence_out) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_gpfifo_entry *gpfifo = NULL; - int num_entries = 0; - - /* check command type */ - if (op == TYPE_BUF_COMMAND_INIT) { - /* both init and convert combined */ - gpfifo = cde_ctx->init_convert_cmd; - num_entries = cde_ctx->init_cmd_num_entries - + cde_ctx->convert_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_CONVERT) { - gpfifo = cde_ctx->convert_cmd; - num_entries = cde_ctx->convert_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_NOOP) { - /* Any non-null gpfifo will suffice with 0 num_entries */ - gpfifo = cde_ctx->init_convert_cmd; - num_entries = 0; - } else { - nvgpu_warn(g, "cde: unknown buffer"); - return -EINVAL; - } - - if (gpfifo == NULL) { - nvgpu_warn(g, "cde: buffer not available"); - return -ENOSYS; - } - - return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, - num_entries, flags, fence, fence_out, - NULL); -} - -static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; - struct gk20a *g = &cde_ctx->l->g; - - nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); - trace_gk20a_cde_release(cde_ctx); - - nvgpu_mutex_acquire(&cde_app->mutex); - - if (cde_ctx->in_use) { - cde_ctx->in_use = false; - nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); - cde_app->ctx_usecount--; - } else { - nvgpu_log_info(g, "double release cde context %p", cde_ctx); - } - - nvgpu_mutex_release(&cde_app->mutex); -} - -static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct delayed_work *delay_work = to_delayed_work(work); - struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, - struct gk20a_cde_ctx, ctx_deleter_work); - struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* someone has just taken it? engine deletion started? 
*/ - if (cde_ctx->in_use || !cde_app->initialised) - return; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: attempting to delete temporary %p", cde_ctx); - - err = gk20a_busy(g); - if (err) { - /* this context would find new use anyway later, so not freeing - * here does not leak anything */ - nvgpu_warn(g, "cde: cannot set gk20a on, postponing" - " temp ctx deletion"); - return; - } - - nvgpu_mutex_acquire(&cde_app->mutex); - if (cde_ctx->in_use || !cde_app->initialised) { - nvgpu_log(g, gpu_dbg_cde_ctx, - "cde: context use raced, not deleting %p", - cde_ctx); - goto out; - } - - WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work), - "double pending %p", cde_ctx); - - gk20a_cde_remove_ctx(cde_ctx); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: destroyed %p count=%d use=%d max=%d", - cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount, - cde_app->ctx_count_top); - -out: - nvgpu_mutex_release(&cde_app->mutex); - gk20a_idle(g); -} - -static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l) -__must_hold(&cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx; - - /* exhausted? */ - - if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT) - return ERR_PTR(-EAGAIN); - - /* idle context available? */ - - if (!nvgpu_list_empty(&cde_app->free_contexts)) { - cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts, - gk20a_cde_ctx, list); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: got free %p count=%d use=%d max=%d", - cde_ctx, cde_app->ctx_count, - cde_app->ctx_usecount, - cde_app->ctx_count_top); - trace_gk20a_cde_get_context(cde_ctx); - - /* deleter work may be scheduled, but in_use prevents it */ - cde_ctx->in_use = true; - nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts); - cde_app->ctx_usecount++; - - /* cancel any deletions now that ctx is in use */ - gk20a_cde_cancel_deleter(cde_ctx, true); - return cde_ctx; - } - - /* no free contexts, get a temporary one */ - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: no free contexts, count=%d", - cde_app->ctx_count); - - cde_ctx = gk20a_cde_allocate_context(l); - if (IS_ERR(cde_ctx)) { - nvgpu_warn(g, "cde: cannot allocate context: %ld", - PTR_ERR(cde_ctx)); - return cde_ctx; - } - - trace_gk20a_cde_get_context(cde_ctx); - cde_ctx->in_use = true; - cde_ctx->is_temporary = true; - cde_app->ctx_usecount++; - cde_app->ctx_count++; - if (cde_app->ctx_count > cde_app->ctx_count_top) - cde_app->ctx_count_top = cde_app->ctx_count; - nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts); - - return cde_ctx; -} - -static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) -__releases(&cde_app->mutex) -__acquires(&cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx = NULL; - struct nvgpu_timeout timeout; - - nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, - NVGPU_TIMER_CPU_TIMER); - - do { - cde_ctx = gk20a_cde_do_get_context(l); - if (PTR_ERR(cde_ctx) != -EAGAIN) - break; - - /* exhausted, retry */ - nvgpu_mutex_release(&cde_app->mutex); - cond_resched(); - nvgpu_mutex_acquire(&cde_app->mutex); - } while (!nvgpu_timeout_expired(&timeout)); - - return cde_ctx; -} - -static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_ctx *cde_ctx; - int ret; - - cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); - if (!cde_ctx) - return ERR_PTR(-ENOMEM); - - cde_ctx->l = l; - 
cde_ctx->dev = dev_from_gk20a(g); - - ret = gk20a_cde_load(cde_ctx); - if (ret) { - nvgpu_kfree(g, cde_ctx); - return ERR_PTR(ret); - } - - nvgpu_init_list_node(&cde_ctx->list); - cde_ctx->is_temporary = false; - cde_ctx->in_use = false; - INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, - gk20a_cde_ctx_deleter_fn); - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); - trace_gk20a_cde_allocate_context(cde_ctx); - return cde_ctx; -} - -int gk20a_cde_convert(struct nvgpu_os_linux *l, - struct dma_buf *compbits_scatter_buf, - u64 compbits_byte_offset, - u64 scatterbuffer_byte_offset, - struct nvgpu_channel_fence *fence, - u32 __flags, struct gk20a_cde_param *params, - int num_params, struct gk20a_fence **fence_out) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_ctx *cde_ctx = NULL; - struct gk20a_comptags comptags; - struct nvgpu_os_buffer os_buf = { - compbits_scatter_buf, - NULL, - dev_from_gk20a(g) - }; - u64 mapped_compbits_offset = 0; - u64 compbits_size = 0; - u64 mapped_scatterbuffer_offset = 0; - u64 scatterbuffer_size = 0; - u64 map_vaddr = 0; - u64 map_offset = 0; - u64 map_size = 0; - u8 *surface = NULL; - u64 big_page_mask = 0; - u32 flags; - int err, i; - const s16 compbits_kind = 0; - u32 submit_op; - struct dma_buf_attachment *attachment; - - nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", - compbits_byte_offset, scatterbuffer_byte_offset); - - /* scatter buffer must be after compbits buffer */ - if (scatterbuffer_byte_offset && - scatterbuffer_byte_offset < compbits_byte_offset) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&l->cde_app.mutex); - cde_ctx = gk20a_cde_get_context(l); - nvgpu_mutex_release(&l->cde_app.mutex); - if (IS_ERR(cde_ctx)) { - err = PTR_ERR(cde_ctx); - goto exit_idle; - } - - /* First, map the buffer to local va */ - - /* ensure that the compbits buffer has drvdata */ - err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, - dev_from_gk20a(g)); - if (err) - goto exit_idle; - - /* compbits don't start at page aligned offset, so we need to align - the region to be mapped */ - big_page_mask = cde_ctx->vm->big_page_size - 1; - map_offset = compbits_byte_offset & ~big_page_mask; - map_size = compbits_scatter_buf->size - map_offset; - - - /* compute compbit start offset from the beginning of the mapped - area */ - mapped_compbits_offset = compbits_byte_offset - map_offset; - if (scatterbuffer_byte_offset) { - compbits_size = scatterbuffer_byte_offset - - compbits_byte_offset; - mapped_scatterbuffer_offset = scatterbuffer_byte_offset - - map_offset; - scatterbuffer_size = compbits_scatter_buf->size - - scatterbuffer_byte_offset; - } else { - compbits_size = compbits_scatter_buf->size - - compbits_byte_offset; - } - - nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", - map_offset, map_size); - nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", - mapped_compbits_offset, compbits_size); - nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", - mapped_scatterbuffer_offset, scatterbuffer_size); - - - /* map the destination buffer */ - get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ - err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, - NVGPU_VM_MAP_CACHEABLE | - NVGPU_VM_MAP_DIRECT_KIND_CTRL, - NVGPU_KIND_INVALID, - compbits_kind, /* incompressible kind */ - gk20a_mem_flag_none, - map_offset, 
map_size,
-				NULL,
-				&map_vaddr);
-	if (err) {
-		dma_buf_put(compbits_scatter_buf);
-		err = -EINVAL;
-		goto exit_idle;
-	}
-
-	if (scatterbuffer_byte_offset &&
-	    l->ops.cde.need_scatter_buffer &&
-	    l->ops.cde.need_scatter_buffer(g)) {
-		struct sg_table *sgt;
-		void *scatter_buffer;
-
-		/* dma_buf_vmap() returns NULL on failure, not an ERR_PTR */
-		surface = dma_buf_vmap(compbits_scatter_buf);
-		if (!surface) {
-			nvgpu_warn(g,
-				   "dma_buf_vmap failed");
-			err = -EINVAL;
-			goto exit_unmap_vaddr;
-		}
-
-		scatter_buffer = surface + scatterbuffer_byte_offset;
-
-		nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
-			  surface, scatter_buffer);
-		sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
-				   &attachment);
-		if (IS_ERR(sgt)) {
-			nvgpu_warn(g,
-				   "mm_pin failed");
-			err = -EINVAL;
-			goto exit_unmap_surface;
-		} else {
-			err = l->ops.cde.populate_scatter_buffer(g, sgt,
-					compbits_byte_offset, scatter_buffer,
-					scatterbuffer_size);
-			WARN_ON(err);
-
-			gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
-				       attachment, sgt);
-			if (err)
-				goto exit_unmap_surface;
-		}
-
-		__cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
-		dma_buf_vunmap(compbits_scatter_buf, surface);
-		surface = NULL;
-	}
-
-	/* store source buffer compression tags */
-	gk20a_get_comptags(&os_buf, &comptags);
-	cde_ctx->surf_param_offset = comptags.offset;
-	cde_ctx->surf_param_lines = comptags.lines;
-
-	/* store the surface vaddr. This is actually the compbit vaddr, but
-	   since the compbits live in the same surface, and we can get the
-	   alloc base address with gpuva_to_iova_base, this will do */
-	cde_ctx->surf_vaddr = map_vaddr;
-
-	/* store information about the destination */
-	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
-	cde_ctx->compbit_size = compbits_size;
-
-	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
-	cde_ctx->scatterbuffer_size = scatterbuffer_size;
-
-	/* remove existing argument data */
-	memset(cde_ctx->user_param_values, 0,
-	       sizeof(cde_ctx->user_param_values));
-
-	/* read user space arguments for the conversion */
-	for (i = 0; i < num_params; i++) {
-		struct gk20a_cde_param *param = params + i;
-		int id = param->id - NUM_RESERVED_PARAMS;
-
-		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
-			nvgpu_warn(g, "cde: unknown user parameter");
-			err = -EINVAL;
-			goto exit_unmap_surface;
-		}
-		cde_ctx->user_param_values[id] = param->value;
-	}
-
-	/* patch data */
-	err = gk20a_cde_patch_params(cde_ctx);
-	if (err) {
-		nvgpu_warn(g, "cde: failed to patch parameters");
-		goto exit_unmap_surface;
-	}
-
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
-		  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
-		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
-		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
-
-	/* always take the postfence, as it is needed for protecting the
-	 * cde context */
-	flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
-
-	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
-	gk20a_idle(g);
-
-	if (comptags.lines == 0) {
-		/*
-		 * Nothing to do on the buffer, but do a null kickoff for
-		 * managing the pre and post fences.
-		 */
-		submit_op = TYPE_BUF_COMMAND_NOOP;
-	} else if (!cde_ctx->init_cmd_executed) {
-		/*
-		 * First time, so include the init pushbuf too in addition to
-		 * the conversion code.
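-		 * (TYPE_BUF_COMMAND_INIT makes gk20a_cde_execute_buffer()
-		 * kick the packed init and convert pushbuffers back to back
-		 * in a single gpfifo submission.)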
- */ - submit_op = TYPE_BUF_COMMAND_INIT; - } else { - /* - * The usual condition: execute just the conversion. - */ - submit_op = TYPE_BUF_COMMAND_CONVERT; - } - err = gk20a_cde_execute_buffer(cde_ctx, submit_op, - fence, flags, fence_out); - - if (comptags.lines != 0 && !err) - cde_ctx->init_cmd_executed = true; - - /* unmap the buffers - channel holds references to them now */ - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); - - return err; - -exit_unmap_surface: - if (surface) - dma_buf_vunmap(compbits_scatter_buf, surface); -exit_unmap_vaddr: - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); -exit_idle: - gk20a_idle(g); - return err; -} - -static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_ctx *cde_ctx = data; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - bool channel_idle; - - channel_gk20a_joblist_lock(ch); - channel_idle = channel_gk20a_joblist_is_empty(ch); - channel_gk20a_joblist_unlock(ch); - - if (!channel_idle) - return; - - trace_gk20a_cde_finished_ctx_cb(cde_ctx); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx); - if (!cde_ctx->in_use) - nvgpu_log_info(g, "double finish cde context %p on channel %p", - cde_ctx, ch); - - if (ch->has_timedout) { - if (cde_ctx->is_temporary) { - nvgpu_warn(g, - "cde: channel had timed out" - " (temporary channel)"); - /* going to be deleted anyway */ - } else { - nvgpu_warn(g, - "cde: channel had timed out" - ", reloading"); - /* mark it to be deleted, replace with a new one */ - nvgpu_mutex_acquire(&cde_app->mutex); - cde_ctx->is_temporary = true; - if (gk20a_cde_create_context(l)) { - nvgpu_err(g, "cde: can't replace context"); - } - nvgpu_mutex_release(&cde_app->mutex); - } - } - - /* delete temporary contexts later (watch for doubles) */ - if (cde_ctx->is_temporary && cde_ctx->in_use) { - WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work)); - schedule_delayed_work(&cde_ctx->ctx_deleter_work, - msecs_to_jiffies(CTX_DELETE_TIME)); - } - - if (!ch->has_timedout) - gk20a_cde_ctx_release(cde_ctx); -} - -static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_firmware *img; - struct channel_gk20a *ch; - struct tsg_gk20a *tsg; - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gpfifo_args gpfifo_args; - int err = 0; - u64 vaddr; - - img = nvgpu_request_firmware(g, "gpu2cde.bin", 0); - if (!img) { - nvgpu_err(g, "cde: could not fetch the firmware"); - return -ENOSYS; - } - - tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); - if (!tsg) { - nvgpu_err(g, "cde: could not create TSG"); - err = -ENOMEM; - goto err_get_gk20a_channel; - } - - ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, - cde_ctx, - -1, - false); - if (!ch) { - nvgpu_warn(g, "cde: gk20a channel not available"); - err = -ENOMEM; - goto err_get_gk20a_channel; - } - - ch->timeout.enabled = false; - - /* bind the channel to the vm */ - err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch); - if (err) { - nvgpu_warn(g, "cde: could not bind vm"); - goto err_commit_va; - } - - err = gk20a_tsg_bind_channel(tsg, ch); - if (err) { - nvgpu_err(g, "cde: unable to bind to tsg"); - goto err_alloc_gpfifo; - } - - gpfifo_args.num_entries = 1024; - gpfifo_args.num_inflight_jobs = 0; - gpfifo_args.flags = 0; - /* allocate gpfifo (1024 should be more than enough) */ - err = gk20a_channel_alloc_gpfifo(ch, 
&gpfifo_args); - if (err) { - nvgpu_warn(g, "cde: unable to allocate gpfifo"); - goto err_alloc_gpfifo; - } - - /* map backing store to gpu virtual space */ - vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, - g->gr.compbit_store.mem.size, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_read_only, - false, - gr->compbit_store.mem.aperture); - - if (!vaddr) { - nvgpu_warn(g, "cde: cannot map compression bit backing store"); - err = -ENOMEM; - goto err_map_backingstore; - } - - /* store initialisation data */ - cde_ctx->ch = ch; - cde_ctx->tsg = tsg; - cde_ctx->vm = ch->vm; - cde_ctx->backing_store_vaddr = vaddr; - - /* initialise the firmware */ - err = gk20a_init_cde_img(cde_ctx, img); - if (err) { - nvgpu_warn(g, "cde: image initialisation failed"); - goto err_init_cde_img; - } - - /* initialisation done */ - nvgpu_release_firmware(g, img); - - return 0; - -err_init_cde_img: - nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); -err_map_backingstore: -err_alloc_gpfifo: - nvgpu_vm_put(ch->vm); -err_commit_va: -err_get_gk20a_channel: - nvgpu_release_firmware(g, img); - nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err); - return err; -} - -int gk20a_cde_reload(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - int err; - - if (!cde_app->initialised) - return -ENOSYS; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&cde_app->mutex); - - gk20a_cde_stop(l); - - err = gk20a_cde_create_contexts(l); - if (!err) - cde_app->initialised = true; - - nvgpu_mutex_release(&cde_app->mutex); - - gk20a_idle(g); - return err; -} - -int gk20a_init_cde_support(struct nvgpu_os_linux *l) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a *g = &l->g; - int err; - - if (cde_app->initialised) - return 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); - - err = nvgpu_mutex_init(&cde_app->mutex); - if (err) - return err; - - nvgpu_mutex_acquire(&cde_app->mutex); - - nvgpu_init_list_node(&cde_app->free_contexts); - nvgpu_init_list_node(&cde_app->used_contexts); - cde_app->ctx_count = 0; - cde_app->ctx_count_top = 0; - cde_app->ctx_usecount = 0; - - err = gk20a_cde_create_contexts(l); - if (!err) - cde_app->initialised = true; - - nvgpu_mutex_release(&cde_app->mutex); - nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err); - - if (err) - nvgpu_mutex_destroy(&cde_app->mutex); - - return err; -} - -enum cde_launch_patch_id { - PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, - PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, - PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ - PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, - PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, - PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ - PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ - PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ - PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, - PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ - PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ - PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, - PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, - PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, - PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, - PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, - PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, - PATCH_USER_CONST_XBLOCKS_ID = 1041, - 
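-	/*
-	 * Ids from 1042 on parameterize the horizontal (H_*) and vertical
-	 * (V_*) conversion passes separately. Each id is patched into its
-	 * firmware-defined target buffer with the gk20a_cde_hdr_param
-	 * mask/shift rule from cde.h; e.g. with a mask of 0xffff and a
-	 * shift of 0 the write reduces to
-	 *   *target = (*target & ~0xffff) | (value & 0xffff);
-	 * (the mask/shift values here are illustrative, not taken from any
-	 * actual firmware header)
-	 */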
PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, - PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, - PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, - PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, - PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, - PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, - PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, - PATCH_H_LAUNCH_WORD1_ID = 1049, - PATCH_H_LAUNCH_WORD2_ID = 1050, - PATCH_V_LAUNCH_WORD1_ID = 1051, - PATCH_V_LAUNCH_WORD2_ID = 1052, - PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, - PATCH_H_QMD_REGISTER_COUNT_ID = 1054, - PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, - PATCH_V_QMD_REGISTER_COUNT_ID = 1056, -}; - -/* maximum number of WRITE_PATCHes in the below function */ -#define MAX_CDE_LAUNCH_PATCHES 32 - -static int gk20a_buffer_convert_gpu_to_cde_v1( - struct nvgpu_os_linux *l, - struct dma_buf *dmabuf, u32 consumer, - u64 offset, u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence_in, - struct gk20a_buffer_state *state) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; - int param = 0; - int err = 0; - struct gk20a_fence *new_fence = NULL; - const int wgx = 8; - const int wgy = 8; - const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ - const int xalign = compbits_per_byte * wgx; - const int yalign = wgy; - - /* Compute per launch parameters */ - const int xtiles = (width + 7) >> 3; - const int ytiles = (height + 7) >> 3; - const int gridw_h = roundup(xtiles, xalign) / xalign; - const int gridh_h = roundup(ytiles, yalign) / yalign; - const int gridw_v = roundup(ytiles, xalign) / xalign; - const int gridh_v = roundup(xtiles, yalign) / yalign; - const int xblocks = (xtiles + 1) >> 1; - const int voffset = compbits_voffset - compbits_hoffset; - - int hprog = -1; - int vprog = -1; - - if (l->ops.cde.get_program_numbers) - l->ops.cde.get_program_numbers(g, block_height_log2, - l->cde_app.shader_parameter, - &hprog, &vprog); - else { - nvgpu_warn(g, "cde: chip not supported"); - return -ENOSYS; - } - - if (hprog < 0 || vprog < 0) { - nvgpu_warn(g, "cde: could not determine programs"); - return -ENOSYS; - } - - if (xtiles > 8192 / 8 || ytiles > 8192 / 8) - nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", - xtiles, ytiles); - - nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", - width, height, block_height_log2, - compbits_hoffset, compbits_voffset, scatterbuffer_offset); - nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", - width, height, xtiles, ytiles); - nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", - wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); - nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", - hprog, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], - l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], - vprog, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], - l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); - - /* Write parameters */ -#define WRITE_PATCH(NAME, VALUE) \ - params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} - WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); - WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, - block_height_log2); - WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); - WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); - WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); - 
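-	/*
-	 * Worked example, assuming a hypothetical 1920x1080 surface with
-	 * the 8x8 workgroup above: xtiles = 240, ytiles = 135, xalign = 32,
-	 * yalign = 8, so the H pass launches an 8x17 grid, the V pass a
-	 * 5x30 grid, and xblocks = 120.
-	 */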
WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); - WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); - - WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); - WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); - WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); - - WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); - WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); - WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); - - WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); - WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, - l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); - WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); - WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, - l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); - - if (consumer & NVGPU_GPU_COMPBITS_CDEH) { - WRITE_PATCH(PATCH_H_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); - WRITE_PATCH(PATCH_H_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); - } else { - WRITE_PATCH(PATCH_H_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); - WRITE_PATCH(PATCH_H_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); - } - - if (consumer & NVGPU_GPU_COMPBITS_CDEV) { - WRITE_PATCH(PATCH_V_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); - WRITE_PATCH(PATCH_V_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); - } else { - WRITE_PATCH(PATCH_V_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); - WRITE_PATCH(PATCH_V_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); - } -#undef WRITE_PATCH - - err = gk20a_cde_convert(l, dmabuf, - compbits_hoffset, - scatterbuffer_offset, - fence_in, submit_flags, - params, param, &new_fence); - if (err) - goto out; - - /* compbits generated, update state & fence */ - gk20a_fence_put(state->fence); - state->fence = new_fence; - state->valid_compbits |= consumer & - (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); -out: - return err; -} - -static int gk20a_buffer_convert_gpu_to_cde( - struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, - u64 offset, u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence_in, - struct gk20a_buffer_state *state) -{ - struct gk20a *g = &l->g; - int err = 0; - - if (!l->cde_app.initialised) - return -ENOSYS; - - nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", - l->cde_app.firmware_version); - - if (l->cde_app.firmware_version == 1) { - err = gk20a_buffer_convert_gpu_to_cde_v1( - l, dmabuf, consumer, offset, compbits_hoffset, - compbits_voffset, scatterbuffer_offset, - width, height, block_height_log2, - submit_flags, fence_in, state); - } else { - nvgpu_err(g, "unsupported CDE firmware version %d", - l->cde_app.firmware_version); - err = -EINVAL; - } - - return err; -} - -int gk20a_prepare_compressible_read( - struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, - u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence, - u32 *valid_compbits, u32 
*zbc_color, - struct gk20a_fence **fence_out) -{ - struct gk20a *g = &l->g; - int err = 0; - struct gk20a_buffer_state *state; - struct dma_buf *dmabuf; - u32 missing_bits; - - dmabuf = dma_buf_get(buffer_fd); - if (IS_ERR(dmabuf)) - return -EINVAL; - - err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); - if (err) { - dma_buf_put(dmabuf); - return err; - } - - missing_bits = (state->valid_compbits ^ request) & request; - - nvgpu_mutex_acquire(&state->lock); - - if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { - - gk20a_fence_put(state->fence); - state->fence = NULL; - /* state->fence = decompress(); - state->valid_compbits = 0; */ - err = -EINVAL; - goto out; - } else if (missing_bits) { - u32 missing_cde_bits = missing_bits & - (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); - if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && - missing_cde_bits) { - err = gk20a_buffer_convert_gpu_to_cde( - l, dmabuf, - missing_cde_bits, - offset, compbits_hoffset, - compbits_voffset, scatterbuffer_offset, - width, height, block_height_log2, - submit_flags, fence, - state); - if (err) - goto out; - } - } - - if (state->fence && fence_out) - *fence_out = gk20a_fence_get(state->fence); - - if (valid_compbits) - *valid_compbits = state->valid_compbits; - - if (zbc_color) - *zbc_color = state->zbc_color; - -out: - nvgpu_mutex_release(&state->lock); - dma_buf_put(dmabuf); - return err; -} - -int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, - u32 valid_compbits, u64 offset, u32 zbc_color) -{ - int err; - struct gk20a_buffer_state *state; - struct dma_buf *dmabuf; - - dmabuf = dma_buf_get(buffer_fd); - if (IS_ERR(dmabuf)) { - nvgpu_err(g, "invalid dmabuf"); - return -EINVAL; - } - - err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); - if (err) { - nvgpu_err(g, "could not get state from dmabuf"); - dma_buf_put(dmabuf); - return err; - } - - nvgpu_mutex_acquire(&state->lock); - - /* Update the compbits state. */ - state->valid_compbits = valid_compbits; - state->zbc_color = zbc_color; - - /* Discard previous compbit job fence. */ - gk20a_fence_put(state->fence); - state->fence = NULL; - - nvgpu_mutex_release(&state->lock); - dma_buf_put(dmabuf); - return 0; -} - -int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - u32 ver = g->params.gpu_arch + g->params.gpu_impl; - - switch (ver) { - case GK20A_GPUID_GM20B: - case GK20A_GPUID_GM20B_B: - l->ops.cde = gm20b_cde_ops.cde; - break; - case NVGPU_GPUID_GP10B: - l->ops.cde = gp10b_cde_ops.cde; - break; - default: - /* CDE is optional, so today ignoring unknown chip is fine */ - break; - } - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h deleted file mode 100644 index 5928b624..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde.h +++ /dev/null @@ -1,326 +0,0 @@ -/* - * GK20A color decompression engine support - * - * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _CDE_GK20A_H_
-#define _CDE_GK20A_H_
-
-#include
-#include
-#include
-
-#include
-#include
-
-#define MAX_CDE_BUFS 10
-#define MAX_CDE_PARAMS 64
-#define MAX_CDE_USER_PARAMS 40
-#define MAX_CDE_ARRAY_ENTRIES 9
-
-/*
- * The size of the context ring buffer that is dedicated for handling cde
- * jobs. Re-using a context (=channel) for a different cde job forces a cpu
- * wait on the previous job to that channel, so increasing this value
- * reduces the likelihood of stalls.
- */
-#define NUM_CDE_CONTEXTS 4
-
-struct dma_buf;
-struct device;
-struct nvgpu_os_linux;
-struct gk20a;
-struct gk20a_fence;
-struct nvgpu_channel_fence;
-struct channel_gk20a;
-struct vm_gk20a;
-struct nvgpu_gpfifo_entry;
-
-/*
- * this element defines a buffer that is allocated and mapped into gpu address
- * space. data_byte_offset defines the beginning of the buffer inside the
- * firmware. num_bytes defines how many bytes the buffer contains.
- *
- * If data_byte_offset is zero, we allocate an empty buffer.
- */
-
-struct gk20a_cde_hdr_buf {
-	u64 data_byte_offset;
-	u64 num_bytes;
-};
-
-/*
- * this element defines a constant patching in buffers. It basically
- * computes the physical address of source_buf + source_byte_offset. The
- * address is then modified into the patch value as per:
- *   value = (current_value & ~mask) | (address << shift) & mask.
- *
- * The type field defines the register size as:
- *  0=u32,
- *  1=u64 (little endian),
- *  2=u64 (big endian)
- */
-
-struct gk20a_cde_hdr_replace {
-	u32 target_buf;
-	u32 source_buf;
-	s32 shift;
-	u32 type;
-	u64 target_byte_offset;
-	u64 source_byte_offset;
-	u64 mask;
-};
-
-enum {
-	TYPE_PARAM_TYPE_U32 = 0,
-	TYPE_PARAM_TYPE_U64_LITTLE,
-	TYPE_PARAM_TYPE_U64_BIG
-};
-
-/*
- * this element defines a runtime patching in buffers. Parameters with ids
- * from 0 to 1023 are reserved for special usage as follows:
- *  0 = comptags_per_cacheline,
- *  1 = slices_per_fbp,
- *  2 = num_fbps
- *  3 = source buffer first page offset
- *  4 = source buffer block height log2
- *  5 = backing store memory address
- *  6 = destination memory address
- *  7 = destination size (bytes)
- *  8 = backing store size (bytes)
- *  9 = cache line size
- *
- * Parameters with id 1024 and above are user-specified, i.e. they determine
- * where parameters from user space should be placed in buffers, what their
- * type is, etc.
- *
- * Once the value is available, we add data_offset to the value.
- *
- * The value address is then modified into the patch value as per:
- *   value = (current_value & ~mask) | (address << shift) & mask.
- *
- * The type field defines the register size as:
- *  0=u32,
- *  1=u64 (little endian),
- *  2=u64 (big endian)
- */
-
-struct gk20a_cde_hdr_param {
-	u32 id;
-	u32 target_buf;
-	s32 shift;
-	u32 type;
-	s64 data_offset;
-	u64 target_byte_offset;
-	u64 mask;
-};
-
-enum {
-	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
-	TYPE_PARAM_GPU_CONFIGURATION,
-	TYPE_PARAM_FIRSTPAGEOFFSET,
-	TYPE_PARAM_NUMPAGES,
-	TYPE_PARAM_BACKINGSTORE,
-	TYPE_PARAM_DESTINATION,
-	TYPE_PARAM_DESTINATION_SIZE,
-	TYPE_PARAM_BACKINGSTORE_SIZE,
-	TYPE_PARAM_SOURCE_SMMU_ADDR,
-	TYPE_PARAM_BACKINGSTORE_BASE_HW,
-	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
-	TYPE_PARAM_SCATTERBUFFER,
-	TYPE_PARAM_SCATTERBUFFER_SIZE,
-	NUM_RESERVED_PARAMS = 1024,
-};
-
-/*
- * This header element defines a command. The op field determines whether the
- * element is defining an init (0) or convert command (1). data_byte_offset
- * denotes the beginning address of command elements in the file.
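- * num_entries gives the number of gk20a_cde_cmd_elem records found at
- * that offset (this is what gk20a_init_cde_command() consumes).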
- */
-
-struct gk20a_cde_hdr_command {
-	u32 op;
-	u32 num_entries;
-	u64 data_byte_offset;
-};
-
-enum {
-	TYPE_BUF_COMMAND_INIT = 0,
-	TYPE_BUF_COMMAND_CONVERT,
-	TYPE_BUF_COMMAND_NOOP
-};
-
-/*
- * This command element defines one entry inside a push buffer. target_buf
- * defines the buffer holding the pushbuffer entries, target_byte_offset the
- * offset inside the buffer, and num_bytes the size of the command data.
- */
-
-struct gk20a_cde_cmd_elem {
-	u32 target_buf;
-	u32 padding;
-	u64 target_byte_offset;
-	u64 num_bytes;
-};
-
-/*
- * This element is used for storing a small array of data.
- */
-
-enum {
-	ARRAY_PROGRAM_OFFSET = 0,
-	ARRAY_REGISTER_COUNT,
-	ARRAY_LAUNCH_COMMAND,
-	NUM_CDE_ARRAYS
-};
-
-struct gk20a_cde_hdr_array {
-	u32 id;
-	u32 data[MAX_CDE_ARRAY_ENTRIES];
-};
-
-/*
- * The following defines a single header element. Each element has a type
- * and carries one of the data structures in the union.
- */
-
-struct gk20a_cde_hdr_elem {
-	u32 type;
-	u32 padding;
-	union {
-		struct gk20a_cde_hdr_buf buf;
-		struct gk20a_cde_hdr_replace replace;
-		struct gk20a_cde_hdr_param param;
-		u32 required_class;
-		struct gk20a_cde_hdr_command command;
-		struct gk20a_cde_hdr_array array;
-	};
-};
-
-enum {
-	TYPE_BUF = 0,
-	TYPE_REPLACE,
-	TYPE_PARAM,
-	TYPE_REQUIRED_CLASS,
-	TYPE_COMMAND,
-	TYPE_ARRAY
-};
-
-struct gk20a_cde_param {
-	u32 id;
-	u32 padding;
-	u64 value;
-};
-
-struct gk20a_cde_ctx {
-	struct nvgpu_os_linux *l;
-	struct device *dev;
-
-	/* channel related data */
-	struct channel_gk20a *ch;
-	struct tsg_gk20a *tsg;
-	struct vm_gk20a *vm;
-
-	/* buf converter configuration */
-	struct nvgpu_mem mem[MAX_CDE_BUFS];
-	unsigned int num_bufs;
-
-	/* buffer patching params (where should patching be done) */
-	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
-	unsigned int num_params;
-
-	/* storage for user space parameter values */
-	u32 user_param_values[MAX_CDE_USER_PARAMS];
-
-	u32 surf_param_offset;
-	u32 surf_param_lines;
-	u64 surf_vaddr;
-
-	u64 compbit_vaddr;
-	u64 compbit_size;
-
-	u64 scatterbuffer_vaddr;
-	u64 scatterbuffer_size;
-
-	u64 backing_store_vaddr;
-
-	struct nvgpu_gpfifo_entry *init_convert_cmd;
-	int init_cmd_num_entries;
-
-	struct nvgpu_gpfifo_entry *convert_cmd;
-	int convert_cmd_num_entries;
-
-	struct kobj_attribute attr;
-
-	bool init_cmd_executed;
-
-	struct nvgpu_list_node list;
-	bool is_temporary;
-	bool in_use;
-	struct delayed_work ctx_deleter_work;
-};
-
-static inline struct gk20a_cde_ctx *
-gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
-{
-	return (struct gk20a_cde_ctx *)
-		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
-}
-
-struct gk20a_cde_app {
-	bool initialised;
-	struct nvgpu_mutex mutex;
-
-	struct nvgpu_list_node free_contexts;
-	struct nvgpu_list_node used_contexts;
-	unsigned int ctx_count;
-	unsigned int ctx_usecount;
-	unsigned int ctx_count_top;
-
-	u32 firmware_version;
-
-	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
-
-	u32 shader_parameter;
-};
-
-void gk20a_cde_destroy(struct nvgpu_os_linux *l);
-void gk20a_cde_suspend(struct nvgpu_os_linux *l);
-int gk20a_init_cde_support(struct nvgpu_os_linux *l);
-int gk20a_cde_reload(struct nvgpu_os_linux *l);
-int gk20a_cde_convert(struct nvgpu_os_linux *l,
-		struct dma_buf *compbits_buf,
-		u64 compbits_byte_offset,
-		u64 scatterbuffer_byte_offset,
-		struct nvgpu_channel_fence *fence,
-		u32 __flags, struct gk20a_cde_param *params,
-		int num_params, struct gk20a_fence **fence_out);
-
-int gk20a_prepare_compressible_read(
-		struct nvgpu_os_linux *l, u32 buffer_fd,
u32 request, u64 offset, - u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence, - u32 *valid_compbits, u32 *zbc_color, - struct gk20a_fence **fence_out); -int gk20a_mark_compressible_write( - struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, - u32 zbc_color); -int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c b/drivers/gpu/nvgpu/common/linux/cde_gm20b.c deleted file mode 100644 index 1cd15c54..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * GM20B CDE - * - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "gk20a/gk20a.h" -#include "cde_gm20b.h" - -enum programs { - PROG_HPASS = 0, - PROG_VPASS_LARGE = 1, - PROG_VPASS_SMALL = 2, - PROG_HPASS_DEBUG = 3, - PROG_VPASS_LARGE_DEBUG = 4, - PROG_VPASS_SMALL_DEBUG = 5, - PROG_PASSTHROUGH = 6, -}; - -static void gm20b_cde_get_program_numbers(struct gk20a *g, - u32 block_height_log2, - u32 shader_parameter, - int *hprog_out, int *vprog_out) -{ - int hprog = PROG_HPASS; - int vprog = (block_height_log2 >= 2) ? - PROG_VPASS_LARGE : PROG_VPASS_SMALL; - if (shader_parameter == 1) { - hprog = PROG_PASSTHROUGH; - vprog = PROG_PASSTHROUGH; - } else if (shader_parameter == 2) { - hprog = PROG_HPASS_DEBUG; - vprog = (block_height_log2 >= 2) ? - PROG_VPASS_LARGE_DEBUG : - PROG_VPASS_SMALL_DEBUG; - } - - *hprog_out = hprog; - *vprog_out = vprog; -} - -struct nvgpu_os_linux_ops gm20b_cde_ops = { - .cde = { - .get_program_numbers = gm20b_cde_get_program_numbers, - }, -}; diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h b/drivers/gpu/nvgpu/common/linux/cde_gm20b.h deleted file mode 100644 index 640d6ab6..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * GM20B CDE - * - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _NVHOST_GM20B_CDE -#define _NVHOST_GM20B_CDE - -#include "os_linux.h" - -extern struct nvgpu_os_linux_ops gm20b_cde_ops; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c deleted file mode 100644 index 5c0e79a7..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * GP10B CDE - * - * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-
-#include "gk20a/gk20a.h"
-#include "cde_gp10b.h"
-
-#include
-#include
-
-enum gp10b_programs {
-	GP10B_PROG_HPASS = 0,
-	GP10B_PROG_HPASS_4K = 1,
-	GP10B_PROG_VPASS = 2,
-	GP10B_PROG_VPASS_4K = 3,
-	GP10B_PROG_HPASS_DEBUG = 4,
-	GP10B_PROG_HPASS_4K_DEBUG = 5,
-	GP10B_PROG_VPASS_DEBUG = 6,
-	GP10B_PROG_VPASS_4K_DEBUG = 7,
-	GP10B_PROG_PASSTHROUGH = 8,
-};
-
-void gp10b_cde_get_program_numbers(struct gk20a *g,
-				   u32 block_height_log2,
-				   u32 shader_parameter,
-				   int *hprog_out, int *vprog_out)
-{
-	int hprog, vprog;
-
-	if (shader_parameter == 1) {
-		hprog = GP10B_PROG_PASSTHROUGH;
-		vprog = GP10B_PROG_PASSTHROUGH;
-	} else {
-		hprog = GP10B_PROG_HPASS;
-		vprog = GP10B_PROG_VPASS;
-		if (shader_parameter == 2) {
-			hprog = GP10B_PROG_HPASS_DEBUG;
-			vprog = GP10B_PROG_VPASS_DEBUG;
-		}
-		if (!nvgpu_iommuable(g)) {
-			if (!g->mm.disable_bigpage) {
-				nvgpu_warn(g,
-					   "Without an IOMMU, big pages cannot be used");
-			}
-			/* the 4K (small page) variant of each program
-			 * directly follows the big page one in the enum */
-			hprog |= 1;
-			vprog |= 1;
-		}
-	}
-
-	*hprog_out = hprog;
-	*vprog_out = vprog;
-}
-
-bool gp10b_need_scatter_buffer(struct gk20a *g)
-{
-	return !nvgpu_iommuable(g);
-}
-
-static u8 parity(u32 a)
-{
-	/* fold to a 4-bit value, then look the answer up in the
-	 * 16-entry parity table 0x6996 */
-	a ^= a >> 16u;
-	a ^= a >> 8u;
-	a ^= a >> 4u;
-	a &= 0xfu;
-	return (0x6996u >> a) & 1u;
-}
-
-int gp10b_populate_scatter_buffer(struct gk20a *g,
-		struct sg_table *sgt,
-		size_t surface_size,
-		void *scatter_buffer_ptr,
-		size_t scatter_buffer_size)
-{
-	/* map scatter buffer to CPU VA and fill it */
-	const u32 page_size_log2 = 12;
-	const u32 page_size = 1 << page_size_log2;
-	const u32 page_size_shift = page_size_log2 - 7u;
-
-	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
-	const u32 getSliceMaskGP10B = 0x3ffffe48;
-	u8 *scatter_buffer = scatter_buffer_ptr;
-
-	size_t i;
-	struct scatterlist *sg = NULL;
-	u8 d = 0;
-	size_t page = 0;
-	size_t pages_left;
-
-	surface_size = round_up(surface_size, page_size);
-
-	pages_left = surface_size >> page_size_log2;
-	/* one scatter bit per page; round up so that a partial last byte
-	 * still fits in the buffer */
-	if (((pages_left + 7) >> 3) > scatter_buffer_size)
-		return -ENOMEM;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		unsigned int j;
-		u64 surf_pa = sg_phys(sg);
-		unsigned int n = (int)(sg->length >> page_size_log2);
-
-		nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
-
-		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
-			u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B) >> page_size_shift;
-			u8 scatter_bit = parity(addr);
-			u8 bit = page & 7;
-
-			d |= scatter_bit << bit;
-			if (bit == 7) {
-				scatter_buffer[page >> 3] = d;
-				d = 0;
-			}
-
-			++page;
-			--pages_left;
-		}
-
-		if (pages_left == 0)
-			break;
-	}
-
-	/* write the last byte in case the number of pages is not divisible by 8 */
-	if ((page & 7) != 0)
-		scatter_buffer[page >> 3] = d;
-
-	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
-		nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
-		for (i = 0; i < page >> 3; i++) {
-			nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
-		}
-	}
-
-	return 0;
-}
-
-struct nvgpu_os_linux_ops gp10b_cde_ops = {
-	.cde = {
-		.get_program_numbers = gp10b_cde_get_program_numbers,
-		.need_scatter_buffer = gp10b_need_scatter_buffer,
-		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
-	},
-};
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h b/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
deleted file mode 100644
index 52e9f292..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * GP10B CDE
- *
- * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _NVHOST_GP10B_CDE -#define _NVHOST_GP10B_CDE - -#include "os_linux.h" - -extern struct nvgpu_os_linux_ops gp10b_cde_ops; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c deleted file mode 100644 index 165f33db..00000000 --- a/drivers/gpu/nvgpu/common/linux/ce2.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include - -#include "gk20a/ce2_gk20a.h" -#include "gk20a/gk20a.h" -#include "channel.h" - -static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) -{ - /* there is no local memory available, - don't allow local memory related CE flags */ - if (!g->mm.vidmem.size) { - launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | - NVGPU_CE_DST_LOCATION_LOCAL_FB); - } - return launch_flags; -} - -int gk20a_ce_execute_ops(struct gk20a *g, - u32 ce_ctx_id, - u64 src_buf, - u64 dst_buf, - u64 size, - unsigned int payload, - int launch_flags, - int request_operation, - u32 submit_flags, - struct gk20a_fence **gk20a_fence_out) -{ - int ret = -EPERM; - struct gk20a_ce_app *ce_app = &g->ce_app; - struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; - bool found = false; - u32 *cmd_buf_cpu_va; - u64 cmd_buf_gpu_va = 0; - u32 methodSize; - u32 cmd_buf_read_offset; - u32 dma_copy_class; - struct nvgpu_gpfifo_entry gpfifo; - struct nvgpu_channel_fence fence = {0, 0}; - struct gk20a_fence *ce_cmd_buf_fence_out = NULL; - - if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) - goto end; - - nvgpu_mutex_acquire(&ce_app->app_mutex); - - nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, - &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { - if (ce_ctx->ctx_id == ce_ctx_id) { - found = true; - break; - } - } - - nvgpu_mutex_release(&ce_app->app_mutex); - - if (!found) { - ret = -EINVAL; - goto end; - } - - if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { - ret = -ENODEV; - goto end; - } - - nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); - - ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; - - cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * - (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); - - cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; - - if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { - struct gk20a_fence **prev_post_fence = - &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; - - ret = gk20a_fence_wait(g, *prev_post_fence, - gk20a_get_gr_idle_timeout(g)); - - gk20a_fence_put(*prev_post_fence); - *prev_post_fence = NULL; - if (ret) - goto noop; - } - - cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); - - dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - methodSize = gk20a_ce_prepare_submit(src_buf, - dst_buf, - size, - &cmd_buf_cpu_va[cmd_buf_read_offset], - NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, - payload, - gk20a_get_valid_launch_flags(g, launch_flags), - request_operation, - dma_copy_class); - - if (methodSize) { - /* store the element into gpfifo */ - gpfifo.entry0 = - u64_lo32(cmd_buf_gpu_va); - gpfifo.entry1 = - (u64_hi32(cmd_buf_gpu_va) | - pbdma_gp_entry1_length_f(methodSize)); - - /* take always the postfence as it is needed for protecting the ce context */ - submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - nvgpu_smp_wmb(); - - ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, - 1, submit_flags, &fence, - &ce_cmd_buf_fence_out, NULL); - - if (!ret) { - ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = - ce_cmd_buf_fence_out; - if (gk20a_fence_out) { - gk20a_fence_get(ce_cmd_buf_fence_out); - *gk20a_fence_out = ce_cmd_buf_fence_out; - } - - /* Next available command buffer queue Index */ - ++ce_ctx->cmd_buf_read_queue_offset; - } - } else { - ret = -ENOMEM; - } -noop: - nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); -end: - return ret; -} diff --git a/drivers/gpu/nvgpu/common/linux/channel.c 
b/drivers/gpu/nvgpu/common/linux/channel.c deleted file mode 100644 index 7810bc21..00000000 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ /dev/null @@ -1,1021 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -/* - * This is required for nvgpu_vm_find_buf() which is used in the tracing - * code. Once we can get and access userspace buffers without requiring - * direct dma_buf usage this can be removed. - */ -#include - -#include "gk20a/gk20a.h" - -#include "channel.h" -#include "ioctl_channel.h" -#include "os_linux.h" - -#include - -#include -#include -#include -#include - -#include "sync_sema_android.h" - -u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) - flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) - flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) - flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; - - return flags; -} - -/* - * API to convert error_notifiers in common code and of the form - * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user - * space and of the form NVGPU_CHANNEL_* - */ -static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) -{ - switch (error_notifier) { - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: - return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: - return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: - return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; - case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: - return NVGPU_CHANNEL_GR_EXCEPTION; - case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: - return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: - return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: - return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; - case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: - return NVGPU_CHANNEL_PBDMA_ERROR; - case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: - return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; - case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: - return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; - case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: - return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; - } - - pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); - - return error_notifier; -} - -/** - * nvgpu_set_error_notifier_locked() - * Should be called with 
ch->error_notifier_mutex held - * - * error should be of the form NVGPU_ERR_NOTIFIER_* - */ -void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - error = nvgpu_error_notifier_to_channel_notifier(error); - - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - struct timespec time_data; - u64 nsec; - - getnstimeofday(&time_data); - nsec = ((u64)time_data.tv_sec) * 1000000000u + - (u64)time_data.tv_nsec; - notification->time_stamp.nanoseconds[0] = - (u32)nsec; - notification->time_stamp.nanoseconds[1] = - (u32)(nsec >> 32); - notification->info32 = error; - notification->status = 0xffff; - - nvgpu_err(ch->g, - "error notifier set to %d for ch %d", error, ch->chid); - } -} - -/* error should be of the form NVGPU_ERR_NOTIFIER_* */ -void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - nvgpu_set_error_notifier_locked(ch, error); - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - - /* Don't overwrite error flag if it is already set */ - if (notification->status != 0xffff) - nvgpu_set_error_notifier_locked(ch, error); - } - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ -bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - bool notifier_set = false; - - error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - u32 err = notification->info32; - - if (err == error_notifier) - notifier_set = true; - } - nvgpu_mutex_release(&priv->error_notifier.mutex); - - return notifier_set; -} - -static void gk20a_channel_update_runcb_fn(struct work_struct *work) -{ - struct nvgpu_channel_completion_cb *completion_cb = - container_of(work, struct nvgpu_channel_completion_cb, work); - struct nvgpu_channel_linux *priv = - container_of(completion_cb, - struct nvgpu_channel_linux, completion_cb); - struct channel_gk20a *ch = priv->ch; - void (*fn)(struct channel_gk20a *, void *); - void *user_data; - - nvgpu_spinlock_acquire(&completion_cb->lock); - fn = completion_cb->fn; - user_data = completion_cb->user_data; - nvgpu_spinlock_release(&completion_cb->lock); - - if (fn) - fn(ch, user_data); -} - -static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - nvgpu_spinlock_init(&priv->completion_cb.lock); - INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); -} - -static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - 
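-	/*
-	 * Clearing fn under the lock makes a concurrently running worker
-	 * (which re-reads fn/user_data under the same lock) a no-op; the
-	 * cancel_work_sync() below then flushes any instance already
-	 * queued by work_completion_signal().
-	 */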
nvgpu_spinlock_release(&priv->completion_cb.lock); - cancel_work_sync(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - schedule_work(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - cancel_work_sync(&priv->completion_cb.work); -} - -struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, - void (*update_fn)(struct channel_gk20a *, void *), - void *update_fn_data, - int runlist_id, - bool is_privileged_channel) -{ - struct channel_gk20a *ch; - struct nvgpu_channel_linux *priv; - - ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, - nvgpu_current_pid(g), nvgpu_current_tid(g)); - - if (ch) { - priv = ch->os_priv; - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = update_fn; - priv->completion_cb.user_data = update_fn_data; - nvgpu_spinlock_release(&priv->completion_cb.lock); - } - - return ch; -} - -static void nvgpu_channel_open_linux(struct channel_gk20a *ch) -{ -} - -static void nvgpu_channel_close_linux(struct channel_gk20a *ch) -{ - nvgpu_channel_work_completion_clear(ch); - -#if defined(CONFIG_GK20A_CYCLE_STATS) - gk20a_channel_free_cycle_stats_buffer(ch); - gk20a_channel_free_cycle_stats_snapshot(ch); -#endif -} - -static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv; - int err; - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) - return -ENOMEM; - - ch->os_priv = priv; - priv->ch = ch; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = true; -#endif - - err = nvgpu_mutex_init(&priv->error_notifier.mutex); - if (err) { - nvgpu_kfree(g, priv); - return err; - } - - nvgpu_channel_work_completion_init(ch); - - return 0; -} - -static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_destroy(&priv->error_notifier.mutex); - nvgpu_kfree(g, priv); - - ch->os_priv = NULL; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = false; -#endif -} - -static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, - const char *fmt, ...) 
-{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - char name[30]; - va_list args; - - fence_framework = &priv->fence_framework; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); - - fence_framework->timeline = gk20a_sync_timeline_create(name); - - if (!fence_framework->timeline) - return -EINVAL; - - return 0; -} -static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_signal(fence_framework->timeline); -} - -static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_destroy(fence_framework->timeline); - fence_framework->timeline = NULL; -} - -static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - return (fence_framework->timeline != NULL); -} - -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - int chid; - int err; - - for (chid = 0; chid < (int)f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - err = nvgpu_channel_alloc_linux(g, ch); - if (err) - goto err_clean; - } - - g->os_channel.open = nvgpu_channel_open_linux; - g->os_channel.close = nvgpu_channel_close_linux; - g->os_channel.work_completion_signal = - nvgpu_channel_work_completion_signal; - g->os_channel.work_completion_cancel_sync = - nvgpu_channel_work_completion_cancel_sync; - - g->os_channel.os_fence_framework_inst_exists = - nvgpu_channel_fence_framework_exists; - g->os_channel.init_os_fence_framework = - nvgpu_channel_init_os_fence_framework; - g->os_channel.signal_os_fence_framework = - nvgpu_channel_signal_os_fence_framework; - g->os_channel.destroy_os_fence_framework = - nvgpu_channel_destroy_os_fence_framework; - - return 0; - -err_clean: - for (; chid >= 0; chid--) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - return err; -} - -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - unsigned int chid; - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - - g->os_channel.os_fence_framework_inst_exists = NULL; - g->os_channel.init_os_fence_framework = NULL; - g->os_channel.signal_os_fence_framework = NULL; - g->os_channel.destroy_os_fence_framework = NULL; -} - -u32 nvgpu_get_gpfifo_entry_size(void) -{ - return sizeof(struct nvgpu_gpfifo_entry); -} - -#ifdef CONFIG_DEBUG_FS -static void trace_write_pushbuffer(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *g) -{ - void *mem = NULL; - unsigned int words; - u64 offset; - struct dma_buf *dmabuf = NULL; - - if (gk20a_debug_trace_cmdbuf) { - u64 gpu_va = (u64)g->entry0 | - (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); - int err; - - words = pbdma_gp_entry1_length_v(g->entry1); - err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); - if (!err) - mem = dma_buf_vmap(dmabuf); - } - - if (mem) { - u32 
i; - /* - * Write in batches of 128 as there seems to be a limit - * of how much you can output to ftrace at once. - */ - for (i = 0; i < words; i += 128U) { - trace_gk20a_push_cmdbuf( - c->g->name, - 0, - min(words - i, 128U), - offset + i * sizeof(u32), - mem); - } - dma_buf_vunmap(dmabuf, mem); - } -} -#endif - -static void trace_write_pushbuffer_range(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *g, - struct nvgpu_gpfifo_entry __user *user_gpfifo, - int offset, - int count) -{ -#ifdef CONFIG_DEBUG_FS - u32 size; - int i; - struct nvgpu_gpfifo_entry *gp; - bool gpfifo_allocated = false; - - if (!gk20a_debug_trace_cmdbuf) - return; - - if (!g && !user_gpfifo) - return; - - if (!g) { - size = count * sizeof(struct nvgpu_gpfifo_entry); - if (size) { - g = nvgpu_big_malloc(c->g, size); - if (!g) - return; - - if (copy_from_user(g, user_gpfifo, size)) { - nvgpu_big_free(c->g, g); - return; - } - } - gpfifo_allocated = true; - } - - gp = g + offset; - for (i = 0; i < count; i++, gp++) - trace_write_pushbuffer(c, gp); - - if (gpfifo_allocated) - nvgpu_big_free(c->g, g); -#endif -} - -/* - * Handle the submit synchronization - pre-fences and post-fences. - */ -static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, - struct nvgpu_channel_fence *fence, - struct channel_gk20a_job *job, - struct priv_cmd_entry **wait_cmd, - struct priv_cmd_entry **incr_cmd, - struct gk20a_fence **post_fence, - bool register_irq, - u32 flags) -{ - struct gk20a *g = c->g; - bool need_sync_fence = false; - bool new_sync_created = false; - int wait_fence_fd = -1; - int err = 0; - bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); - bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); - - if (g->aggressive_sync_destroy_thresh) { - nvgpu_mutex_acquire(&c->sync_lock); - if (!c->sync) { - c->sync = gk20a_channel_sync_create(c, false); - if (!c->sync) { - err = -ENOMEM; - nvgpu_mutex_release(&c->sync_lock); - goto fail; - } - new_sync_created = true; - } - nvgpu_atomic_inc(&c->sync->refcount); - nvgpu_mutex_release(&c->sync_lock); - } - - if (g->ops.fifo.resetup_ramfc && new_sync_created) { - err = g->ops.fifo.resetup_ramfc(c); - if (err) - goto fail; - } - - /* - * Optionally insert syncpt/semaphore wait in the beginning of gpfifo - * submission when user requested and the wait hasn't expired. - */ - if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { - int max_wait_cmds = c->deterministic ? 1 : 0; - - if (!pre_alloc_enabled) - job->wait_cmd = nvgpu_kzalloc(g, - sizeof(struct priv_cmd_entry)); - - if (!job->wait_cmd) { - err = -ENOMEM; - goto fail; - } - - if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { - wait_fence_fd = fence->id; - err = c->sync->wait_fd(c->sync, wait_fence_fd, - job->wait_cmd, max_wait_cmds); - } else { - err = c->sync->wait_syncpt(c->sync, fence->id, - fence->value, - job->wait_cmd); - } - - if (err) - goto clean_up_wait_cmd; - - if (job->wait_cmd->valid) - *wait_cmd = job->wait_cmd; - } - - if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && - (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) - need_sync_fence = true; - - /* - * Always generate an increment at the end of a GPFIFO submission. This - * is used to keep track of method completion for idle railgating. The - * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 
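
For readers following the submit path: the code above and below establishes a fixed bracketing of each submission — an optional wait command first, then the caller's GPFIFO entries, then an increment command that is always appended. A minimal standalone sketch of that ordering (plain C; all names here are hypothetical, push() stands in for appending one GPFIFO entry):

    /* Sketch: entry order for one submit (hypothetical types/names). */
    enum entry_kind { WAIT_CMD, USER_ENTRY, INCR_CMD };

    static void submit_order(int have_wait, int num_user,
                             void (*push)(enum entry_kind))
    {
            int i;

            if (have_wait)
                    push(WAIT_CMD);         /* pre-fence: wait on input fence */
            for (i = 0; i < num_user; i++)
                    push(USER_ENTRY);       /* caller's GPFIFO entries */
            push(INCR_CMD);                 /* post-fence: always appended */
    }
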
- */ - job->post_fence = gk20a_alloc_fence(c); - if (!job->post_fence) { - err = -ENOMEM; - goto clean_up_wait_cmd; - } - if (!pre_alloc_enabled) - job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); - - if (!job->incr_cmd) { - err = -ENOMEM; - goto clean_up_post_fence; - } - - if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) - err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, - job->post_fence, need_wfi, need_sync_fence, - register_irq); - else - err = c->sync->incr(c->sync, job->incr_cmd, - job->post_fence, need_sync_fence, - register_irq); - if (!err) { - *incr_cmd = job->incr_cmd; - *post_fence = job->post_fence; - } else - goto clean_up_incr_cmd; - - return 0; - -clean_up_incr_cmd: - free_priv_cmdbuf(c, job->incr_cmd); - if (!pre_alloc_enabled) - job->incr_cmd = NULL; -clean_up_post_fence: - gk20a_fence_put(job->post_fence); - job->post_fence = NULL; -clean_up_wait_cmd: - free_priv_cmdbuf(c, job->wait_cmd); - if (!pre_alloc_enabled) - job->wait_cmd = NULL; -fail: - *wait_cmd = NULL; - return err; -} - -static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, - struct priv_cmd_entry *cmd) -{ - struct gk20a *g = c->g; - struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; - struct nvgpu_gpfifo_entry x = { - .entry0 = u64_lo32(cmd->gva), - .entry1 = u64_hi32(cmd->gva) | - pbdma_gp_entry1_length_f(cmd->size) - }; - - nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), - &x, sizeof(x)); - - if (cmd->mem->aperture == APERTURE_SYSMEM) - trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, - cmd->mem->cpu_va + cmd->off * sizeof(u32)); - - c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); -} - -/* - * Copy source gpfifo entries into the gpfifo ring buffer, potentially - * splitting into two memcpys to handle wrap-around. - */ -static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *kern_gpfifo, - struct nvgpu_gpfifo_entry __user *user_gpfifo, - u32 num_entries) -{ - /* byte offsets */ - u32 gpfifo_size = - c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); - u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); - u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); - u32 end = start + len; /* exclusive */ - struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; - struct nvgpu_gpfifo_entry *cpu_src; - int err; - - if (user_gpfifo && !c->gpfifo.pipe) { - /* - * This path (from userspace to sysmem) is special in order to - * avoid two copies unnecessarily (from user to pipe, then from - * pipe to gpu sysmem buffer). 
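
The point of this special case is copy count: staging through the pipe buffer costs two copies (user to pipe, then pipe to the GPU buffer), while a sysmem-backed gpfifo can be filled directly from userspace in one. A hedged sketch of the two strategies (standalone C; memcpy stands in for copy_from_user, and mem_wr for the driver's vidmem write path):

    #include <string.h>

    /* One copy: user data lands directly in the sysmem gpfifo. */
    void copy_direct(void *gpfifo_sysmem, const void *user, size_t len)
    {
            memcpy(gpfifo_sysmem, user, len);
    }

    /* Two copies: user -> pipe staging buffer -> vidmem gpfifo. */
    void copy_staged(void *gpfifo_vidmem, void *pipe,
                     const void *user, size_t len,
                     void (*mem_wr)(void *dst, const void *src, size_t n))
    {
            memcpy(pipe, user, len);            /* copy 1 */
            mem_wr(gpfifo_vidmem, pipe, len);   /* copy 2 */
    }
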
- */ - if (end > gpfifo_size) { - /* wrap-around */ - int length0 = gpfifo_size - start; - int length1 = len - length0; - void __user *user2 = (u8 __user *)user_gpfifo + length0; - - err = copy_from_user(gpfifo_mem->cpu_va + start, - user_gpfifo, length0); - if (err) - return err; - - err = copy_from_user(gpfifo_mem->cpu_va, - user2, length1); - if (err) - return err; - } else { - err = copy_from_user(gpfifo_mem->cpu_va + start, - user_gpfifo, len); - if (err) - return err; - } - - trace_write_pushbuffer_range(c, NULL, user_gpfifo, - 0, num_entries); - goto out; - } else if (user_gpfifo) { - /* from userspace to vidmem, use the common copy path below */ - err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len); - if (err) - return err; - - cpu_src = c->gpfifo.pipe; - } else { - /* from kernel to either sysmem or vidmem, don't need - * copy_from_user so use the common path below */ - cpu_src = kern_gpfifo; - } - - if (end > gpfifo_size) { - /* wrap-around */ - int length0 = gpfifo_size - start; - int length1 = len - length0; - void *src2 = (u8 *)cpu_src + length0; - - nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0); - nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1); - } else { - nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len); - - } - - trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); - -out: - c->gpfifo.put = (c->gpfifo.put + num_entries) & - (c->gpfifo.entry_num - 1); - - return 0; -} - -int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *gpfifo, - struct nvgpu_submit_gpfifo_args *args, - u32 num_entries, - u32 flags, - struct nvgpu_channel_fence *fence, - struct gk20a_fence **fence_out, - struct fifo_profile_gk20a *profile) -{ - struct gk20a *g = c->g; - struct priv_cmd_entry *wait_cmd = NULL; - struct priv_cmd_entry *incr_cmd = NULL; - struct gk20a_fence *post_fence = NULL; - struct channel_gk20a_job *job = NULL; - /* we might need two extra gpfifo entries - one for pre fence - * and one for post fence. */ - const int extra_entries = 2; - bool skip_buffer_refcounting = (flags & - NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); - int err = 0; - bool need_job_tracking; - bool need_deferred_cleanup = false; - struct nvgpu_gpfifo_entry __user *user_gpfifo = args ? - (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL; - - if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) - return -ENODEV; - - if (c->has_timedout) - return -ETIMEDOUT; - - if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) - return -ENOMEM; - - /* fifo not large enough for request. Return error immediately. - * Kernel can insert gpfifo entries before and after user gpfifos. - * So, add extra_entries in user request. Also, HW with fifo size N - * can accept only N-1 entreis and so the below condition */ - if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { - nvgpu_err(g, "not enough gpfifo space allocated"); - return -ENOMEM; - } - - if (!gpfifo && !args) - return -EINVAL; - - if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | - NVGPU_SUBMIT_FLAGS_FENCE_GET)) && - !fence) - return -EINVAL; - - /* an address space needs to have been bound at this point. 
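
The capacity check above is worth spelling out: a hardware ring of N slots can only ever hold N-1 entries (one slot must stay empty so that full and empty states are distinguishable), and the kernel may add up to two priv cmd entries of its own per submit. A quick standalone check mirroring that arithmetic (entry counts hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    /* Usable capacity of an entry_num-slot ring is entry_num - 1, and
     * the kernel may bracket the submit with 2 extra entries. */
    static bool submit_fits(uint32_t entry_num, uint32_t num_entries)
    {
            const uint32_t extra_entries = 2;

            return num_entries + extra_entries <= entry_num - 1;
    }
    /* e.g. entry_num = 1024: at most 1021 user entries per submit. */
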
*/ - if (!gk20a_channel_as_bound(c)) { - nvgpu_err(g, - "not bound to an address space at time of gpfifo" - " submission."); - return -EINVAL; - } - - gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); - - /* update debug settings */ - nvgpu_ltc_sync_enabled(g); - - nvgpu_log_info(g, "channel %d", c->chid); - - /* - * Job tracking is necessary for any of the following conditions: - * - pre- or post-fence functionality - * - channel wdt - * - GPU rail-gating with non-deterministic channels - * - buffer refcounting - * - * If none of the conditions are met, then job tracking is not - * required and a fast submit can be done (ie. only need to write - * out userspace GPFIFO entries and update GP_PUT). - */ - need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || - (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || - c->timeout.enabled || - (g->can_railgate && !c->deterministic) || - !skip_buffer_refcounting; - - if (need_job_tracking) { - bool need_sync_framework = false; - - /* - * If the channel is to have deterministic latency and - * job tracking is required, the channel must have - * pre-allocated resources. Otherwise, we fail the submit here - */ - if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) - return -EINVAL; - - need_sync_framework = - gk20a_channel_sync_needs_sync_framework(g) || - (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && - flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); - - /* - * Deferred clean-up is necessary for any of the following - * conditions: - * - channel's deterministic flag is not set - * - dependency on sync framework, which could make the - * behavior of the clean-up operation non-deterministic - * (should not be performed in the submit path) - * - channel wdt - * - GPU rail-gating with non-deterministic channels - * - buffer refcounting - * - * If none of the conditions are met, then deferred clean-up - * is not required, and we clean-up one job-tracking - * resource in the submit path. - */ - need_deferred_cleanup = !c->deterministic || - need_sync_framework || - c->timeout.enabled || - (g->can_railgate && - !c->deterministic) || - !skip_buffer_refcounting; - - /* - * For deterministic channels, we don't allow deferred clean_up - * processing to occur. In cases we hit this, we fail the submit - */ - if (c->deterministic && need_deferred_cleanup) - return -EINVAL; - - if (!c->deterministic) { - /* - * Get a power ref unless this is a deterministic - * channel that holds them during the channel lifetime. - * This one is released by gk20a_channel_clean_up_jobs, - * via syncpt or sema interrupt, whichever is used. - */ - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a to submit gpfifo, process %s", - current->comm); - return err; - } - } - - if (!need_deferred_cleanup) { - /* clean up a single job */ - gk20a_channel_clean_up_jobs(c, false); - } - } - - - /* Grab access to HW to deal with do_idle */ - if (c->deterministic) - nvgpu_rwsem_down_read(&g->deterministic_busy); - - if (c->deterministic && c->deterministic_railgate_allowed) { - /* - * Nope - this channel has dropped its own power ref. As - * deterministic submits don't hold power on per each submitted - * job like normal ones do, the GPU might railgate any time now - * and thus submit is disallowed. - */ - err = -EINVAL; - goto clean_up; - } - - trace_gk20a_channel_submit_gpfifo(g->name, - c->chid, - num_entries, - flags, - fence ? fence->id : 0, - fence ? 
fence->value : 0); - - nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", - c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); - - /* - * Make sure we have enough space for gpfifo entries. Check cached - * values first and then read from HW. If no space, return EAGAIN - * and let userpace decide to re-try request or not. - */ - if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { - if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { - err = -EAGAIN; - goto clean_up; - } - } - - if (c->has_timedout) { - err = -ETIMEDOUT; - goto clean_up; - } - - if (need_job_tracking) { - err = channel_gk20a_alloc_job(c, &job); - if (err) - goto clean_up; - - err = gk20a_submit_prepare_syncs(c, fence, job, - &wait_cmd, &incr_cmd, - &post_fence, - need_deferred_cleanup, - flags); - if (err) - goto clean_up_job; - } - - gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); - - if (wait_cmd) - gk20a_submit_append_priv_cmdbuf(c, wait_cmd); - - if (gpfifo || user_gpfifo) - err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, - num_entries); - if (err) - goto clean_up_job; - - /* - * And here's where we add the incr_cmd we generated earlier. It should - * always run! - */ - if (incr_cmd) - gk20a_submit_append_priv_cmdbuf(c, incr_cmd); - - if (fence_out) - *fence_out = gk20a_fence_get(post_fence); - - if (need_job_tracking) - /* TODO! Check for errors... */ - gk20a_channel_add_job(c, job, skip_buffer_refcounting); - gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); - - g->ops.fifo.userd_gp_put(g, c); - - /* No hw access beyond this point */ - if (c->deterministic) - nvgpu_rwsem_up_read(&g->deterministic_busy); - - trace_gk20a_channel_submitted_gpfifo(g->name, - c->chid, - num_entries, - flags, - post_fence ? post_fence->syncpt_id : 0, - post_fence ? post_fence->syncpt_value : 0); - - nvgpu_log_info(g, "post-submit put %d, get %d, size %d", - c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); - - gk20a_fifo_profile_snapshot(profile, PROFILE_END); - - nvgpu_log_fn(g, "done"); - return err; - -clean_up_job: - channel_gk20a_free_job(c, job); -clean_up: - nvgpu_log_fn(g, "fail"); - gk20a_fence_put(post_fence); - if (c->deterministic) - nvgpu_rwsem_up_read(&g->deterministic_busy); - else if (need_deferred_cleanup) - gk20a_idle(g); - - return err; -} - diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h deleted file mode 100644 index 4a58b10c..00000000 --- a/drivers/gpu/nvgpu/common/linux/channel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
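
The deterministic_busy rwsem used in the submit path follows a standard reader/writer pattern: every deterministic submit takes the lock for read, so many submits proceed concurrently, while the power path that wants to railgate takes it for write and thereby excludes all in-flight submits. A generic sketch of that pattern using POSIX rwlocks (userspace analogue; names hypothetical):

    #include <pthread.h>

    static pthread_rwlock_t deterministic_busy = PTHREAD_RWLOCK_INITIALIZER;

    void submit_job(void (*do_submit)(void))
    {
            pthread_rwlock_rdlock(&deterministic_busy); /* many readers */
            do_submit();                    /* HW access allowed here */
            pthread_rwlock_unlock(&deterministic_busy); /* no HW access after */
    }

    void railgate(void (*power_down)(void))
    {
            pthread_rwlock_wrlock(&deterministic_busy); /* waits for submits */
            power_down();
            pthread_rwlock_unlock(&deterministic_busy);
    }
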
- */ -#ifndef __NVGPU_CHANNEL_H__ -#define __NVGPU_CHANNEL_H__ - -#include -#include - -#include - -struct channel_gk20a; -struct nvgpu_gpfifo; -struct nvgpu_submit_gpfifo_args; -struct nvgpu_channel_fence; -struct gk20a_fence; -struct fifo_profile_gk20a; -struct nvgpu_os_linux; - -struct sync_fence; -struct sync_timeline; - -struct nvgpu_channel_completion_cb { - /* - * Signal channel owner via a callback, if set, in job cleanup with - * schedule_work. Means that something finished on the channel (perhaps - * more than one job). - */ - void (*fn)(struct channel_gk20a *, void *); - void *user_data; - /* Make access to the two above atomic */ - struct nvgpu_spinlock lock; - /* Per-channel async work task, cannot reschedule itself */ - struct work_struct work; -}; - -struct nvgpu_error_notifier { - struct dma_buf *dmabuf; - void *vaddr; - - struct nvgpu_notification *notification; - - struct nvgpu_mutex mutex; -}; - -/* - * This struct contains fence_related data. - * e.g. sync_timeline for sync_fences. - */ -struct nvgpu_os_fence_framework { - struct sync_timeline *timeline; -}; - -struct nvgpu_channel_linux { - struct channel_gk20a *ch; - - struct nvgpu_os_fence_framework fence_framework; - - struct nvgpu_channel_completion_cb completion_cb; - struct nvgpu_error_notifier error_notifier; - - struct dma_buf *cyclestate_buffer_handler; -}; - -u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); - -struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, - void (*update_fn)(struct channel_gk20a *, void *), - void *update_fn_data, - int runlist_id, - bool is_privileged_channel); - -int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *gpfifo, - struct nvgpu_submit_gpfifo_args *args, - u32 num_entries, - u32 flags, - struct nvgpu_channel_fence *fence, - struct gk20a_fence **fence_out, - struct fifo_profile_gk20a *profile); - -#endif /* __NVGPU_CHANNEL_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/clk.c b/drivers/gpu/nvgpu/common/linux/clk.c deleted file mode 100644 index 414b17c4..00000000 --- a/drivers/gpu/nvgpu/common/linux/clk.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Linux clock support - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -#include "clk.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -#include "gk20a/gk20a.h" - -static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - unsigned long ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - if (g->clk.tegra_clk) - ret = g->clk.cached_rate ? - g->clk.cached_rate : - clk_get_rate(g->clk.tegra_clk); - else - ret = platform->cached_rate ? 
- platform->cached_rate : - clk_get_rate(platform->clk[0]); - break; - case CTRL_CLK_DOMAIN_PWRCLK: - ret = clk_get_rate(platform->clk[1]); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = 0; - break; - } - - return ret; -} - -static int nvgpu_linux_clk_set_rate(struct gk20a *g, - u32 api_domain, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - int ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - if (g->clk.tegra_clk) { - ret = clk_set_rate(g->clk.tegra_clk, rate); - if (!ret) - g->clk.cached_rate = rate; - } else { - ret = clk_set_rate(platform->clk[0], rate); - if (!ret) - platform->cached_rate = rate; - } - break; - case CTRL_CLK_DOMAIN_PWRCLK: - ret = clk_set_rate(platform->clk[1], rate); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = -EINVAL; - break; - } - - return ret; -} - -static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - - /* - * On Tegra platforms with GPCPLL bus (gbus) GPU tegra_clk clock exposed - * to frequency governor is a shared user on the gbus. The latter can be - * accessed as GPU clock parent, and incorporate DVFS related data. - */ - if (g->clk.tegra_clk) - return tegra_dvfs_get_fmax_at_vmin_safe_t( - clk_get_parent(g->clk.tegra_clk)); - - if (platform->maxmin_clk_id) - return tegra_bpmp_dvfs_get_fmax_at_vmin( - platform->maxmin_clk_id); - - return 0; -} - -static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) -{ - struct clk *c; - - c = clk_get_sys("gpu_ref", "gpu_ref"); - if (IS_ERR(c)) { - nvgpu_err(g, "failed to get GPCPLL reference clock"); - return 0; - } - - return clk_get_rate(c); -} - -static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, - unsigned long rate) -{ - return tegra_dvfs_predict_mv_at_hz_cur_tfloor( - clk_get_parent(clk->tegra_clk), rate); -} - -static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) -{ - int ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk)); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = 0; - break; - } - - return ret; -} - -static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) -{ - return clk_prepare_enable(clk->tegra_clk); -} - -static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) -{ - clk_disable_unprepare(clk->tegra_clk); -} - -void nvgpu_linux_init_clk_support(struct gk20a *g) -{ - g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; - g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; - g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; - g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; - g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; - g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; - g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; - g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; -} diff --git a/drivers/gpu/nvgpu/common/linux/clk.h b/drivers/gpu/nvgpu/common/linux/clk.h deleted file mode 100644 index 614a7fd7..00000000 --- a/drivers/gpu/nvgpu/common/linux/clk.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef NVGPU_COMMON_LINUX_CLK_H - -struct gk20a; -void nvgpu_linux_init_clk_support(struct gk20a *g); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c deleted file mode 100644 index 353f6363..00000000 --- a/drivers/gpu/nvgpu/common/linux/comptags.c +++ /dev/null @@ -1,140 +0,0 @@ -/* -* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include - -#include "gk20a/gk20a.h" -#include "dmabuf.h" - -void gk20a_get_comptags(struct nvgpu_os_buffer *buf, - struct gk20a_comptags *comptags) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - - if (!comptags) - return; - - if (!priv) { - memset(comptags, 0, sizeof(*comptags)); - return; - } - - nvgpu_mutex_acquire(&priv->lock); - *comptags = priv->comptags; - nvgpu_mutex_release(&priv->lock); -} - -int gk20a_alloc_or_get_comptags(struct gk20a *g, - struct nvgpu_os_buffer *buf, - struct gk20a_comptag_allocator *allocator, - struct gk20a_comptags *comptags) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - u32 offset; - int err; - unsigned int ctag_granularity; - u32 lines; - - if (!priv) - return -ENOSYS; - - nvgpu_mutex_acquire(&priv->lock); - - if (priv->comptags.allocated) { - /* - * already allocated - */ - *comptags = priv->comptags; - - err = 0; - goto exit_locked; - } - - ctag_granularity = g->ops.fb.compression_page_size(g); - lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); - - /* 0-sized buffer? Shouldn't occur, but let's check anyways. */ - if (lines < 1) { - err = -EINVAL; - goto exit_locked; - } - - /* store the allocator so we can use it when we free the ctags */ - priv->comptag_allocator = allocator; - err = gk20a_comptaglines_alloc(allocator, &offset, lines); - if (!err) { - priv->comptags.offset = offset; - priv->comptags.lines = lines; - priv->comptags.needs_clear = true; - } else { - priv->comptags.offset = 0; - priv->comptags.lines = 0; - priv->comptags.needs_clear = false; - } - - /* - * We don't report an error here if comptag alloc failed. The - * caller will simply fallback to incompressible kinds. It - * would not be safe to re-allocate comptags anyways on - * successive calls, as that would break map aliasing. 
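
The allocation above sizes the comptag request by rounding the buffer size up to whole compression pages, exactly what DIV_ROUND_UP_ULL computes. A standalone version of that computation (values hypothetical; 128 KiB granularity is only an example):

    #include <stdint.h>

    /* lines = ceil(buf_size / ctag_granularity) */
    static uint32_t comptag_lines(uint64_t buf_size, uint64_t granularity)
    {
            return (uint32_t)((buf_size + granularity - 1) / granularity);
    }
    /* e.g. comptag_lines(1 << 20, 128 << 10) == 8;
     * a 1-byte buffer still needs 1 line. */
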
- */ - err = 0; - priv->comptags.allocated = true; - - *comptags = priv->comptags; - -exit_locked: - nvgpu_mutex_release(&priv->lock); - - return err; -} - -bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - bool clear_started = false; - - if (priv) { - nvgpu_mutex_acquire(&priv->lock); - - clear_started = priv->comptags.needs_clear; - - if (!clear_started) - nvgpu_mutex_release(&priv->lock); - } - - return clear_started; -} - -void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, - bool clear_successful) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - if (priv) { - if (clear_successful) - priv->comptags.needs_clear = false; - - nvgpu_mutex_release(&priv->lock); - } -} diff --git a/drivers/gpu/nvgpu/common/linux/cond.c b/drivers/gpu/nvgpu/common/linux/cond.c deleted file mode 100644 index 633c34fd..00000000 --- a/drivers/gpu/nvgpu/common/linux/cond.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include - -int nvgpu_cond_init(struct nvgpu_cond *cond) -{ - init_waitqueue_head(&cond->wq); - cond->initialized = true; - - return 0; -} - -void nvgpu_cond_destroy(struct nvgpu_cond *cond) -{ - cond->initialized = false; -} - -int nvgpu_cond_signal(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up(&cond->wq); - - return 0; -} - -int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_interruptible(&cond->wq); - - return 0; -} - -int nvgpu_cond_broadcast(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_all(&cond->wq); - - return 0; -} - -int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_interruptible_all(&cond->wq); - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c deleted file mode 100644 index a335988a..00000000 --- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
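
The nvgpu_cond implementation above is a thin wrapper over a Linux waitqueue plus an initialized flag: signal/broadcast map to wake_up variants, and waiting is done against a predicate that is re-checked after every wakeup. A userspace analogue of the same producer/consumer pattern, using POSIX condition variables (sketch only; the kernel code uses waitqueues, not pthreads):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool data_ready;

    void producer_signal(void)
    {
            pthread_mutex_lock(&lock);
            data_ready = true;
            pthread_cond_signal(&cond);     /* ~ nvgpu_cond_signal() */
            pthread_mutex_unlock(&lock);
    }

    void consumer_wait(void)
    {
            pthread_mutex_lock(&lock);
            while (!data_ready)             /* predicate re-checked on wakeup */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
    }
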
- */ - -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" - -#include -#include -#include -#include - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "ctxsw_trace.h" - -#include -#include - -#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) - -/* Userland-facing FIFO (one global + eventually one per VM) */ -struct gk20a_ctxsw_dev { - struct gk20a *g; - - struct nvgpu_ctxsw_ring_header *hdr; - struct nvgpu_ctxsw_trace_entry *ents; - struct nvgpu_ctxsw_trace_filter filter; - bool write_enabled; - struct nvgpu_cond readout_wq; - size_t size; - u32 num_ents; - - nvgpu_atomic_t vma_ref; - - struct nvgpu_mutex write_lock; -}; - - -struct gk20a_ctxsw_trace { - struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; -}; - -static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) -{ - return (hdr->write_idx == hdr->read_idx); -} - -static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) -{ - return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; -} - -static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) -{ - return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; -} - -ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, - loff_t *off) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; - struct nvgpu_ctxsw_trace_entry __user *entry = - (struct nvgpu_ctxsw_trace_entry *) buf; - size_t copied = 0; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, - "filp=%p buf=%p size=%zu", filp, buf, size); - - nvgpu_mutex_acquire(&dev->write_lock); - while (ring_is_empty(hdr)) { - nvgpu_mutex_release(&dev->write_lock); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, - !ring_is_empty(hdr), 0); - if (err) - return err; - nvgpu_mutex_acquire(&dev->write_lock); - } - - while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { - if (ring_is_empty(hdr)) - break; - - if (copy_to_user(entry, &dev->ents[hdr->read_idx], - sizeof(*entry))) { - nvgpu_mutex_release(&dev->write_lock); - return -EFAULT; - } - - hdr->read_idx++; - if (hdr->read_idx >= hdr->num_ents) - hdr->read_idx = 0; - - entry++; - copied += sizeof(*entry); - size -= sizeof(*entry); - } - - nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, - hdr->read_idx); - - *off = hdr->read_idx; - nvgpu_mutex_release(&dev->write_lock); - - return copied; -} - -static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = true; - nvgpu_mutex_release(&dev->write_lock); - dev->g->ops.fecs_trace.enable(dev->g); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); - dev->g->ops.fecs_trace.disable(dev->g); - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = false; - nvgpu_mutex_release(&dev->write_lock); - return 0; -} - -static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, - size_t size) -{ - struct gk20a *g = dev->g; - void *buf; - int err; - - if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) - return -EBUSY; - - err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); - if (err) - return err; - - - dev->hdr = buf; - 
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); - dev->size = size; - dev->num_ents = dev->hdr->num_ents; - - nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", - dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); - return 0; -} - -int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, - void **buf, size_t *size) -{ - struct nvgpu_ctxsw_ring_header *hdr; - - *size = roundup(*size, PAGE_SIZE); - hdr = vmalloc_user(*size); - if (!hdr) - return -ENOMEM; - - hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; - hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; - hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) - / sizeof(struct nvgpu_ctxsw_trace_entry); - hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); - hdr->drop_count = 0; - hdr->read_idx = 0; - hdr->write_idx = 0; - hdr->write_seqno = 0; - - *buf = hdr; - return 0; -} - -int gk20a_ctxsw_dev_ring_free(struct gk20a *g) -{ - struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; - - nvgpu_vfree(g, dev->hdr); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_ring_setup_args *args) -{ - struct gk20a *g = dev->g; - size_t size = args->size; - int ret; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); - - if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) - return -EINVAL; - - nvgpu_mutex_acquire(&dev->write_lock); - ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); - nvgpu_mutex_release(&dev->write_lock); - - return ret; -} - -static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_trace_filter_args *args) -{ - struct gk20a *g = dev->g; - - nvgpu_mutex_acquire(&dev->write_lock); - dev->filter = args->filter; - nvgpu_mutex_release(&dev->write_lock); - - if (g->ops.fecs_trace.set_filter) - g->ops.fecs_trace.set_filter(g, &dev->filter); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_trace_filter_args *args) -{ - nvgpu_mutex_acquire(&dev->write_lock); - args->filter = dev->filter; - nvgpu_mutex_release(&dev->write_lock); - - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.fecs_trace.flush) - err = g->ops.fecs_trace.flush(g); - - if (likely(!err)) - err = g->ops.fecs_trace.poll(g); - - gk20a_idle(g); - return err; -} - -int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - struct gk20a_ctxsw_trace *trace; - struct gk20a_ctxsw_dev *dev; - int err; - size_t size; - u32 n; - - /* only one VM for now */ - const int vmid = 0; - - l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); - - if (!capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto free_ref; - } - - err = gk20a_busy(g); - if (err) - goto free_ref; - - trace = g->ctxsw_trace; - if (!trace) { - err = -ENODEV; - goto idle; - } - - /* Allow only one user for this device */ - dev = &trace->devs[vmid]; - nvgpu_mutex_acquire(&dev->write_lock); - if (dev->hdr) { - err = -EBUSY; - goto done; - } - - /* By default, allocate ring buffer big enough to accommodate - * FECS records with default event filter */ - - /* enable all traces by default */ - NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); - - /* compute max number of entries generated 
with this filter */ - n = g->ops.fecs_trace.max_entries(g, &dev->filter); - - size = sizeof(struct nvgpu_ctxsw_ring_header) + - n * sizeof(struct nvgpu_ctxsw_trace_entry); - nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", - size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); - - err = gk20a_ctxsw_dev_alloc_buffer(dev, size); - if (!err) { - filp->private_data = dev; - nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", - filp, dev, size); - } - -done: - nvgpu_mutex_release(&dev->write_lock); - -idle: - gk20a_idle(g); -free_ref: - if (err) - gk20a_put(g); - return err; -} - -int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); - - g->ops.fecs_trace.disable(g); - - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = false; - nvgpu_mutex_release(&dev->write_lock); - - if (dev->hdr) { - dev->g->ops.fecs_trace.free_user_buffer(dev->g); - dev->hdr = NULL; - } - gk20a_put(g); - return 0; -} - -long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: - err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); - break; - case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: - err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); - break; - case NVGPU_CTXSW_IOCTL_RING_SETUP: - err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, - (struct nvgpu_ctxsw_ring_setup_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_SET_FILTER: - err = gk20a_ctxsw_dev_ioctl_set_filter(dev, - (struct nvgpu_ctxsw_trace_filter_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_GET_FILTER: - err = gk20a_ctxsw_dev_ioctl_get_filter(dev, - (struct nvgpu_ctxsw_trace_filter_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_POLL: - err = gk20a_ctxsw_dev_ioctl_poll(dev); - break; - default: - dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); - - return err; -} - -unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; - unsigned int mask = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - nvgpu_mutex_acquire(&dev->write_lock); - poll_wait(filp, &dev->readout_wq.wq, wait); - if (!ring_is_empty(hdr)) - mask |= POLLIN | POLLRDNORM; - nvgpu_mutex_release(&dev->write_lock); - - return mask; -} - -static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev *dev = vma->vm_private_data; - struct gk20a *g = dev->g; - - nvgpu_atomic_inc(&dev->vma_ref); - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - nvgpu_atomic_read(&dev->vma_ref)); -} - -static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev 
*dev = vma->vm_private_data; - struct gk20a *g = dev->g; - - nvgpu_atomic_dec(&dev->vma_ref); - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - nvgpu_atomic_read(&dev->vma_ref)); -} - -static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { - .open = gk20a_ctxsw_dev_vma_open, - .close = gk20a_ctxsw_dev_vma_close, -}; - -int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, - struct vm_area_struct *vma) -{ - return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); -} - -int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - int ret; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - - ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); - if (likely(!ret)) { - vma->vm_private_data = dev; - vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; - vma->vm_ops->open(vma); - } - - return ret; -} - -#ifdef CONFIG_GK20A_CTXSW_TRACE -static int gk20a_ctxsw_init_devs(struct gk20a *g) -{ - struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; - struct gk20a_ctxsw_dev *dev = trace->devs; - int err; - int i; - - for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { - dev->g = g; - dev->hdr = NULL; - dev->write_enabled = false; - nvgpu_cond_init(&dev->readout_wq); - err = nvgpu_mutex_init(&dev->write_lock); - if (err) - return err; - nvgpu_atomic_set(&dev->vma_ref, 0); - dev++; - } - return 0; -} -#endif - -int gk20a_ctxsw_trace_init(struct gk20a *g) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); - - /* if tracing is not supported, skip this */ - if (!g->ops.fecs_trace.init) - return 0; - - if (likely(trace)) - return 0; - - trace = nvgpu_kzalloc(g, sizeof(*trace)); - if (unlikely(!trace)) - return -ENOMEM; - g->ctxsw_trace = trace; - - err = gk20a_ctxsw_init_devs(g); - if (err) - goto fail; - - err = g->ops.fecs_trace.init(g); - if (unlikely(err)) - goto fail; - - return 0; - -fail: - memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); - nvgpu_kfree(g, trace); - g->ctxsw_trace = NULL; - return err; -#else - return 0; -#endif -} - -void gk20a_ctxsw_trace_cleanup(struct gk20a *g) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct gk20a_ctxsw_trace *trace; - struct gk20a_ctxsw_dev *dev; - int i; - - if (!g->ctxsw_trace) - return; - - trace = g->ctxsw_trace; - dev = trace->devs; - - for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { - nvgpu_mutex_destroy(&dev->write_lock); - dev++; - } - - nvgpu_kfree(g, g->ctxsw_trace); - g->ctxsw_trace = NULL; - - g->ops.fecs_trace.deinit(g); -#endif -} - -int gk20a_ctxsw_trace_write(struct gk20a *g, - struct nvgpu_ctxsw_trace_entry *entry) -{ - struct nvgpu_ctxsw_ring_header *hdr; - struct gk20a_ctxsw_dev *dev; - int ret = 0; - const char *reason; - u32 write_idx; - - if (!g->ctxsw_trace) - return 0; - - if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) - return -ENODEV; - - dev = &g->ctxsw_trace->devs[entry->vmid]; - hdr = dev->hdr; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, - "dev=%p hdr=%p", dev, hdr); - - nvgpu_mutex_acquire(&dev->write_lock); - - if (unlikely(!hdr)) { - /* device has been released */ - ret = -ENODEV; - goto done; - } - - write_idx = hdr->write_idx; - if (write_idx >= dev->num_ents) { - nvgpu_err(dev->g, - "write_idx=%u out of range [0..%u]", - write_idx, dev->num_ents); - ret = -ENOSPC; - reason = "write_idx out of range"; - goto disable; - } - - entry->seqno = 
hdr->write_seqno++; - - if (!dev->write_enabled) { - ret = -EBUSY; - reason = "write disabled"; - goto drop; - } - - if (unlikely(ring_is_full(hdr))) { - ret = -ENOSPC; - reason = "user fifo full"; - goto drop; - } - - if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { - reason = "filtered out"; - goto filter; - } - - nvgpu_log(g, gpu_dbg_ctxsw, - "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", - entry->seqno, entry->context_id, entry->pid, - entry->tag, entry->timestamp); - - dev->ents[write_idx] = *entry; - - /* ensure record is written before updating write index */ - nvgpu_smp_wmb(); - - write_idx++; - if (unlikely(write_idx >= hdr->num_ents)) - write_idx = 0; - hdr->write_idx = write_idx; - nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", - hdr->read_idx, hdr->write_idx, ring_len(hdr)); - - nvgpu_mutex_release(&dev->write_lock); - return ret; - -disable: - g->ops.fecs_trace.disable(g); - -drop: - hdr->drop_count++; - -filter: - nvgpu_log(g, gpu_dbg_ctxsw, - "dropping seqno=%d context_id=%08x pid=%lld " - "tag=%x time=%llx (%s)", - entry->seqno, entry->context_id, entry->pid, - entry->tag, entry->timestamp, reason); - -done: - nvgpu_mutex_release(&dev->write_lock); - return ret; -} - -void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) -{ - struct gk20a_ctxsw_dev *dev; - - if (!g->ctxsw_trace) - return; - - dev = &g->ctxsw_trace->devs[vmid]; - nvgpu_cond_signal_interruptible(&dev->readout_wq); -} - -void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct nvgpu_ctxsw_trace_entry entry = { - .vmid = 0, - .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, - .context_id = 0, - .pid = ch->tgid, - }; - - if (!g->ctxsw_trace) - return; - - g->ops.ptimer.read_ptimer(g, &entry.timestamp); - gk20a_ctxsw_trace_write(g, &entry); - gk20a_ctxsw_trace_wake_up(g, 0); -#endif - trace_gk20a_channel_reset(ch->chid, ch->tsgid); -} - -void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct nvgpu_ctxsw_trace_entry entry = { - .vmid = 0, - .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, - .context_id = 0, - .pid = tsg->tgid, - }; - - if (!g->ctxsw_trace) - return; - - g->ops.ptimer.read_ptimer(g, &entry.timestamp); - gk20a_ctxsw_trace_write(g, &entry); - gk20a_ctxsw_trace_wake_up(g, 0); -#endif - trace_gk20a_channel_reset(~0, tsg->tsgid); -} diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h deleted file mode 100644 index 88ca7f25..00000000 --- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
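
The barrier in the trace write path above is the classic single-producer publish pattern: fill the ring slot first, then make the index visible, with a write barrier in between so any reader that observes the new write_idx is guaranteed to observe the record it covers. A C11 analogue of the same idea (standalone sketch; types and ring size simplified):

    #include <stdatomic.h>
    #include <stdint.h>

    #define RING_ENTS 16

    struct entry { uint64_t payload; };

    static struct entry ents[RING_ENTS];
    static _Atomic uint32_t write_idx;

    void ring_publish(struct entry e)
    {
            uint32_t w = atomic_load_explicit(&write_idx,
                                              memory_order_relaxed);

            ents[w % RING_ENTS] = e;        /* 1: write the record */
            /* 2: release ~ nvgpu_smp_wmb(): record visible before index */
            atomic_store_explicit(&write_idx, w + 1, memory_order_release);
    }
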
- */ - -#ifndef __CTXSW_TRACE_H__ -#define __CTXSW_TRACE_H__ - -#include - -#define GK20A_CTXSW_TRACE_NUM_DEVS 1 - -struct file; -struct inode; -struct poll_table_struct; - -struct gk20a; - -int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); -int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); -long gk20a_ctxsw_dev_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, - size_t size, loff_t *offs); -unsigned int gk20a_ctxsw_dev_poll(struct file *filp, - struct poll_table_struct *pts); - -#endif /* __CTXSW_TRACE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c deleted file mode 100644 index 8738f3e7..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_cde.h" -#include "debug_ce.h" -#include "debug_fifo.h" -#include "debug_gr.h" -#include "debug_allocator.h" -#include "debug_kmem.h" -#include "debug_pmu.h" -#include "debug_sched.h" -#include "debug_hal.h" -#include "debug_xve.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -#include "gk20a/gk20a.h" - -#include -#include -#include - -#include - -unsigned int gk20a_debug_trace_cmdbuf; - -static inline void gk20a_debug_write_printk(void *ctx, const char *str, - size_t len) -{ - pr_info("%s", str); -} - -static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, - size_t len) -{ - seq_write((struct seq_file *)ctx, str, len); -} - -void gk20a_debug_output(struct gk20a_debug_output *o, - const char *fmt, ...) 
-{ - va_list args; - int len; - - va_start(args, fmt); - len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); - va_end(args); - o->fn(o->ctx, o->buf, len); -} - -static int gk20a_gr_dump_regs(struct gk20a *g, - struct gk20a_debug_output *o) -{ - if (g->ops.gr.dump_gr_regs) - gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); - - return 0; -} - -int gk20a_gr_debug_dump(struct gk20a *g) -{ - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_printk - }; - - gk20a_gr_dump_regs(g, &o); - - return 0; -} - -static int gk20a_gr_debug_show(struct seq_file *s, void *unused) -{ - struct device *dev = s->private; - struct gk20a *g = gk20a_get_platform(dev)->g; - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_to_seqfile, - .ctx = s, - }; - int err; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu: %d", err); - return -EINVAL; - } - - gk20a_gr_dump_regs(g, &o); - - gk20a_idle(g); - - return 0; -} - -void gk20a_debug_dump(struct gk20a *g) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_printk - }; - - if (platform->dump_platform_dependencies) - platform->dump_platform_dependencies(dev_from_gk20a(g)); - - /* HAL only initialized after 1st power-on */ - if (g->ops.debug.show_dump) - g->ops.debug.show_dump(g, &o); -} - -static int gk20a_debug_show(struct seq_file *s, void *unused) -{ - struct device *dev = s->private; - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_to_seqfile, - .ctx = s, - }; - struct gk20a *g; - int err; - - g = gk20a_get_platform(dev)->g; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu: %d", err); - return -EFAULT; - } - - /* HAL only initialized after 1st power-on */ - if (g->ops.debug.show_dump) - g->ops.debug.show_dump(g, &o); - - gk20a_idle(g); - return 0; -} - -static int gk20a_gr_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_gr_debug_show, inode->i_private); -} - -static int gk20a_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_debug_show, inode->i_private); -} - -static const struct file_operations gk20a_gr_debug_fops = { - .open = gk20a_gr_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations gk20a_debug_fops = { - .open = gk20a_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) -{ - g->ops.fifo.dump_pbdma_status(g, o); - g->ops.fifo.dump_eng_status(g, o); - - gk20a_debug_dump_all_channel_status_ramfc(g, o); -} - -static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - struct gk20a *g = file->private_data; - - if (g->mm.disable_bigpage) - buf[0] = 'Y'; - else - buf[0] = 'N'; - buf[1] = '\n'; - buf[2] = 0x00; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[32]; - int buf_size; - bool bv; - struct gk20a *g = file->private_data; - - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - if (strtobool(buf, &bv) == 0) { - g->mm.disable_bigpage = bv; - gk20a_init_gpu_characteristics(g); - } - - return count; -} - -static struct file_operations disable_bigpage_fops = { - 
.open = simple_open, - .read = disable_bigpage_read, - .write = disable_bigpage_write, -}; - -static int railgate_residency_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - unsigned long time_since_last_state_transition_ms; - unsigned long total_rail_gate_time_ms; - unsigned long total_rail_ungate_time_ms; - - if (platform->is_railgated(dev_from_gk20a(g))) { - time_since_last_state_transition_ms = - jiffies_to_msecs(jiffies - - g->pstats.last_rail_gate_complete); - total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; - total_rail_gate_time_ms = - g->pstats.total_rail_gate_time_ms + - time_since_last_state_transition_ms; - } else { - time_since_last_state_transition_ms = - jiffies_to_msecs(jiffies - - g->pstats.last_rail_ungate_complete); - total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; - total_rail_ungate_time_ms = - g->pstats.total_rail_ungate_time_ms + - time_since_last_state_transition_ms; - } - - seq_printf(s, "Time with Rails Gated: %lu ms\n" - "Time with Rails UnGated: %lu ms\n" - "Total railgating cycles: %lu\n", - total_rail_gate_time_ms, - total_rail_ungate_time_ms, - g->pstats.railgating_cycle_count - 1); - return 0; - -} - -static int railgate_residency_open(struct inode *inode, struct file *file) -{ - return single_open(file, railgate_residency_show, inode->i_private); -} - -static const struct file_operations railgate_residency_fops = { - .open = railgate_residency_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int gk20a_railgating_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *d; - - d = debugfs_create_file( - "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &railgate_residency_fops); - if (!d) - return -ENOMEM; - - return 0; -} -static ssize_t timeouts_enabled_read(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - struct gk20a *g = file->private_data; - - if (nvgpu_is_timeouts_enabled(g)) - buf[0] = 'Y'; - else - buf[0] = 'N'; - buf[1] = '\n'; - buf[2] = 0x00; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t timeouts_enabled_write(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - int buf_size; - bool timeouts_enabled; - struct gk20a *g = file->private_data; - - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - if (strtobool(buf, &timeouts_enabled) == 0) { - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (timeouts_enabled == false) { - /* requesting to disable timeouts */ - if (g->timeouts_disabled_by_user == false) { - nvgpu_atomic_inc(&g->timeouts_disabled_refcount); - g->timeouts_disabled_by_user = true; - } - } else { - /* requesting to enable timeouts */ - if (g->timeouts_disabled_by_user == true) { - nvgpu_atomic_dec(&g->timeouts_disabled_refcount); - g->timeouts_disabled_by_user = false; - } - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - } - - return count; -} - -static const struct file_operations timeouts_enabled_fops = { - .open = simple_open, - .read = timeouts_enabled_read, - .write = timeouts_enabled_write, -}; - -void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - - l->debugfs = debugfs_create_dir(dev_name(dev), NULL); - 
if (!l->debugfs) - return; - - if (debugfs_symlink) - l->debugfs_alias = - debugfs_create_symlink(debugfs_symlink, - NULL, dev_name(dev)); - - debugfs_create_file("status", S_IRUGO, l->debugfs, - dev, &gk20a_debug_fops); - debugfs_create_file("gr_status", S_IRUGO, l->debugfs, - dev, &gk20a_gr_debug_fops); - debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, - l->debugfs, &gk20a_debug_trace_cmdbuf); - - debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, - l->debugfs, &g->ch_wdt_timeout_ms); - - debugfs_create_u32("disable_syncpoints", S_IRUGO, - l->debugfs, &g->disable_syncpoints); - - /* New debug logging API. */ - debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, - l->debugfs, &g->log_mask); - debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, - l->debugfs, &g->log_trace); - - l->debugfs_ltc_enabled = - debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, - l->debugfs, - &g->mm.ltc_enabled_target); - - l->debugfs_gr_idle_timeout_default = - debugfs_create_u32("gr_idle_timeout_default_us", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr_idle_timeout_default); - l->debugfs_timeouts_enabled = - debugfs_create_file("timeouts_enabled", - S_IRUGO|S_IWUSR, - l->debugfs, - g, - &timeouts_enabled_fops); - - l->debugfs_disable_bigpage = - debugfs_create_file("disable_bigpage", - S_IRUGO|S_IWUSR, - l->debugfs, - g, - &disable_bigpage_fops); - - l->debugfs_timeslice_low_priority_us = - debugfs_create_u32("timeslice_low_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_low_priority_us); - l->debugfs_timeslice_medium_priority_us = - debugfs_create_u32("timeslice_medium_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_medium_priority_us); - l->debugfs_timeslice_high_priority_us = - debugfs_create_u32("timeslice_high_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_high_priority_us); - l->debugfs_runlist_interleave = - debugfs_create_bool("runlist_interleave", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->runlist_interleave); - l->debugfs_force_preemption_gfxp = - debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, - l->debugfs, - &g->gr.ctx_vars.force_preemption_gfxp); - - l->debugfs_force_preemption_cilp = - debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, - l->debugfs, - &g->gr.ctx_vars.force_preemption_cilp); - - l->debugfs_dump_ctxsw_stats = - debugfs_create_bool("dump_ctxsw_stats_on_channel_close", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); - - gr_gk20a_debugfs_init(g); - gk20a_pmu_debugfs_init(g); - gk20a_railgating_debugfs_init(g); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - gk20a_cde_debugfs_init(g); -#endif - gk20a_ce_debugfs_init(g); - nvgpu_alloc_debugfs_init(g); - nvgpu_hal_debugfs_init(g); - gk20a_fifo_debugfs_init(g); - gk20a_sched_debugfs_init(g); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - nvgpu_kmem_debugfs_init(g); -#endif - if (g->pci_vendor_id) - nvgpu_xve_debugfs_init(g); -} - -void gk20a_debug_deinit(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs) - return; - - gk20a_fifo_debugfs_deinit(g); - - debugfs_remove_recursive(l->debugfs); - debugfs_remove(l->debugfs_alias); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c deleted file mode 100644 index d63a9030..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_allocator.h" -#include "os_linux.h" - -#include -#include - -#include - -static int __alloc_show(struct seq_file *s, void *unused) -{ - struct nvgpu_allocator *a = s->private; - - nvgpu_alloc_print_stats(a, s, 1); - - return 0; -} - -static int __alloc_open(struct inode *inode, struct file *file) -{ - return single_open(file, __alloc_show, inode->i_private); -} - -static const struct file_operations __alloc_fops = { - .open = __alloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs_allocators) - return; - - a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - l->debugfs_allocators, - a, &__alloc_fops); -} - -void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) -{ -} - -void nvgpu_alloc_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_allocators)) { - l->debugfs_allocators = NULL; - return; - } -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h deleted file mode 100644 index 1b21cfc5..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_ALLOCATOR_H__ -#define __NVGPU_DEBUG_ALLOCATOR_H__ - -struct gk20a; -void nvgpu_alloc_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c deleted file mode 100644 index f0afa6ee..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_cde.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
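Almost every read-only node in these files repeats one seq_file idiom, visible in __alloc_show/__alloc_open above: the object of interest is stashed in inode->i_private at creation time, single_open() binds it to a show callback, and the canned seq_read/seq_lseek/single_release entries do the rest. A minimal self-contained instance of the pattern (my_state and the node name are hypothetical):

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    struct my_state {
            int value;
    };

    static int example_show(struct seq_file *s, void *unused)
    {
            struct my_state *st = s->private; /* handed over by single_open() */

            seq_printf(s, "value: %d\n", st->value);
            return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
            return single_open(file, example_show, inode->i_private);
    }

    static const struct file_operations example_fops = {
            .open = example_open,
            .read = seq_read,
            .llseek = seq_lseek,
            .release = single_release,
    };

    /* registration, with st becoming i_private:
     *   debugfs_create_file("example", S_IRUGO, parent, st, &example_fops);
     */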
- * - */ - -#include "debug_cde.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -#include - - -static ssize_t gk20a_cde_reload_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct nvgpu_os_linux *l = file->private_data; - gk20a_cde_reload(l); - return count; -} - -static const struct file_operations gk20a_cde_reload_fops = { - .open = simple_open, - .write = gk20a_cde_reload_write, -}; - -void gk20a_cde_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - if (!platform->has_cde) - return; - - debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.shader_parameter); - debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_count); - debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_usecount); - debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_count_top); - debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, - l, &gk20a_cde_reload_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h deleted file mode 100644 index 4895edd6..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_cde.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_CDE_H__ -#define __NVGPU_DEBUG_CDE_H__ - -struct gk20a; -void gk20a_cde_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_CDE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c deleted file mode 100644 index cea0bb47..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_ce.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
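reload_cde_firmware above is the write-only counterpart: simple_open() merely copies inode->i_private into file->private_data, the handler performs the action no matter what bytes arrive, and returning count consumes the whole write, so any write such as `echo 1 > reload_cde_firmware` triggers a reload. Sketched with placeholder names:

    #include <linux/debugfs.h>
    #include <linux/fs.h>

    struct my_state;
    void do_the_action(struct my_state *st); /* hypothetical action */

    static ssize_t trigger_write(struct file *file, const char __user *buf,
                                 size_t count, loff_t *ppos)
    {
            struct my_state *st = file->private_data; /* set by simple_open() */

            do_the_action(st);
            return count; /* acknowledge everything that was written */
    }

    static const struct file_operations trigger_fops = {
            .open = simple_open,
            .write = trigger_write,
    };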
- * - */ - -#include "debug_ce.h" -#include "os_linux.h" - -#include - -void gk20a_ce_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.ctx_count); - debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.app_state); - debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.next_ctx_id); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h deleted file mode 100644 index 2a8750c4..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_ce.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_CE_H__ -#define __NVGPU_DEBUG_CE_H__ - -struct gk20a; -void gk20a_ce_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_CE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_clk.c b/drivers/gpu/nvgpu/common/linux/debug_clk.c deleted file mode 100644 index 2484d44b..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_clk.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include - -#include "gm20b/clk_gm20b.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -static int rate_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - struct clk_gk20a *clk = &g->clk; - - *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - return 0; -} -static int rate_set(void *data, u64 val) -{ - struct gk20a *g = (struct gk20a *)data; - return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); -} -DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); - -static int pll_reg_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_pll_debug_data d; - u32 reg, m, n, pl, f; - int err = 0; - - if (g->ops.clk.get_pll_debug_data) { - err = g->ops.clk.get_pll_debug_data(g, &d); - if (err) - return err; - } else { - return -EINVAL; - } - - seq_printf(s, "bypassctrl = %s, ", - d.trim_sys_bypassctrl_val ? "bypass" : "vco"); - seq_printf(s, "sel_vco = %s, ", - d.trim_sys_sel_vco_val ? "vco" : "bypass"); - - seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, - d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", - d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", - d.trim_sys_gpcpll_cfg_sync_on ? 
"sync_on" : "sync_off"); - - reg = d.trim_sys_gpcpll_coeff_val; - m = d.trim_sys_gpcpll_coeff_mdiv; - n = d.trim_sys_gpcpll_coeff_ndiv; - pl = d.trim_sys_gpcpll_coeff_pldiv; - f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); - seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); - seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); - - seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", - d.trim_sys_gpcpll_dvfs0_val, - d.trim_sys_gpcpll_dvfs0_dfs_coeff, - d.trim_sys_gpcpll_dvfs0_dfs_det_max, - d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); - - return 0; -} - -static int pll_reg_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_reg_show, inode->i_private); -} - -static const struct file_operations pll_reg_fops = { - .open = pll_reg_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pll_reg_raw_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_pll_debug_data d; - u32 reg; - int err = 0; - - if (g->ops.clk.get_pll_debug_data) { - err = g->ops.clk.get_pll_debug_data(g, &d); - if (err) - return err; - } else { - return -EINVAL; - } - - seq_puts(s, "GPCPLL REGISTERS:\n"); - for (reg = d.trim_sys_gpcpll_cfg_reg; - reg <= d.trim_sys_gpcpll_dvfs2_reg; - reg += sizeof(u32)) - seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); - - seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); - - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, - d.trim_sys_sel_vco_val); - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, - d.trim_sys_gpc2clk_out_val); - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, - d.trim_sys_bypassctrl_val); - - return 0; -} - -static int pll_reg_raw_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_reg_raw_show, inode->i_private); -} - -static ssize_t pll_reg_raw_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct gk20a *g = file->f_path.dentry->d_inode->i_private; - char buf[80]; - u32 reg, val; - int err = 0; - - if (sizeof(buf) <= count) - return -EINVAL; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - - /* terminate buffer and trim - white spaces may be appended - * at the end when invoked from shell command line */ - buf[count] = '\0'; - strim(buf); - - if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) - return -EINVAL; - - if (g->ops.clk.pll_reg_write(g, reg, val)) - err = g->ops.clk.pll_reg_write(g, reg, val); - else - err = -EINVAL; - - return err; -} - -static const struct file_operations pll_reg_raw_fops = { - .open = pll_reg_raw_open, - .read = seq_read, - .write = pll_reg_raw_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int monitor_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - int err = 0; - - if (g->ops.clk.get_gpcclk_clock_counter) - err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); - else - err = -EINVAL; - - return err; -} -DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); - -static int voltage_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - int err = 0; - - if (g->ops.clk.get_voltage) - err = g->ops.clk.get_voltage(&g->clk, val); - else - err = -EINVAL; - - return err; -} -DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); - -static int pll_param_show(struct seq_file *s, void *data) -{ - struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); - - seq_printf(s, 
"ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", - gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, - gpc_pll_params->vco_ctrl); - return 0; -} - -static int pll_param_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_param_show, inode->i_private); -} - -static const struct file_operations pll_param_fops = { - .open = pll_param_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int gm20b_clk_init_debugfs(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *d; - - if (!l->debugfs) - return -EINVAL; - - d = debugfs_create_file( - "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); - if (!d) - goto err_out; - - d = debugfs_create_file("pll_reg_raw", - S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); - if (!d) - goto err_out; - - d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, - (u32 *)&g->clk.gpc_pll.mode); - if (!d) - goto err_out; - - d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, - l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); - if (!d) - goto err_out; - - return 0; - -err_out: - pr_err("%s: Failed to make debugfs node\n", __func__); - return -ENOMEM; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c deleted file mode 100644 index 2b5674c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "debug_fifo.h" -#include "os_linux.h" - -#include -#include - -#include -#include - -void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); - -static void *gk20a_fifo_sched_debugfs_seq_start( - struct seq_file *s, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void *gk20a_fifo_sched_debugfs_seq_next( - struct seq_file *s, void *v, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - ++(*pos); - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void gk20a_fifo_sched_debugfs_seq_stop( - struct seq_file *s, void *v) -{ -} - -static int gk20a_fifo_sched_debugfs_seq_show( - struct seq_file *s, void *v) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = v; - struct tsg_gk20a *tsg = NULL; - - struct fifo_engine_info_gk20a *engine_info; - struct fifo_runlist_info_gk20a *runlist; - u32 runlist_id; - int ret = SEQ_SKIP; - u32 engine_id; - - engine_id = gk20a_fifo_get_gr_engine_id(g); - engine_info = (f->engine_info + engine_id); - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; - - if (ch == f->channel) { - seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); - seq_puts(s, " (usecs) (msecs)\n"); - ret = 0; - } - - if (!test_bit(ch->chid, runlist->active_channels)) - return ret; - - if (gk20a_channel_get(ch)) { - tsg = tsg_gk20a_from_ch(ch); - - if (tsg) - seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", - ch->chid, - ch->tsgid, - ch->tgid, - tsg->timeslice_us, - ch->timeout_ms_max, - tsg->interleave_level, - tsg->gr_ctx.graphics_preempt_mode, - tsg->gr_ctx.compute_preempt_mode); - gk20a_channel_put(ch); - } - return 0; -} - -static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { - .start = gk20a_fifo_sched_debugfs_seq_start, - .next = gk20a_fifo_sched_debugfs_seq_next, - .stop = gk20a_fifo_sched_debugfs_seq_stop, - .show = gk20a_fifo_sched_debugfs_seq_show -}; - -static int gk20a_fifo_sched_debugfs_open(struct inode *inode, - struct file *file) -{ - struct gk20a *g = inode->i_private; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); - if (err) - return err; - - nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); - - ((struct seq_file *)file->private_data)->private = inode->i_private; - return 0; -}; - -/* - * The file operations structure contains our open function along with - * set of the canned seq_ ops. 
- */ -static const struct file_operations gk20a_fifo_sched_debugfs_fops = { - .owner = THIS_MODULE, - .open = gk20a_fifo_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static int gk20a_fifo_profile_enable(void *data, u64 val) -{ - struct gk20a *g = (struct gk20a *) data; - struct fifo_gk20a *f = &g->fifo; - - - nvgpu_mutex_acquire(&f->profile.lock); - if (val == 0) { - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, - __gk20a_fifo_profile_free); - } - } else { - if (!f->profile.enabled) { - /* not kref init as it can have a running condition if - * we enable/disable/enable while kickoff is happening - */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { - f->profile.data = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(struct fifo_profile_gk20a)); - f->profile.sorted = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(u64)); - if (!(f->profile.data && f->profile.sorted)) { - nvgpu_vfree(g, f->profile.data); - nvgpu_vfree(g, f->profile.sorted); - nvgpu_mutex_release(&f->profile.lock); - return -ENOMEM; - } - nvgpu_ref_init(&f->profile.ref); - } - atomic_set(&f->profile.get.atomic_var, 0); - f->profile.enabled = true; - } - } - nvgpu_mutex_release(&f->profile.lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE( - gk20a_fifo_profile_enable_debugfs_fops, - NULL, - gk20a_fifo_profile_enable, - "%llu\n" -); - -static int __profile_cmp(const void *a, const void *b) -{ - return *((unsigned long long *) a) - *((unsigned long long *) b); -} - -/* - * This uses about 800b in the stack, but the function using it is not part - * of a callstack where much memory is being used, so it is fine - */ -#define PERCENTILE_WIDTH 5 -#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) - -static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, - u64 *percentiles, u32 index_end, u32 index_start) -{ - unsigned int nelem = 0; - unsigned int index; - struct fifo_profile_gk20a *profile; - - for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { - profile = &g->fifo.profile.data[index]; - - if (profile->timestamp[index_end] > - profile->timestamp[index_start]) { - /* This is a valid element */ - g->fifo.profile.sorted[nelem] = - profile->timestamp[index_end] - - profile->timestamp[index_start]; - nelem++; - } - } - - /* sort it */ - sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), - __profile_cmp, NULL); - - /* build ranges */ - for (index = 0; index < PERCENTILE_RANGES; index++) { - percentiles[index] = nelem < PERCENTILE_RANGES ? 
0 : - g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * - nelem)/100 - 1]; - } - return nelem; -} - -static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - unsigned int get, nelem, index; - /* - * 800B in the stack, but function is declared statically and only - * called from debugfs handler - */ - u64 percentiles_ioctl[PERCENTILE_RANGES]; - u64 percentiles_kickoff[PERCENTILE_RANGES]; - u64 percentiles_jobtracking[PERCENTILE_RANGES]; - u64 percentiles_append[PERCENTILE_RANGES]; - u64 percentiles_userd[PERCENTILE_RANGES]; - - if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { - seq_printf(s, "Profiling disabled\n"); - return 0; - } - - get = atomic_read(&g->fifo.profile.get.atomic_var); - - __gk20a_fifo_create_stats(g, percentiles_ioctl, - PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_kickoff, - PROFILE_END, PROFILE_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_jobtracking, - PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_append, - PROFILE_APPEND, PROFILE_JOB_TRACKING); - nelem = __gk20a_fifo_create_stats(g, percentiles_userd, - PROFILE_END, PROFILE_APPEND); - - seq_printf(s, "Number of kickoffs: %d\n", nelem); - seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); - - for (index = 0; index < PERCENTILE_RANGES; index++) - seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", - PERCENTILE_WIDTH * (index+1), - percentiles_ioctl[index], - percentiles_kickoff[index], - percentiles_append[index], - percentiles_jobtracking[index], - percentiles_userd[index]); - - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); - - return 0; -} - -static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_fifo_profile_stats, inode->i_private); -} - -static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { - .open = gk20a_fifo_profile_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -void gk20a_fifo_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - struct dentry *fifo_root; - struct dentry *profile_root; - - fifo_root = debugfs_create_dir("fifo", gpu_root); - if (IS_ERR_OR_NULL(fifo_root)) - return; - - nvgpu_log(g, gpu_dbg_info, "g=%p", g); - - debugfs_create_file("sched", 0600, fifo_root, g, - &gk20a_fifo_sched_debugfs_fops); - - profile_root = debugfs_create_dir("profile", fifo_root); - if (IS_ERR_OR_NULL(profile_root)) - return; - - nvgpu_mutex_init(&g->fifo.profile.lock); - g->fifo.profile.enabled = false; - atomic_set(&g->fifo.profile.get.atomic_var, 0); - atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); - - debugfs_create_file("enable", 0600, profile_root, g, - &gk20a_fifo_profile_enable_debugfs_fops); - - debugfs_create_file("stats", 0600, profile_root, g, - &gk20a_fifo_profile_stats_debugfs_fops); - -} - -void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) -{ - if (profile) - profile->timestamp[idx] = nvgpu_current_time_ns(); -} - -void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) -{ - struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, - profile.ref); - nvgpu_vfree(f->g, f->profile.data); - nvgpu_vfree(f->g, f->profile.sorted); -} - -/* Get the next element in the ring buffer of profile entries - * and grab a reference to the structure - */ 
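__gk20a_fifo_create_stats above collapses the raw timestamp deltas into 5%-wide percentile buckets: valid deltas are copied out, sorted ascending, and the p-th percentile is read directly from the sorted array at index p * nelem / 100 - 1. The bucket extraction on its own, assuming an already-sorted input:

    #define PERCENTILE_WIDTH        5
    #define PERCENTILE_RANGES       (100 / PERCENTILE_WIDTH)

    static void build_percentiles(const unsigned long long *sorted,
                                  unsigned int nelem,
                                  unsigned long long out[PERCENTILE_RANGES])
    {
            unsigned int i;

            for (i = 0; i < PERCENTILE_RANGES; i++) {
                    /* Too few samples for a meaningful estimate: report
                     * zeros, exactly as the driver does. */
                    out[i] = nelem < PERCENTILE_RANGES ? 0 :
                            sorted[(PERCENTILE_WIDTH * (i + 1) * nelem) / 100 - 1];
            }
    }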
-struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - struct fifo_profile_gk20a *profile; - unsigned int index; - - /* If kref is zero, profiling is not enabled */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) - return NULL; - index = atomic_inc_return(&f->profile.get.atomic_var); - profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; - - return profile; -} - -/* Free the reference to the structure. This allows deferred cleanups */ -void gk20a_fifo_profile_release(struct gk20a *g, - struct fifo_profile_gk20a *profile) -{ - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); -} - -void gk20a_fifo_debugfs_deinit(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - - nvgpu_mutex_acquire(&f->profile.lock); - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); - } - nvgpu_mutex_release(&f->profile.lock); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h deleted file mode 100644 index 46ac853e..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_FIFO_H__ -#define __NVGPU_DEBUG_FIFO_H__ - -struct gk20a; -void gk20a_fifo_debugfs_init(struct gk20a *g); -void gk20a_fifo_debugfs_deinit(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c deleted file mode 100644 index d54c6d63..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_gr.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_gr.h" -#include "os_linux.h" - -#include - -int gr_gk20a_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->debugfs_gr_default_attrib_cb_size = - debugfs_create_u32("gr_default_attrib_cb_size", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.attrib_cb_default_size); - - return 0; -} - diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h deleted file mode 100644 index 4b46acbb..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_gr.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
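gk20a_fifo_profile_acquire above claims ring slots without a lock: one atomic counter is post-incremented and taken modulo the ring size, so concurrent producers land in distinct slots and old entries are simply overwritten on wrap, while the nvgpu_ref get/put pair keeps the storage alive until the last user drops it. The slot-claim idiom, restated with C11 atomics instead of the driver's nvgpu_atomic wrappers:

    #include <stdatomic.h>

    #define ENTRIES 16384

    struct entry {
            unsigned long long timestamp[8];
    };

    static struct entry ring[ENTRIES];
    static atomic_uint next_slot;

    static struct entry *ring_acquire(void)
    {
            /* fetch_add returns the pre-increment value: a unique ticket */
            unsigned int idx = atomic_fetch_add(&next_slot, 1);

            return &ring[idx % ENTRIES];
    }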
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_GR_H__ -#define __NVGPU_DEBUG_GR_H__ - -struct gk20a; -int gr_gk20a_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_GR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.c b/drivers/gpu/nvgpu/common/linux/debug_hal.c deleted file mode 100644 index 031e335e..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_hal.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_hal.h" -#include "os_linux.h" - -#include -#include - -/* Format and print a single function pointer to the specified seq_file. */ -static void __hal_print_op(struct seq_file *s, void *op_ptr) -{ - seq_printf(s, "%pF\n", op_ptr); -} - -/* - * Prints an array of function pointer addresses in op_ptrs to the - * specified seq_file - */ -static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) -{ - int i; - - for (i = 0; i < num_ops; i++) - __hal_print_op(s, op_ptrs[i]); -} - -/* - * Show file operation, which generates content of the file once. Prints a list - * of gpu operations as defined by gops and the corresponding function pointer - * destination addresses. Relies on no compiler reordering of struct fields and - * assumption that all members are function pointers. 
- */ -static int __hal_show(struct seq_file *s, void *unused) -{ - struct gpu_ops *gops = s->private; - - __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); - - return 0; -} - -static int __hal_open(struct inode *inode, struct file *file) -{ - return single_open(file, __hal_show, inode->i_private); -} - -static const struct file_operations __hal_fops = { - .open = __hal_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void nvgpu_hal_debugfs_fini(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!(l->debugfs_hal == NULL)) - debugfs_remove_recursive(l->debugfs_hal); -} - -void nvgpu_hal_debugfs_init(struct gk20a *g) -{ - struct dentry *d; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs) - return; - l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_hal)) { - l->debugfs_hal = NULL; - return; - } - - /* Pass along reference to the gpu_ops struct as private data */ - d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, - &g->ops, &__hal_fops); - if (!d) { - nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); - debugfs_remove_recursive(l->debugfs_hal); - return; - } -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.h b/drivers/gpu/nvgpu/common/linux/debug_hal.h deleted file mode 100644 index eee6f234..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_hal.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_HAL_H__ -#define __NVGPU_DEBUG_HAL_H__ - -struct gk20a; -void nvgpu_hal_debugfs_fini(struct gk20a *g); -void nvgpu_hal_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_HAL_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c deleted file mode 100644 index a0c7d47d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_kmem.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include - -#include "os_linux.h" -#include "debug_kmem.h" -#include "kmem_priv.h" - -/** - * to_human_readable_bytes - Determine suffix for passed size. - * - * @bytes - Number of bytes to generate a suffix for. - * @hr_bytes [out] - The human readable number of bytes. - * @hr_suffix [out] - The suffix for the HR number of bytes. - * - * Computes a human readable decomposition of the passed number of bytes. The - * suffix for the bytes is passed back through the @hr_suffix pointer. 
The right - * number of bytes is then passed back in @hr_bytes. This returns the following - * ranges: - * - * 0 - 1023 B - * 1 - 1023 KB - * 1 - 1023 MB - * 1 - 1023 GB - * 1 - 1023 TB - * 1 - ... PB - */ -static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, - const char **hr_suffix) -{ - static const char *suffixes[] = - { "B", "KB", "MB", "GB", "TB", "PB" }; - - u64 suffix_ind = 0; - - while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { - bytes >>= 10; - suffix_ind++; - } - - /* - * Handle case where bytes > 1023PB. - */ - suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? - suffix_ind : ARRAY_SIZE(suffixes) - 1; - - *hr_bytes = bytes; - *hr_suffix = suffixes[suffix_ind]; -} - -/** - * print_hr_bytes - Print human readable bytes - * - * @s - A seq_file to print to. May be NULL. - * @msg - A message to print before the bytes. - * @bytes - Number of bytes. - * - * Print @msg followed by the human readable decomposition of the passed number - * of bytes. - * - * If @s is NULL then this prints will be made to the kernel log. - */ -static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) -{ - u64 hr_bytes; - const char *hr_suffix; - - __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); - __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); -} - -/** - * print_histogram - Build a histogram of the memory usage. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - */ -static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - int i; - u64 pot_min, pot_max; - u64 nr_buckets; - unsigned int *buckets; - unsigned int total_allocs; - struct nvgpu_rbtree_node *node; - static const char histogram_line[] = - "++++++++++++++++++++++++++++++++++++++++"; - - /* - * pot_min is essentially a round down to the nearest power of 2. This - * is the start of the histogram. pot_max is just a round up to the - * nearest power of two. Each histogram bucket is one power of two so - * the histogram buckets are exponential. - */ - pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); - pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); - - nr_buckets = __ffs(pot_max) - __ffs(pot_min); - - buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); - if (!buckets) { - __pstat(s, "OOM: could not allocate bucket storage!?\n"); - return; - } - - /* - * Iterate across all of the allocs and determine what bucket they - * should go in. Round the size down to the nearest power of two to - * find the right bucket. - */ - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - int b; - u64 bucket_min; - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - bucket_min = (u64)rounddown_pow_of_two(alloc->size); - if (bucket_min < tracker->min_alloc) - bucket_min = tracker->min_alloc; - - b = __ffs(bucket_min) - __ffs(pot_min); - - /* - * Handle the one case were there's an alloc exactly as big as - * the maximum bucket size of the largest bucket. Most of the - * buckets have an inclusive minimum and exclusive maximum. But - * the largest bucket needs to have an _inclusive_ maximum as - * well. - */ - if (b == (int)nr_buckets) - b--; - - buckets[b]++; - - nvgpu_rbtree_enum_next(&node, node); - } - - total_allocs = 0; - for (i = 0; i < (int)nr_buckets; i++) - total_allocs += buckets[i]; - - __pstat(s, "Alloc histogram:\n"); - - /* - * Actually compute the histogram lines. 
- */ - for (i = 0; i < (int)nr_buckets; i++) { - char this_line[sizeof(histogram_line) + 1]; - u64 line_length; - u64 hr_bytes; - const char *hr_suffix; - - memset(this_line, 0, sizeof(this_line)); - - /* - * Compute the normalized line length. Cant use floating point - * so we will just multiply everything by 1000 and use fixed - * point. - */ - line_length = (1000 * buckets[i]) / total_allocs; - line_length *= sizeof(histogram_line); - line_length /= 1000; - - memset(this_line, '+', line_length); - - __to_human_readable_bytes(1 << (__ffs(pot_min) + i), - &hr_bytes, &hr_suffix); - __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", - hr_bytes, hr_bytes << 1, - hr_suffix, buckets[i], this_line); - } -} - -/** - * nvgpu_kmem_print_stats - Print kmem tracking stats. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - * - * Print stats from a tracker. If @s is non-null then seq_printf() will be - * used with @s. Otherwise the stats are pr_info()ed. - */ -void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - nvgpu_lock_tracker(tracker); - - __pstat(s, "Mem tracker: %s\n\n", tracker->name); - - __pstat(s, "Basic Stats:\n"); - __pstat(s, " Number of allocs %lld\n", - tracker->nr_allocs); - __pstat(s, " Number of frees %lld\n", - tracker->nr_frees); - print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); - print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); - print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); - print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); - print_hr_bytes(s, " Bytes allocated (real) ", - tracker->bytes_alloced_real); - print_hr_bytes(s, " Bytes freed (real) ", - tracker->bytes_freed_real); - __pstat(s, "\n"); - - print_histogram(tracker, s); - - nvgpu_unlock_tracker(tracker); -} - -static int __kmem_tracking_show(struct seq_file *s, void *unused) -{ - struct nvgpu_mem_alloc_tracker *tracker = s->private; - - nvgpu_kmem_print_stats(tracker, s); - - return 0; -} - -static int __kmem_tracking_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_tracking_show, inode->i_private); -} - -static const struct file_operations __kmem_tracking_fops = { - .open = __kmem_tracking_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __kmem_traces_dump_tracker(struct gk20a *g, - struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - struct nvgpu_rbtree_node *node; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - kmem_print_mem_alloc(g, alloc, s); - - nvgpu_rbtree_enum_next(&node, node); - } - - return 0; -} - -static int __kmem_traces_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - - nvgpu_lock_tracker(g->vmallocs); - seq_puts(s, "Oustanding vmallocs:\n"); - __kmem_traces_dump_tracker(g, g->vmallocs, s); - seq_puts(s, "\n"); - nvgpu_unlock_tracker(g->vmallocs); - - nvgpu_lock_tracker(g->kmallocs); - seq_puts(s, "Oustanding kmallocs:\n"); - __kmem_traces_dump_tracker(g, g->kmallocs, s); - nvgpu_unlock_tracker(g->kmallocs); - - return 0; -} - -static int __kmem_traces_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_traces_show, inode->i_private); -} - -static const struct file_operations __kmem_traces_fops = { - .open = __kmem_traces_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void 
nvgpu_kmem_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *node; - - l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_kmem)) - return; - - node = debugfs_create_file(g->vmallocs->name, S_IRUGO, - l->debugfs_kmem, - g->vmallocs, &__kmem_tracking_fops); - node = debugfs_create_file(g->kmallocs->name, S_IRUGO, - l->debugfs_kmem, - g->kmallocs, &__kmem_tracking_fops); - node = debugfs_create_file("traces", S_IRUGO, - l->debugfs_kmem, - g, &__kmem_traces_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h deleted file mode 100644 index 44322b53..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_kmem.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_KMEM_H__ -#define __NVGPU_DEBUG_KMEM_H__ - -struct gk20a; -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE -void nvgpu_kmem_debugfs_init(struct gk20a *g); -#endif - -#endif /* __NVGPU_DEBUG_KMEM_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c deleted file mode 100644 index f4ed992d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
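__to_human_readable_bytes() in debug_kmem.c above shifts the count right by 10 bits per step while walking the suffix table, clamping at PB. The same decomposition in compilable user-space form:

    #include <stdio.h>
    #include <stdint.h>

    static void to_human_readable(uint64_t bytes, uint64_t *hr,
                                  const char **suffix)
    {
            static const char *suffixes[] =
                    { "B", "KB", "MB", "GB", "TB", "PB" };
            size_t i = 0;

            /* Stop at the last suffix so >1023 PB stays in PB. */
            while (i < sizeof(suffixes) / sizeof(suffixes[0]) - 1 &&
                   bytes >= 1024) {
                    bytes >>= 10;
                    i++;
            }
            *hr = bytes;
            *suffix = suffixes[i];
    }

    int main(void)
    {
            uint64_t hr;
            const char *sfx;

            to_human_readable(3ULL << 30, &hr, &sfx);
            printf("%llu %s\n", (unsigned long long)hr, sfx); /* 3 GB */
            return 0;
    }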
- * - */ - -#include -#include "debug_pmu.h" -#include "os_linux.h" - -#include -#include -#include - -static int lpwr_debug_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - if (g->ops.pmu.pmu_pg_engines_feature_list && - g->ops.pmu.pmu_pg_engines_feature_list(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != - NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { - seq_printf(s, "PSTATE: %u\n" - "RPPG Enabled: %u\n" - "RPPG ref count: %u\n" - "RPPG state: %u\n" - "MSCG Enabled: %u\n" - "MSCG pstate state: %u\n" - "MSCG transition state: %u\n", - g->ops.clk_arb.get_current_pstate(g), - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat, g->mscg_enabled, - g->pmu.mscg_stat, g->pmu.mscg_transition_state); - - } else - seq_printf(s, "ELPG Enabled: %u\n" - "ELPG ref count: %u\n" - "ELPG state: %u\n", - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat); - - return 0; - -} - -static int lpwr_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, lpwr_debug_show, inode->i_private); -} - -static const struct file_operations lpwr_debug_fops = { - .open = lpwr_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_stat_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - u64 total_ingating, total_ungating, residency, divisor, dividend; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in MSCG: %llu us\n" - "Time out of MSCG: %llu us\n" - "MSCG residency ratio: %llu\n" - "MSCG Entry Count: %u\n" - "MSCG Avg Entry latency %u\n" - "MSCG Avg Exit latency %u\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int mscg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_stat_show, inode->i_private); -} - -static const struct file_operations mscg_stat_fops = { - .open = mscg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int mscg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_transitions_show, inode->i_private); -} - -static const struct file_operations mscg_transitions_fops = { - .open = mscg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_stat_show(struct seq_file *s, void *data) -{ - 
struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u64 total_ingating, total_ungating, residency, divisor, dividend; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in ELPG: %llu us\n" - "Time out of ELPG: %llu us\n" - "ELPG residency ratio: %llu\n" - "ELPG Entry Count: %u\n" - "ELPG Avg Entry latency %u us\n" - "ELPG Avg Exit latency %u us\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int elpg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_stat_show, inode->i_private); -} - -static const struct file_operations elpg_stat_fops = { - .open = elpg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int elpg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_transitions_show, inode->i_private); -} - -static const struct file_operations elpg_transitions_fops = { - .open = elpg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int falc_trace_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_pmu *pmu = &g->pmu; - u32 i = 0, j = 0, k, l, m; - char part_str[40]; - void *tracebuffer; - char *trace; - u32 *trace1; - - /* allocate system memory to copy pmu trace buffer */ - tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); - if (tracebuffer == NULL) - return -ENOMEM; - - /* read pmu traces into system memory buffer */ - nvgpu_mem_rd_n(g, &pmu->trace_buf, - 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); - - trace = (char *)tracebuffer; - trace1 = (u32 *)tracebuffer; - - for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { - for (j = 0; j < 0x40; j++) - if (trace1[(i / 4) + j]) - break; - if (j == 0x40) - break; - seq_printf(s, "Index %x: ", trace1[(i / 4)]); - l = 0; - m = 0; - while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { - if (k >= 40) - break; - strncpy(part_str, (trace+i+20+m), k); - part_str[k] = 0; - seq_printf(s, "%s0x%x", part_str, - trace1[(i / 4) + 1 + l]); - l++; - m += k + 2; - } - seq_printf(s, "%s", (trace+i+20+m)); - } - - nvgpu_kfree(g, tracebuffer); - return 0; -} - -static int falc_trace_open(struct inode *inode, struct file *file) -{ - return single_open(file, falc_trace_show, inode->i_private); -} - -static const struct file_operations 
falc_trace_fops = { - .open = falc_trace_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_enable_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); - return 0; - -} - -static int perfmon_events_enable_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_enable_show, inode->i_private); -} - -static ssize_t perfmon_events_enable_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct seq_file *s = file->private_data; - struct gk20a *g = s->private; - unsigned long val = 0; - char buf[40]; - int buf_size; - int err; - - memset(buf, 0, sizeof(buf)); - buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, userbuf, buf_size)) - return -EFAULT; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - /* Don't turn on gk20a unnecessarily */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - if (val && !g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = true; - g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); - } else if (!val && g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = false; - g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); - } - gk20a_idle(g); - } else { - g->pmu.perfmon_sampling_enabled = val ? true : false; - } - - return count; -} - -static const struct file_operations perfmon_events_enable_fops = { - .open = perfmon_events_enable_open, - .read = seq_read, - .write = perfmon_events_enable_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_count_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); - return 0; - -} - -static int perfmon_events_count_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_count_show, inode->i_private); -} - -static const struct file_operations perfmon_events_count_fops = { - .open = perfmon_events_count_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int security_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%d\n", g->pmu.pmu_mode); - return 0; - -} - -static int security_open(struct inode *inode, struct file *file) -{ - return single_open(file, security_show, inode->i_private); -} - -static const struct file_operations security_fops = { - .open = security_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int gk20a_pmu_debugfs_init(struct gk20a *g) -{ - struct dentry *d; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - d = debugfs_create_file( - "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, - &lpwr_debug_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &mscg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_transitions", S_IRUGO, l->debugfs, g, - &mscg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &elpg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_transitions", S_IRUGO, l->debugfs, g, - &elpg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "falc_trace", 
S_IRUGO, l->debugfs, g, - &falc_trace_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_enable", S_IRUGO, l->debugfs, g, - &perfmon_events_enable_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_count", S_IRUGO, l->debugfs, g, - &perfmon_events_count_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pmu_security", S_IRUGO, l->debugfs, g, - &security_fops); - if (!d) - goto err_out; - return 0; -err_out: - pr_err("%s: Failed to make debugfs node\n", __func__); - return -ENOMEM; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h deleted file mode 100644 index c4e3243d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_PMU_H__ -#define __NVGPU_DEBUG_PMU_H__ - -struct gk20a; -int gk20a_pmu_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_PMU_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c deleted file mode 100644 index 5b7cbddf..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_sched.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
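The mscg_residency and elpg_residency nodes above report residency as a ratio scaled by 1000 (987 means 98.7%), computed with div64_u64 since the kernel cannot use floating point and 64-by-64 division needs a helper on 32-bit builds. The same arithmetic with illustrative numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ingating_us = 123456; /* time power-gated (made up) */
            uint64_t ungating_us = 876544; /* time ungated (made up) */
            uint64_t divisor = ingating_us + ungating_us;
            uint64_t residency = divisor ? ingating_us * 1000 / divisor : 0;

            /* 123456 * 1000 / 1000000 -> 123, i.e. 12.3% residency */
            printf("residency ratio: %llu\n", (unsigned long long)residency);
            return 0;
    }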
- * - */ - -#include "debug_sched.h" -#include "os_linux.h" - -#include -#include - -static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - bool sched_busy = true; - - int n = sched->bitmap_size / sizeof(u64); - int i; - int err; - - err = gk20a_busy(g); - if (err) - return err; - - if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { - sched_busy = false; - nvgpu_mutex_release(&sched->busy_lock); - } - - seq_printf(s, "control_locked=%d\n", sched->control_locked); - seq_printf(s, "busy=%d\n", sched_busy); - seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); - - nvgpu_mutex_acquire(&sched->status_lock); - - seq_puts(s, "active_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); - - seq_puts(s, "recent_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); - - nvgpu_mutex_release(&sched->status_lock); - - gk20a_idle(g); - - return 0; -} - -static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_sched_debugfs_show, inode->i_private); -} - -static const struct file_operations gk20a_sched_debugfs_fops = { - .open = gk20a_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void gk20a_sched_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, - g, &gk20a_sched_debugfs_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h deleted file mode 100644 index 34a8f55f..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_sched.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_SCHED_H__ -#define __NVGPU_DEBUG_SCHED_H__ - -struct gk20a; -void gk20a_sched_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_SCHED_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.c b/drivers/gpu/nvgpu/common/linux/debug_xve.c deleted file mode 100644 index 743702a2..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_xve.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
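gk20a_sched_debugfs_show above answers "is the scheduler busy?" without blocking: if the trylock on busy_lock succeeds, the lock was free, so it is dropped again and busy is reported as false; if it fails, some other path holds it right now. The probe pattern, sketched with pthreads standing in for nvgpu_mutex (an assumption, not the driver API):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t busy_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Snapshot only: the answer can be stale the moment it returns. */
    static bool is_busy(void)
    {
            if (pthread_mutex_trylock(&busy_lock) == 0) {
                    pthread_mutex_unlock(&busy_lock);
                    return false;   /* lock was free */
            }
            return true;            /* currently held elsewhere */
    }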
- * - */ - -#include -#include - -#include "debug_xve.h" -#include "os_linux.h" - -#include -#include - -static ssize_t xve_link_speed_write(struct file *filp, - const char __user *buff, - size_t len, loff_t *off) -{ - struct gk20a *g = ((struct seq_file *)filp->private_data)->private; - char kbuff[16]; - u32 buff_size, check_len; - u32 link_speed = 0; - int ret; - - buff_size = min_t(size_t, 16, len); - - memset(kbuff, 0, 16); - if (copy_from_user(kbuff, buff, buff_size)) - return -EFAULT; - - check_len = strlen("Gen1"); - if (strncmp(kbuff, "Gen1", check_len) == 0) - link_speed = GPU_XVE_SPEED_2P5; - else if (strncmp(kbuff, "Gen2", check_len) == 0) - link_speed = GPU_XVE_SPEED_5P0; - else if (strncmp(kbuff, "Gen3", check_len) == 0) - link_speed = GPU_XVE_SPEED_8P0; - else - nvgpu_err(g, "%s: Unknown PCIe speed: %s", - __func__, kbuff); - - if (!link_speed) - return -EINVAL; - - /* Brief pause... To help rate limit this. */ - nvgpu_msleep(250); - - /* - * And actually set the speed. Yay. - */ - ret = g->ops.xve.set_speed(g, link_speed); - if (ret) - return ret; - - return len; -} - -static int xve_link_speed_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 speed; - int err; - - err = g->ops.xve.get_speed(g, &speed); - if (err) - return err; - - seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); - - return 0; -} - -static int xve_link_speed_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_link_speed_show, inode->i_private); -} - -static const struct file_operations xve_link_speed_fops = { - .open = xve_link_speed_open, - .read = seq_read, - .write = xve_link_speed_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xve_available_speeds_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 available_speeds; - - g->ops.xve.available_speeds(g, &available_speeds); - - seq_puts(s, "Available PCIe bus speeds:\n"); - if (available_speeds & GPU_XVE_SPEED_2P5) - seq_puts(s, " Gen1\n"); - if (available_speeds & GPU_XVE_SPEED_5P0) - seq_puts(s, " Gen2\n"); - if (available_speeds & GPU_XVE_SPEED_8P0) - seq_puts(s, " Gen3\n"); - - return 0; -} - -static int xve_available_speeds_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_available_speeds_show, inode->i_private); -} - -static const struct file_operations xve_available_speeds_fops = { - .open = xve_available_speeds_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xve_link_control_status_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 link_status; - - link_status = g->ops.xve.get_link_control_status(g); - seq_printf(s, "0x%08x\n", link_status); - - return 0; -} - -static int xve_link_control_status_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_link_control_status_show, inode->i_private); -} - -static const struct file_operations xve_link_control_status_fops = { - .open = xve_link_control_status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int nvgpu_xve_debugfs_init(struct gk20a *g) -{ - int err = -ENODEV; - - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - - l->debugfs_xve = debugfs_create_dir("xve", gpu_root); - if (IS_ERR_OR_NULL(l->debugfs_xve)) - goto fail; - - /* - * These are just debug nodes. If they fail to get made it's not worth - * worrying the higher level SW. 
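
xve_link_speed_write() above is the usual recipe for a writable debugfs node: bound the user copy into a small zeroed stack buffer, then strncmp against a fixed keyword set. One hardening worth noting: the original copies up to the full 16 bytes, so a 16-byte write leaves kbuff unterminated for the error-path "%s" print. The sketch below (demo_* names and the "on"/"off" keywords are illustrative) reserves a byte for the terminator.

    #include <linux/fs.h>
    #include <linux/kernel.h>
    #include <linux/uaccess.h>

    static ssize_t demo_write(struct file *filp, const char __user *buff,
                              size_t len, loff_t *off)
    {
            char kbuf[16] = { 0 };
            /* leave room for the NUL so kbuf is always a valid C string */
            size_t n = min_t(size_t, sizeof(kbuf) - 1, len);
            int val;

            if (copy_from_user(kbuf, buff, n))
                    return -EFAULT;

            if (strncmp(kbuf, "on", 2) == 0)
                    val = 1;
            else if (strncmp(kbuf, "off", 3) == 0)
                    val = 0;
            else
                    return -EINVAL;

            pr_info("demo: set %d\n", val);
            return len;     /* claim the whole write, as the original does */
    }
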
- */ - debugfs_create_file("link_speed", S_IRUGO, - l->debugfs_xve, g, - &xve_link_speed_fops); - debugfs_create_file("available_speeds", S_IRUGO, - l->debugfs_xve, g, - &xve_available_speeds_fops); - debugfs_create_file("link_control_status", S_IRUGO, - l->debugfs_xve, g, - &xve_link_control_status_fops); - - err = 0; -fail: - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.h b/drivers/gpu/nvgpu/common/linux/debug_xve.h deleted file mode 100644 index f3b1ac54..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_xve.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_XVE_H__ -#define __NVGPU_DEBUG_XVE_H__ - -struct gk20a; -int nvgpu_xve_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_SVE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c deleted file mode 100644 index f513dcd6..00000000 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ /dev/null @@ -1,694 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" - -#ifdef __DMA_ATTRS_LONGS -#define NVGPU_DEFINE_DMA_ATTRS(x) \ - struct dma_attrs x = { \ - .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ - } -#define NVGPU_DMA_ATTR(attrs) &attrs -#else -#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 -#define NVGPU_DMA_ATTR(attrs) attrs -#endif - -/* - * Enough to hold all the possible flags in string form. When a new flag is - * added it must be added here as well!! - */ -#define NVGPU_DMA_STR_SIZE \ - sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") - -/* - * The returned string is kmalloc()ed here but must be freed by the caller. - */ -static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) -{ - char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); - int bytes_available = NVGPU_DMA_STR_SIZE; - - /* - * Return the empty buffer if there's no flags. Makes it easier on the - * calling code to just print it instead of any if (NULL) type logic. 
- */ - if (!flags) - return buf; - -#define APPEND_FLAG(flag, str_flag) \ - do { \ - if (flags & flag) { \ - strncat(buf, str_flag, bytes_available); \ - bytes_available -= strlen(str_flag); \ - } \ - } while (0) - - APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); - APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); -#undef APPEND_FLAG - - return buf; -} - -/** - * __dma_dbg - Debug print for DMA allocs and frees. - * - * @g - The GPU. - * @size - The requested size of the alloc (size_t). - * @flags - The flags (unsigned long). - * @type - A string describing the type (i.e: sysmem or vidmem). - * @what - A string with 'alloc' or 'free'. - * - * @flags is the DMA flags. If there are none or it doesn't make sense to print - * flags just pass 0. - * - * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. - */ -static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, - const char *type, const char *what) -{ - char *flags_str = NULL; - - /* - * Don't bother making the flags_str if debugging is - * not enabled. This saves a malloc and a free. - */ - if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) - return; - - flags_str = nvgpu_dma_flags_to_str(g, flags); - - __nvgpu_log_dbg(g, gpu_dbg_dma, - __func__, __LINE__, - "DMA %s: [%s] size=%-7zu " - "aligned=%-7zu total=%-10llukB %s", - what, type, - size, PAGE_ALIGN(size), - g->dma_memory_used >> 10, - flags_str); - - if (flags_str) - nvgpu_kfree(g, flags_str); -} - -#define dma_dbg_alloc(g, size, flags, type) \ - __dma_dbg(g, size, flags, type, "alloc") -#define dma_dbg_free(g, size, flags, type) \ - __dma_dbg(g, size, flags, type, "free") - -/* - * For after the DMA alloc is done. - */ -#define __dma_dbg_done(g, size, type, what) \ - nvgpu_log(g, gpu_dbg_dma, \ - "DMA %s: [%s] size=%-7zu Done!", \ - what, type, size); \ - -#define dma_dbg_alloc_done(g, size, type) \ - __dma_dbg_done(g, size, type, "alloc") -#define dma_dbg_free_done(g, size, type) \ - __dma_dbg_done(g, size, type, "free") - -#if defined(CONFIG_GK20A_VIDMEM) -static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, - size_t size) -{ - u64 addr = 0; - - if (at) - addr = nvgpu_alloc_fixed(allocator, at, size, 0); - else - addr = nvgpu_alloc(allocator, size); - - return addr; -} -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, - unsigned long flags) -#define ATTR_ARG(x) *x -#else -static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, - unsigned long flags) -#define ATTR_ARG(x) x -#endif -{ - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); - if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); -#undef ATTR_ARG -} - -int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags(g, 0, size, mem); -} - -int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, - struct nvgpu_mem *mem) -{ - if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. - * - * Incoming flags are ignored here, since bits other than the - * no-kernel-mapping flag are ignored by the vidmem mapping - * functions anyway. 
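
nvgpu_dma_flags_to_str() and its APPEND_FLAG() macro above build a flag-name string one set bit at a time while hand-tracking the remaining space. strlcat() does the same bookkeeping internally and cannot overrun the destination; a sketch of the same idea, with purely illustrative flag names:

    #include <linux/bitops.h>
    #include <linux/string.h>

    #define DEMO_FLAG_A     BIT(0)
    #define DEMO_FLAG_B     BIT(1)

    static void demo_flags_to_str(unsigned long flags, char *buf, size_t size)
    {
            buf[0] = '\0';
            if (flags & DEMO_FLAG_A)
                    strlcat(buf, "A ", size);  /* bounds the total length */
            if (flags & DEMO_FLAG_B)
                    strlcat(buf, "B ", size);
    }
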
- */ - int err = nvgpu_dma_alloc_flags_vid(g, - NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. - */ - } - - return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); -} - -int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); -} - -int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - int err; - dma_addr_t iova; - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - void *alloc_ret; - - if (nvgpu_mem_is_valid(mem)) { - nvgpu_warn(g, "memory leak !!"); - WARN_ON(1); - } - - /* - * WAR for IO coherent chips: the DMA API does not seem to generate - * mappings that work correctly. Unclear why - Bug ID: 2040115. - * - * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING - * and then make a vmap() ourselves. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - flags |= NVGPU_DMA_NO_KERNEL_MAPPING; - - /* - * Before the debug print so we see this in the total. But during - * cleanup in the fail path this has to be subtracted. - */ - g->dma_memory_used += PAGE_ALIGN(size); - - dma_dbg_alloc(g, size, flags, "sysmem"); - - /* - * Save the old size but for actual allocation purposes the size is - * going to be page aligned. - */ - mem->size = size; - size = PAGE_ALIGN(size); - - nvgpu_dma_flags_to_attrs(&dma_attrs, flags); - - alloc_ret = dma_alloc_attrs(d, size, &iova, - GFP_KERNEL|__GFP_ZERO, - NVGPU_DMA_ATTR(dma_attrs)); - if (!alloc_ret) - return -ENOMEM; - - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - mem->priv.pages = alloc_ret; - err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, - mem->priv.pages, - iova, size); - } else { - mem->cpu_va = alloc_ret; - err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, - iova, size, flags); - } - if (err) - goto fail_free_dma; - - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { - mem->cpu_va = vmap(mem->priv.pages, - size >> PAGE_SHIFT, - 0, PAGE_KERNEL); - if (!mem->cpu_va) { - err = -ENOMEM; - goto fail_free_sgt; - } - } - - mem->aligned_size = size; - mem->aperture = APERTURE_SYSMEM; - mem->priv.flags = flags; - - dma_dbg_alloc_done(g, mem->size, "sysmem"); - - return 0; - -fail_free_sgt: - nvgpu_free_sgtable(g, &mem->priv.sgt); -fail_free_dma: - dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); - mem->cpu_va = NULL; - mem->priv.sgt = NULL; - mem->size = 0; - g->dma_memory_used -= mem->aligned_size; - return err; -} - -int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_vid(g, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); -} - -int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem, u64 at) -{ -#if defined(CONFIG_GK20A_VIDMEM) - u64 addr; - int err; - struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? 
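
With the flag plumbing stripped away, the sysmem path above reduces to one core call. On kernels >= 4.9 (the branch the NVGPU_DEFINE_DMA_ATTRS shim selects) attrs are a plain unsigned long, and on the IOMMU path this driver runs on, DMA_ATTR_NO_KERNEL_MAPPING makes dma_alloc_attrs() return a page array rather than a kernel VA. A reduced sketch; demo_dma_alloc is not an nvgpu function:

    #include <linux/dma-mapping.h>

    static void *demo_dma_alloc(struct device *dev, size_t size,
                                dma_addr_t *iova, bool no_kmap)
    {
            unsigned long attrs = no_kmap ? DMA_ATTR_NO_KERNEL_MAPPING : 0;

            size = PAGE_ALIGN(size);        /* allocate whole pages */

            /*
             * With DMA_ATTR_NO_KERNEL_MAPPING the return value is really
             * a struct page **; otherwise it is a CPU virtual address.
             */
            return dma_alloc_attrs(dev, size, iova,
                                   GFP_KERNEL | __GFP_ZERO, attrs);
    }
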
- &g->mm.vidmem.allocator : - &g->mm.vidmem.bootstrap_allocator; - int before_pending; - - if (nvgpu_mem_is_valid(mem)) { - nvgpu_warn(g, "memory leak !!"); - WARN_ON(1); - } - - dma_dbg_alloc(g, size, flags, "vidmem"); - - mem->size = size; - size = PAGE_ALIGN(size); - - if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) - return -ENOSYS; - - /* - * Our own allocator doesn't have any flags yet, and we can't - * kernel-map these, so require explicit flags. - */ - WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); - addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); - nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); - if (!addr) { - /* - * If memory is known to be freed soon, let the user know that - * it may be available after a while. - */ - if (before_pending) - return -EAGAIN; - else - return -ENOMEM; - } - - if (at) - mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; - - mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!mem->priv.sgt) { - err = -ENOMEM; - goto fail_physfree; - } - - err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); - if (err) - goto fail_kfree; - - nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); - sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); - - mem->aligned_size = size; - mem->aperture = APERTURE_VIDMEM; - mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; - mem->allocator = vidmem_alloc; - mem->priv.flags = flags; - - nvgpu_init_list_node(&mem->clear_list_entry); - - dma_dbg_alloc_done(g, mem->size, "vidmem"); - - return 0; - -fail_kfree: - nvgpu_kfree(g, mem->priv.sgt); -fail_physfree: - nvgpu_free(&g->mm.vidmem.allocator, addr); - mem->size = 0; - return err; -#else - return -ENOSYS; -#endif -} - -int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags(vm, 0, size, mem); -} - -int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using nvgpu_dma_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. - */ - int err = nvgpu_dma_alloc_map_flags_vid(vm, - flags | NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. 
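
The vidmem allocation above never touches struct pages at all: it wraps the allocator handle in a one-entry sg_table so the rest of the driver can treat sysmem and vidmem buffers uniformly. The skeleton of that trick, assuming nothing beyond the stock scatterlist API (demo_vidmem_sgt is a stand-in name):

    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static struct sg_table *demo_vidmem_sgt(unsigned int size)
    {
            struct sg_table *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);

            if (!tbl)
                    return NULL;
            if (sg_alloc_table(tbl, 1, GFP_KERNEL)) {
                    kfree(tbl);
                    return NULL;
            }
            /*
             * No CPU page backs this entry; the driver stores its own
             * allocator handle where a page pointer would normally live.
             */
            sg_set_page(tbl->sgl, NULL, size, 0);
            return tbl;
    }
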
- */ - } - - return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); -} - -int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); -} - -int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - nvgpu_dma_free(vm->mm->g, mem); - return err; -} - -int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags_vid(vm, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - nvgpu_dma_free(vm->mm->g, mem); - return err; -} - -static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - - g->dma_memory_used -= mem->aligned_size; - - dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); - - if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && - !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && - (mem->cpu_va || mem->priv.pages)) { - /* - * Free side of WAR for bug 2040115. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - vunmap(mem->cpu_va); - - if (mem->priv.flags) { - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - - nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); - - if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - dma_free_attrs(d, mem->aligned_size, mem->priv.pages, - sg_dma_address(mem->priv.sgt->sgl), - NVGPU_DMA_ATTR(dma_attrs)); - } else { - dma_free_attrs(d, mem->aligned_size, mem->cpu_va, - sg_dma_address(mem->priv.sgt->sgl), - NVGPU_DMA_ATTR(dma_attrs)); - } - } else { - dma_free_coherent(d, mem->aligned_size, mem->cpu_va, - sg_dma_address(mem->priv.sgt->sgl)); - } - mem->cpu_va = NULL; - mem->priv.pages = NULL; - } - - /* - * When this flag is set we expect that pages is still populated but not - * by the DMA API. - */ - if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) - nvgpu_kfree(g, mem->priv.pages); - - if (mem->priv.sgt) - nvgpu_free_sgtable(g, &mem->priv.sgt); - - dma_dbg_free_done(g, mem->size, "sysmem"); - - mem->size = 0; - mem->aligned_size = 0; - mem->aperture = APERTURE_INVALID; -} - -static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) -{ -#if defined(CONFIG_GK20A_VIDMEM) - size_t mem_size = mem->size; - - dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); - - /* Sanity check - only this supported when allocating. */ - WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { - int err = nvgpu_vidmem_clear_list_enqueue(g, mem); - - /* - * If there's an error here then that means we can't clear the - * vidmem. That's too bad; however, we still own the nvgpu_mem - * buf so we have to free that. 
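
nvgpu_dma_alloc_map_flags_sys() and its vidmem twin above both follow the standard two-step-with-unwind idiom: allocate, map, and on a map failure release exactly what the first step acquired. Reduced to its shape, with placeholder demo_* types and helpers:

    struct demo_vm;
    struct demo_mem { u64 gpu_va; };

    int demo_alloc(struct demo_vm *vm, size_t size, struct demo_mem *mem);
    u64 demo_map(struct demo_vm *vm, struct demo_mem *mem, size_t size);
    void demo_free(struct demo_vm *vm, struct demo_mem *mem);

    static int demo_alloc_map(struct demo_vm *vm, size_t size,
                              struct demo_mem *mem)
    {
            int err = demo_alloc(vm, size, mem);

            if (err)
                    return err;

            mem->gpu_va = demo_map(vm, mem, size);
            if (!mem->gpu_va) {
                    demo_free(vm, mem);     /* undo step one, nothing more */
                    return -ENOMEM;
            }
            return 0;
    }
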
- * - * We don't need to worry about the vidmem allocator itself - * since when that gets cleaned up in the driver shutdown path - * all the outstanding allocs are force freed. - */ - if (err) - nvgpu_kfree(g, mem); - } else { - nvgpu_memset(g, mem, 0, 0, mem->aligned_size); - nvgpu_free(mem->allocator, - (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); - nvgpu_free_sgtable(g, &mem->priv.sgt); - - mem->size = 0; - mem->aligned_size = 0; - mem->aperture = APERTURE_INVALID; - } - - dma_dbg_free_done(g, mem_size, "vidmem"); -#endif -} - -void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) -{ - switch (mem->aperture) { - case APERTURE_SYSMEM: - return nvgpu_dma_free_sys(g, mem); - case APERTURE_VIDMEM: - return nvgpu_dma_free_vid(g, mem); - default: - break; /* like free() on "null" memory */ - } -} - -void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) -{ - if (mem->gpu_va) - nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); - mem->gpu_va = 0; - - nvgpu_dma_free(vm->mm->g, mem); -} - -int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, - void *cpuva, u64 iova, size_t size, unsigned long flags) -{ - int err = 0; - struct sg_table *tbl; - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - - tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!tbl) { - err = -ENOMEM; - goto fail; - } - - nvgpu_dma_flags_to_attrs(&dma_attrs, flags); - err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, - size, NVGPU_DMA_ATTR(dma_attrs)); - if (err) - goto fail; - - sg_dma_address(tbl->sgl) = iova; - *sgt = tbl; - - return 0; - -fail: - if (tbl) - nvgpu_kfree(g, tbl); - - return err; -} - -int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, - void *cpuva, u64 iova, size_t size) -{ - return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); -} - -int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, - struct page **pages, u64 iova, size_t size) -{ - int err = 0; - struct sg_table *tbl; - - tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!tbl) { - err = -ENOMEM; - goto fail; - } - - err = sg_alloc_table_from_pages(tbl, pages, - DIV_ROUND_UP(size, PAGE_SIZE), - 0, size, GFP_KERNEL); - if (err) - goto fail; - - sg_dma_address(tbl->sgl) = iova; - *sgt = tbl; - - return 0; - -fail: - if (tbl) - nvgpu_kfree(g, tbl); - - return err; -} - -void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) -{ - sg_free_table(*sgt); - nvgpu_kfree(g, *sgt); - *sgt = NULL; -} - -bool nvgpu_iommuable(struct gk20a *g) -{ -#ifdef CONFIG_TEGRA_GK20A - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - /* - * Check against the nvgpu device to see if it's been marked as - * IOMMU'able. - */ - if (!device_is_iommuable(l->dev)) - return false; -#endif - - return true; -} diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c deleted file mode 100644 index 129739f0..00000000 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.c +++ /dev/null @@ -1,218 +0,0 @@ -/* -* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
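
nvgpu_get_sgtable_from_pages() above is a thin wrapper over the stock scatterlist helper, with the caller-supplied IOVA stamped onto the first entry because the DMA mapping already exists. The essential calls in isolation (demo_sgt_from_pages is a stand-in name):

    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static struct sg_table *demo_sgt_from_pages(struct page **pages,
                                                size_t size, dma_addr_t iova)
    {
            struct sg_table *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);

            if (!tbl)
                    return NULL;
            if (sg_alloc_table_from_pages(tbl, pages,
                                          DIV_ROUND_UP(size, PAGE_SIZE),
                                          0, size, GFP_KERNEL)) {
                    kfree(tbl);
                    return NULL;
            }
            sg_dma_address(tbl->sgl) = iova;  /* mapping already exists */
            return tbl;
    }
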
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "dmabuf.h" -#include "os_linux.h" - -static void gk20a_mm_delete_priv(void *_priv) -{ - struct gk20a_buffer_state *s, *s_tmp; - struct gk20a_dmabuf_priv *priv = _priv; - struct gk20a *g; - - if (!priv) - return; - - g = priv->g; - - if (priv->comptags.allocated && priv->comptags.lines) { - BUG_ON(!priv->comptag_allocator); - gk20a_comptaglines_free(priv->comptag_allocator, - priv->comptags.offset, - priv->comptags.lines); - } - - /* Free buffer states */ - nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, - gk20a_buffer_state, list) { - gk20a_fence_put(s->fence); - nvgpu_list_del(&s->list); - nvgpu_kfree(g, s); - } - - nvgpu_kfree(g, priv); -} - -enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, - struct dma_buf *dmabuf) -{ - struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); - bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); - - if (buf_owner == NULL) { - /* Not nvgpu-allocated, assume system memory */ - return APERTURE_SYSMEM; - } else if (WARN_ON(buf_owner == g && unified_memory)) { - /* Looks like our video memory, but this gpu doesn't support - * it. Warn about a bug and bail out */ - nvgpu_warn(g, - "dmabuf is our vidmem but we don't have local vidmem"); - return APERTURE_INVALID; - } else if (buf_owner != g) { - /* Someone else's vidmem */ - return APERTURE_INVALID; - } else { - /* Yay, buf_owner == g */ - return APERTURE_VIDMEM; - } -} - -struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment **attachment) -{ - struct gk20a_dmabuf_priv *priv; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (WARN_ON(!priv)) - return ERR_PTR(-EINVAL); - - nvgpu_mutex_acquire(&priv->lock); - - if (priv->pin_count == 0) { - priv->attach = dma_buf_attach(dmabuf, dev); - if (IS_ERR(priv->attach)) { - nvgpu_mutex_release(&priv->lock); - return (struct sg_table *)priv->attach; - } - - priv->sgt = dma_buf_map_attachment(priv->attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(priv->sgt)) { - dma_buf_detach(dmabuf, priv->attach); - nvgpu_mutex_release(&priv->lock); - return priv->sgt; - } - } - - priv->pin_count++; - nvgpu_mutex_release(&priv->lock); - *attachment = priv->attach; - return priv->sgt; -} - -void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment, - struct sg_table *sgt) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); - dma_addr_t dma_addr; - - if (IS_ERR(priv) || !priv) - return; - - nvgpu_mutex_acquire(&priv->lock); - WARN_ON(priv->sgt != sgt); - WARN_ON(priv->attach != attachment); - priv->pin_count--; - WARN_ON(priv->pin_count < 0); - dma_addr = sg_dma_address(priv->sgt->sgl); - if (priv->pin_count == 0) { - dma_buf_unmap_attachment(priv->attach, priv->sgt, - DMA_BIDIRECTIONAL); - dma_buf_detach(dmabuf, priv->attach); - } - nvgpu_mutex_release(&priv->lock); -} - -int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) -{ - struct gk20a *g = gk20a_get_platform(dev)->g; - struct gk20a_dmabuf_priv *priv; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (likely(priv)) - return 0; - - nvgpu_mutex_acquire(&g->mm.priv_lock); - priv = dma_buf_get_drvdata(dmabuf, dev); - if (priv) - goto priv_exist_or_err; - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - 
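
gk20a_mm_pin()/gk20a_mm_unpin() above implement refcounted dma-buf pinning: only the first pin attaches and maps, only the last unpin detaches. A stripped-down sketch of the pin side; struct demo_priv stands in for the per-buffer private data and drops the original's distinct error returns.

    #include <linux/dma-buf.h>
    #include <linux/err.h>
    #include <linux/mutex.h>

    struct demo_priv {
            struct mutex lock;
            struct dma_buf_attachment *attach;
            struct sg_table *sgt;
            int pin_count;
    };

    static struct sg_table *demo_pin(struct demo_priv *p, struct dma_buf *buf,
                                     struct device *dev)
    {
            struct sg_table *sgt = NULL;

            mutex_lock(&p->lock);
            if (p->pin_count == 0) {
                    p->attach = dma_buf_attach(buf, dev);
                    if (IS_ERR(p->attach))
                            goto out;
                    p->sgt = dma_buf_map_attachment(p->attach,
                                                    DMA_BIDIRECTIONAL);
                    if (IS_ERR(p->sgt)) {
                            dma_buf_detach(buf, p->attach);
                            goto out;
                    }
            }
            p->pin_count++;         /* later pins just take a reference */
            sgt = p->sgt;
    out:
            mutex_unlock(&p->lock);
            return sgt;
    }
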
priv = ERR_PTR(-ENOMEM); - goto priv_exist_or_err; - } - - nvgpu_mutex_init(&priv->lock); - nvgpu_init_list_node(&priv->states); - priv->g = g; - dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); - -priv_exist_or_err: - nvgpu_mutex_release(&g->mm.priv_lock); - if (IS_ERR(priv)) - return -ENOMEM; - - return 0; -} - -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, - u64 offset, struct gk20a_buffer_state **state) -{ - int err = 0; - struct gk20a_dmabuf_priv *priv; - struct gk20a_buffer_state *s; - struct device *dev = dev_from_gk20a(g); - - if (WARN_ON(offset >= (u64)dmabuf->size)) - return -EINVAL; - - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); - if (err) - return err; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (WARN_ON(!priv)) - return -ENOSYS; - - nvgpu_mutex_acquire(&priv->lock); - - nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) - if (s->offset == offset) - goto out; - - /* State not found, create state. */ - s = nvgpu_kzalloc(g, sizeof(*s)); - if (!s) { - err = -ENOMEM; - goto out; - } - - s->offset = offset; - nvgpu_init_list_node(&s->list); - nvgpu_mutex_init(&s->lock); - nvgpu_list_add_tail(&s->list, &priv->states); - -out: - nvgpu_mutex_release(&priv->lock); - if (!err) - *state = s; - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.h b/drivers/gpu/nvgpu/common/linux/dmabuf.h deleted file mode 100644 index 8399eaaf..00000000 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __COMMON_LINUX_DMABUF_H__ -#define __COMMON_LINUX_DMABUF_H__ - -#include -#include -#include -#include - -struct sg_table; -struct dma_buf; -struct dma_buf_attachment; -struct device; - -struct gk20a; -struct gk20a_buffer_state; - -struct gk20a_dmabuf_priv { - struct nvgpu_mutex lock; - - struct gk20a *g; - - struct gk20a_comptag_allocator *comptag_allocator; - struct gk20a_comptags comptags; - - struct dma_buf_attachment *attach; - struct sg_table *sgt; - - int pin_count; - - struct nvgpu_list_node states; - - u64 buffer_id; -}; - -struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment **attachment); -void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment, - struct sg_table *sgt); - -int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); - -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, - u64 offset, struct gk20a_buffer_state **state); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c deleted file mode 100644 index 8f33c5d2..00000000 --- a/drivers/gpu/nvgpu/common/linux/driver_common.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
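
gk20a_dmabuf_alloc_drvdata() above uses double-checked initialization: a cheap unlocked lookup for the common case, then a re-check under the mutex so two racing callers cannot both allocate. The shape of it, with hypothetical demo_* types; READ_ONCE/WRITE_ONCE are added here for clarity, where the original relies on dma_buf_get_drvdata()'s own synchronization.

    #include <linux/mutex.h>
    #include <linux/slab.h>

    struct demo_priv { int refs; };
    struct demo_dev {
            struct mutex lock;
            struct demo_priv *priv;
    };

    static struct demo_priv *demo_get_priv(struct demo_dev *d)
    {
            struct demo_priv *priv = READ_ONCE(d->priv);

            if (priv)
                    return priv;    /* fast path, no lock taken */

            mutex_lock(&d->lock);
            priv = d->priv;         /* re-check: someone may have won */
            if (!priv) {
                    priv = kzalloc(sizeof(*priv), GFP_KERNEL);
                    if (priv)
                            WRITE_ONCE(d->priv, priv);
            }
            mutex_unlock(&d->lock);
            return priv;
    }
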
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "module.h" -#include "os_linux.h" -#include "sysfs.h" -#include "ioctl.h" -#include "gk20a/regops_gk20a.h" - -#define EMC3D_DEFAULT_RATIO 750 - -void nvgpu_kernel_restart(void *cmd) -{ - kernel_restart(cmd); -} - -static void nvgpu_init_vars(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - - nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); - nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); - - init_rwsem(&l->busy_lock); - nvgpu_rwsem_init(&g->deterministic_busy); - - nvgpu_spinlock_init(&g->mc_enable_lock); - - nvgpu_mutex_init(&platform->railgate_lock); - nvgpu_mutex_init(&g->dbg_sessions_lock); - nvgpu_mutex_init(&g->client_lock); - nvgpu_mutex_init(&g->poweron_lock); - nvgpu_mutex_init(&g->poweroff_lock); - nvgpu_mutex_init(&g->ctxsw_disable_lock); - - l->regs_saved = l->regs; - l->bar1_saved = l->bar1; - - g->emc3d_ratio = EMC3D_DEFAULT_RATIO; - - /* Set DMA parameters to allow larger sgt lists */ - dev->dma_parms = &l->dma_parms; - dma_set_max_seg_size(dev, UINT_MAX); - - /* - * A default of 16GB is the largest supported DMA size that is - * acceptable to all currently supported Tegra SoCs. 
- */ - if (!platform->dma_mask) - platform->dma_mask = DMA_BIT_MASK(34); - - dma_set_mask(dev, platform->dma_mask); - dma_set_coherent_mask(dev, platform->dma_mask); - - nvgpu_init_list_node(&g->profiler_objects); - - nvgpu_init_list_node(&g->boardobj_head); - nvgpu_init_list_node(&g->boardobjgrp_head); -} - -static void nvgpu_init_gr_vars(struct gk20a *g) -{ - gk20a_init_gr(g); - - nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); - g->gr.max_comptag_mem = totalram_pages - >> (10 - (PAGE_SHIFT - 10)); -} - -static void nvgpu_init_timeout(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->timeouts_disabled_by_user = false; - nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); - - if (nvgpu_platform_is_silicon(g)) { - g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; - } else if (nvgpu_platform_is_fpga(g)) { - g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; - } else { - g->gr_idle_timeout_default = (u32)ULONG_MAX; - } - g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; - g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; -} - -static void nvgpu_init_timeslice(struct gk20a *g) -{ - g->runlist_interleave = true; - - g->timeslice_low_priority_us = 1300; - g->timeslice_medium_priority_us = 2600; - g->timeslice_high_priority_us = 5200; - - g->min_timeslice_us = 1000; - g->max_timeslice_us = 50000; -} - -static void nvgpu_init_pm_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - /* - * Set up initial power settings. For non-slicon platforms, disable - * power features and for silicon platforms, read from platform data - */ - g->slcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; - g->blcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; - g->elcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; - g->elpg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; - g->aelpg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; - g->mscg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; - g->can_elpg = - nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; - - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, - nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, - nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, - nvgpu_platform_is_silicon(g) ? 
platform->can_blcg : false); - - g->aggressive_sync_destroy = platform->aggressive_sync_destroy; - g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; - g->has_syncpoints = platform->has_syncpoints; -#ifdef CONFIG_NVGPU_SUPPORT_CDE - g->has_cde = platform->has_cde; -#endif - g->ptimer_src_freq = platform->ptimer_src_freq; - g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); - g->can_railgate = platform->can_railgate_init; - g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; - /* if default delay is not set, set default delay to 500msec */ - if (platform->railgate_delay_init) - g->railgate_delay = platform->railgate_delay_init; - else - g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; - __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); - - /* set default values to aelpg parameters */ - g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; - g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; - g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; - g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; - g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; - - __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); -} - -static void nvgpu_init_vbios_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); - g->vbios_min_version = platform->vbios_min_version; -} - -static void nvgpu_init_ltc_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->ltc_streamid = platform->ltc_streamid; -} - -static void nvgpu_init_mm_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->mm.disable_bigpage = platform->disable_bigpage; - __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, - platform->honors_aperture); - __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, - platform->unified_memory); - __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, - platform->unify_address_spaces); - - nvgpu_mutex_init(&g->mm.tlb_lock); - nvgpu_mutex_init(&g->mm.priv_lock); -} - -int nvgpu_probe(struct gk20a *g, - const char *debugfs_symlink, - const char *interface_name, - struct class *class) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err = 0; - - nvgpu_init_vars(g); - nvgpu_init_gr_vars(g); - nvgpu_init_timeout(g); - nvgpu_init_timeslice(g); - nvgpu_init_pm_vars(g); - nvgpu_init_vbios_vars(g); - nvgpu_init_ltc_vars(g); - err = nvgpu_init_soc_vars(g); - if (err) { - nvgpu_err(g, "init soc vars failed"); - return err; - } - - /* Initialize the platform interface. 
*/ - err = platform->probe(dev); - if (err) { - if (err == -EPROBE_DEFER) - nvgpu_info(g, "platform probe failed"); - else - nvgpu_err(g, "platform probe failed"); - return err; - } - - nvgpu_init_mm_vars(g); - - /* platform probe can defer do user init only if probe succeeds */ - err = gk20a_user_init(dev, interface_name, class); - if (err) - return err; - - if (platform->late_probe) { - err = platform->late_probe(dev); - if (err) { - nvgpu_err(g, "late probe failed"); - return err; - } - } - - nvgpu_create_sysfs(dev); - gk20a_debug_init(g, debugfs_symlink); - - g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); - if (!g->dbg_regops_tmp_buf) { - nvgpu_err(g, "couldn't allocate regops tmp buf"); - return -ENOMEM; - } - g->dbg_regops_tmp_buf_ops = - SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); - - g->remove_support = gk20a_remove_support; - - nvgpu_ref_init(&g->refcount); - - return 0; -} - -/** - * cyclic_delta - Returns delta of cyclic integers a and b. - * - * @a - First integer - * @b - Second integer - * - * Note: if a is ahead of b, delta is positive. - */ -static int cyclic_delta(int a, int b) -{ - return a - b; -} - -/** - * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete - * - * @g - The GPU to wait on. - * - * Waits until all interrupt handlers that have been scheduled to run have - * completed. - */ -void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); - int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); - - /* wait until all stalling irqs are handled */ - NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, - cyclic_delta(stall_irq_threshold, - atomic_read(&l->sw_irq_stall_last_handled)) - <= 0, 0); - - /* wait until all non-stalling irqs are handled */ - NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, - cyclic_delta(nonstall_irq_threshold, - atomic_read(&l->sw_irq_nonstall_last_handled)) - <= 0, 0); -} - -static void nvgpu_free_gk20a(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - kfree(l); -} - -void nvgpu_init_gk20a(struct gk20a *g) -{ - g->free = nvgpu_free_gk20a; -} diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.h b/drivers/gpu/nvgpu/common/linux/driver_common.h deleted file mode 100644 index 6f42f775..00000000 --- a/drivers/gpu/nvgpu/common/linux/driver_common.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef NVGPU_LINUX_DRIVER_COMMON -#define NVGPU_LINUX_DRIVER_COMMON - -void nvgpu_init_gk20a(struct gk20a *g); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/dt.c b/drivers/gpu/nvgpu/common/linux/dt.c deleted file mode 100644 index 88e391e3..00000000 --- a/drivers/gpu/nvgpu/common/linux/dt.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. 
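
cyclic_delta() above is the classic wrapping-counter comparison: since the kernel builds with -fno-strict-overflow, the signed subtraction stays meaningful across wraparound as long as the two counters are less than half the int range apart. That is what lets the deferred-interrupt wait compare an IRQ-count snapshot against the handled count without ever resetting either. For instance (demo name only):

    /* true once the handled counter has caught up to the snapshot,
     * even if either counter has wrapped past INT_MAX in between */
    static bool demo_caught_up(int snapshot, int handled)
    {
            return (snapshot - handled) <= 0;
    }
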
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "os_linux.h" - -int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, - u32 index, u32 *value) -{ - struct device *dev = dev_from_gk20a(g); - struct device_node *np = dev->of_node; - - return of_property_read_u32_index(np, name, index, value); -} diff --git a/drivers/gpu/nvgpu/common/linux/firmware.c b/drivers/gpu/nvgpu/common/linux/firmware.c deleted file mode 100644 index 9a4dc653..00000000 --- a/drivers/gpu/nvgpu/common/linux/firmware.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -static const struct firmware *do_request_firmware(struct device *dev, - const char *prefix, const char *fw_name, int flags) -{ - const struct firmware *fw; - char *fw_path = NULL; - int path_len, err; - - if (prefix) { - path_len = strlen(prefix) + strlen(fw_name); - path_len += 2; /* for the path separator and zero terminator*/ - - fw_path = nvgpu_kzalloc(get_gk20a(dev), - sizeof(*fw_path) * path_len); - if (!fw_path) - return NULL; - - sprintf(fw_path, "%s/%s", prefix, fw_name); - fw_name = fw_path; - } - - if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) - err = request_firmware_direct(&fw, fw_name, dev); - else - err = request_firmware(&fw, fw_name, dev); - - nvgpu_kfree(get_gk20a(dev), fw_path); - if (err) - return NULL; - return fw; -} - -/* This is a simple wrapper around request_firmware that takes 'fw_name' and - * applies an IP specific relative path prefix to it. The caller is - * responsible for calling nvgpu_release_firmware later. */ -struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, - const char *fw_name, - int flags) -{ - struct device *dev = dev_from_gk20a(g); - struct nvgpu_firmware *fw; - const struct firmware *linux_fw; - - /* current->fs is NULL when calling from SYS_EXIT. - Add a check here to prevent crash in request_firmware */ - if (!current->fs || !fw_name) - return NULL; - - fw = nvgpu_kzalloc(g, sizeof(*fw)); - if (!fw) - return NULL; - - linux_fw = do_request_firmware(dev, g->name, fw_name, flags); - -#ifdef CONFIG_TEGRA_GK20A - /* TO BE REMOVED - Support loading from legacy SOC specific path. 
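
nvgpu_request_firmware() above copies the blob out before releasing it, because fw->data only lives until release_firmware(). A minimal version of that round trip; the 64-byte path buffer and the demo_load_fw name are assumptions of this sketch, not driver API.

    #include <linux/firmware.h>
    #include <linux/slab.h>

    static void *demo_load_fw(struct device *dev, const char *prefix,
                              const char *name, size_t *size)
    {
            const struct firmware *fw;
            char path[64];
            void *copy;

            snprintf(path, sizeof(path), "%s/%s", prefix, name);
            if (request_firmware(&fw, path, dev))
                    return NULL;

            /* fw->data is only valid until release_firmware() */
            copy = kmemdup(fw->data, fw->size, GFP_KERNEL);
            if (copy)
                    *size = fw->size;
            release_firmware(fw);
            return copy;
    }
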
*/ - if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { - struct gk20a_platform *platform = gk20a_get_platform(dev); - linux_fw = do_request_firmware(dev, - platform->soc_name, fw_name, flags); - } -#endif - - if (!linux_fw) - goto err; - - fw->data = nvgpu_kmalloc(g, linux_fw->size); - if (!fw->data) - goto err_release; - - memcpy(fw->data, linux_fw->data, linux_fw->size); - fw->size = linux_fw->size; - - release_firmware(linux_fw); - - return fw; - -err_release: - release_firmware(linux_fw); -err: - nvgpu_kfree(g, fw); - return NULL; -} - -void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) -{ - if(!fw) - return; - - nvgpu_kfree(g, fw->data); - nvgpu_kfree(g, fw); -} diff --git a/drivers/gpu/nvgpu/common/linux/fuse.c b/drivers/gpu/nvgpu/common/linux/fuse.c deleted file mode 100644 index 27851f92..00000000 --- a/drivers/gpu/nvgpu/common/linux/fuse.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) -{ - return tegra_sku_info.gpu_speedo_id; -} - -/* - * Use tegra_fuse_control_read/write() APIs for fuse offsets upto 0x100 - * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100 - */ -void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) -{ - tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); -} - -void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) -{ - tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); -} - -void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) -{ - tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); -} - -void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) -{ - tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); -} - -int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) -{ - return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); -} - -int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) -{ - return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); -} diff --git a/drivers/gpu/nvgpu/common/linux/intr.c b/drivers/gpu/nvgpu/common/linux/intr.c deleted file mode 100644 index 7ffc7e87..00000000 --- a/drivers/gpu/nvgpu/common/linux/intr.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mc_gk20a.h" - -#include -#include -#include "os_linux.h" - -irqreturn_t nvgpu_intr_stall(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 mc_intr_0; - - trace_mc_gk20a_intr_stall(g->name); - - if (!g->power_on) - return IRQ_NONE; - - /* not from gpu when sharing irq with others */ - mc_intr_0 = g->ops.mc.intr_stall(g); - if (unlikely(!mc_intr_0)) - return IRQ_NONE; - - g->ops.mc.intr_stall_pause(g); - - atomic_inc(&l->hw_irq_stall_count); - - trace_mc_gk20a_intr_stall_done(g->name); - - return IRQ_WAKE_THREAD; -} - -irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int hw_irq_count; - - nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); - - trace_mc_gk20a_intr_thread_stall(g->name); - - hw_irq_count = atomic_read(&l->hw_irq_stall_count); - g->ops.mc.isr_stall(g); - g->ops.mc.intr_stall_resume(g); - /* sync handled irq counter before re-enabling interrupts */ - atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); - - nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); - - trace_mc_gk20a_intr_thread_stall_done(g->name); - - return IRQ_HANDLED; -} - -irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) -{ - u32 non_stall_intr_val; - u32 hw_irq_count; - int ops_old, ops_new, ops = 0; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!g->power_on) - return IRQ_NONE; - - /* not from gpu when sharing irq with others */ - non_stall_intr_val = g->ops.mc.intr_nonstall(g); - if (unlikely(!non_stall_intr_val)) - return IRQ_NONE; - - g->ops.mc.intr_nonstall_pause(g); - - ops = g->ops.mc.isr_nonstall(g); - if (ops) { - do { - ops_old = atomic_read(&l->nonstall_ops); - ops_new = ops_old | ops; - } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, - ops_old, ops_new)); - - queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); - } - - hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); - - /* sync handled irq counter before re-enabling interrupts */ - atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); - - g->ops.mc.intr_nonstall_resume(g); - - nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); - - return IRQ_HANDLED; -} - -void nvgpu_intr_nonstall_cb(struct work_struct *work) -{ - struct nvgpu_os_linux *l = - container_of(work, struct nvgpu_os_linux, nonstall_fn_work); - struct gk20a *g = &l->g; - - do { - u32 ops; - - ops = atomic_xchg(&l->nonstall_ops, 0); - mc_gk20a_handle_intr_nonstall(g, ops); - } while (atomic_read(&l->nonstall_ops) != 0); -} diff --git a/drivers/gpu/nvgpu/common/linux/intr.h b/drivers/gpu/nvgpu/common/linux/intr.h deleted file mode 100644 index d43cdccb..00000000 --- a/drivers/gpu/nvgpu/common/linux/intr.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
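
The nonstall ISR above merges new operation flags into a shared atomic without a lock: read, OR, atomic_cmpxchg(), and retry if another CPU got in between; the worker then drains with atomic_xchg(..., 0). The accumulate side in isolation (demo name only):

    #include <linux/atomic.h>

    static void demo_accumulate(atomic_t *pending, int new_bits)
    {
            int old, merged;

            do {
                    old = atomic_read(pending);
                    merged = old | new_bits;
                    /* atomic_cmpxchg() returns the value actually found;
                     * anything other than 'old' means we raced: retry */
            } while (atomic_cmpxchg(pending, old, merged) != old);
    }
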
- */ - -#ifndef __NVGPU_LINUX_INTR_H__ -#define __NVGPU_LINUX_INTR_H__ -struct gk20a; - -irqreturn_t nvgpu_intr_stall(struct gk20a *g); -irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); -irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); -void nvgpu_intr_nonstall_cb(struct work_struct *work); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c deleted file mode 100644 index c06512a5..00000000 --- a/drivers/gpu/nvgpu/common/linux/io.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include - -#include "os_linux.h" -#include "gk20a/gk20a.h" - -void nvgpu_writel(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - writel_relaxed(v, l->regs + r); - nvgpu_wmb(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } -} - -u32 nvgpu_readl(struct gk20a *g, u32 r) -{ - u32 v = __nvgpu_readl(g, r); - - if (v == 0xffffffff) - __nvgpu_check_gpu_state(g); - - return v; -} - -u32 __nvgpu_readl(struct gk20a *g, u32 r) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 v = 0xffffffff; - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - v = readl(l->regs + r); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } - - return v; -} - -void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - nvgpu_wmb(); - do { - writel_relaxed(v, l->regs + r); - } while (readl(l->regs + r) != v); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } -} - -void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->bar1)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); - } else { - nvgpu_wmb(); - writel_relaxed(v, l->bar1 + b); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); - } -} - -u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 v = 0xffffffff; - - if (unlikely(!l->bar1)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); - } else { - v = readl(l->bar1 + b); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); - } - - return v; -} - -bool nvgpu_io_exists(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return l->regs != NULL; -} - -bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return r < resource_size(l->regs); -} diff --git a/drivers/gpu/nvgpu/common/linux/io_usermode.c b/drivers/gpu/nvgpu/common/linux/io_usermode.c deleted file mode 100644 index 
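
nvgpu_writel_check() above handles posted writes by rewriting until a read-back matches. This only makes sense for registers that read back exactly what was written, an assumption the caller must guarantee. The core of it, as a sketch:

    #include <linux/io.h>

    static void demo_writel_check(void __iomem *base, u32 r, u32 v)
    {
            wmb();                          /* order earlier CPU writes */
            do {
                    writel_relaxed(v, base + r);
            } while (readl(base + r) != v); /* readl flushes the post */
    }
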
a7b728dd..00000000 --- a/drivers/gpu/nvgpu/common/linux/io_usermode.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include - -#include "common/linux/os_linux.h" -#include "gk20a/gk20a.h" - -#include - -void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); - - writel_relaxed(v, reg); - nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.c b/drivers/gpu/nvgpu/common/linux/ioctl.c deleted file mode 100644 index 359e5103..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * NVGPU IOCTLs - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" - -#include "ioctl_channel.h" -#include "ioctl_ctrl.h" -#include "ioctl_as.h" -#include "ioctl_tsg.h" -#include "ioctl_dbg.h" -#include "module.h" -#include "os_linux.h" -#include "ctxsw_trace.h" -#include "platform_gk20a.h" - -#define GK20A_NUM_CDEVS 7 - -const struct file_operations gk20a_channel_ops = { - .owner = THIS_MODULE, - .release = gk20a_channel_release, - .open = gk20a_channel_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_channel_ioctl, -#endif - .unlocked_ioctl = gk20a_channel_ioctl, -}; - -static const struct file_operations gk20a_ctrl_ops = { - .owner = THIS_MODULE, - .release = gk20a_ctrl_dev_release, - .open = gk20a_ctrl_dev_open, - .unlocked_ioctl = gk20a_ctrl_dev_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_ctrl_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_dbg_ops = { - .owner = THIS_MODULE, - .release = gk20a_dbg_gpu_dev_release, - .open = gk20a_dbg_gpu_dev_open, - .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, - .poll = gk20a_dbg_gpu_dev_poll, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_as_ops = { - .owner = THIS_MODULE, - .release = gk20a_as_dev_release, - .open = gk20a_as_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_as_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_as_dev_ioctl, -}; - -/* - * Note: We use a different 'open' to trigger handling of the profiler session. - * Most of the code is shared between them... 
Though, at some point if the - * code does get too tangled trying to handle each in the same path we can - * separate them cleanly. - */ -static const struct file_operations gk20a_prof_ops = { - .owner = THIS_MODULE, - .release = gk20a_dbg_gpu_dev_release, - .open = gk20a_prof_gpu_dev_open, - .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_tsg_ops = { - .owner = THIS_MODULE, - .release = nvgpu_ioctl_tsg_dev_release, - .open = nvgpu_ioctl_tsg_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, -#endif - .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, -}; - -#ifdef CONFIG_GK20A_CTXSW_TRACE -static const struct file_operations gk20a_ctxsw_ops = { - .owner = THIS_MODULE, - .release = gk20a_ctxsw_dev_release, - .open = gk20a_ctxsw_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_ctxsw_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, - .poll = gk20a_ctxsw_dev_poll, - .read = gk20a_ctxsw_dev_read, - .mmap = gk20a_ctxsw_dev_mmap, -}; -#endif - -static const struct file_operations gk20a_sched_ops = { - .owner = THIS_MODULE, - .release = gk20a_sched_dev_release, - .open = gk20a_sched_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_sched_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_sched_dev_ioctl, - .poll = gk20a_sched_dev_poll, - .read = gk20a_sched_dev_read, -}; - -static int gk20a_create_device( - struct device *dev, int devno, - const char *interface_name, const char *cdev_name, - struct cdev *cdev, struct device **out, - const struct file_operations *ops, - struct class *class) -{ - struct device *subdev; - int err; - struct gk20a *g = gk20a_from_dev(dev); - - nvgpu_log_fn(g, " "); - - cdev_init(cdev, ops); - cdev->owner = THIS_MODULE; - - err = cdev_add(cdev, devno, 1); - if (err) { - dev_err(dev, "failed to add %s cdev\n", cdev_name); - return err; - } - - subdev = device_create(class, NULL, devno, NULL, - interface_name, cdev_name); - - if (IS_ERR(subdev)) { - err = PTR_ERR(subdev); - cdev_del(cdev); - dev_err(dev, "failed to create %s device for %s\n", - cdev_name, dev_name(dev)); - return err; - } - - *out = subdev; - return 0; -} - -void gk20a_user_deinit(struct device *dev, struct class *class) -{ - struct gk20a *g = gk20a_from_dev(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (l->channel.node) { - device_destroy(class, l->channel.cdev.dev); - cdev_del(&l->channel.cdev); - } - - if (l->as_dev.node) { - device_destroy(class, l->as_dev.cdev.dev); - cdev_del(&l->as_dev.cdev); - } - - if (l->ctrl.node) { - device_destroy(class, l->ctrl.cdev.dev); - cdev_del(&l->ctrl.cdev); - } - - if (l->dbg.node) { - device_destroy(class, l->dbg.cdev.dev); - cdev_del(&l->dbg.cdev); - } - - if (l->prof.node) { - device_destroy(class, l->prof.cdev.dev); - cdev_del(&l->prof.cdev); - } - - if (l->tsg.node) { - device_destroy(class, l->tsg.cdev.dev); - cdev_del(&l->tsg.cdev); - } - - if (l->ctxsw.node) { - device_destroy(class, l->ctxsw.cdev.dev); - cdev_del(&l->ctxsw.cdev); - } - - if (l->sched.node) { - device_destroy(class, l->sched.cdev.dev); - cdev_del(&l->sched.cdev); - } - - if (l->cdev_region) - unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); -} - -int gk20a_user_init(struct device *dev, const char *interface_name, - struct class *class) -{ - int err; - dev_t devno; - struct gk20a *g = gk20a_from_dev(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - err = alloc_chrdev_region(&devno, 0,
GK20A_NUM_CDEVS, dev_name(dev)); - if (err) { - dev_err(dev, "failed to allocate devno\n"); - goto fail; - } - l->cdev_region = devno; - - err = gk20a_create_device(dev, devno++, interface_name, "", - &l->channel.cdev, &l->channel.node, - &gk20a_channel_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-as", - &l->as_dev.cdev, &l->as_dev.node, - &gk20a_as_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", - &l->ctrl.cdev, &l->ctrl.node, - &gk20a_ctrl_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-dbg", - &l->dbg.cdev, &l->dbg.node, - &gk20a_dbg_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-prof", - &l->prof.cdev, &l->prof.node, - &gk20a_prof_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-tsg", - &l->tsg.cdev, &l->tsg.node, - &gk20a_tsg_ops, - class); - if (err) - goto fail; - -#if defined(CONFIG_GK20A_CTXSW_TRACE) - err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", - &l->ctxsw.cdev, &l->ctxsw.node, - &gk20a_ctxsw_ops, - class); - if (err) - goto fail; -#endif - - err = gk20a_create_device(dev, devno++, interface_name, "-sched", - &l->sched.cdev, &l->sched.node, - &gk20a_sched_ops, - class); - if (err) - goto fail; - - return 0; -fail: - gk20a_user_deinit(dev, &nvgpu_class); - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.h b/drivers/gpu/nvgpu/common/linux/ioctl.h deleted file mode 100644 index 7bf16711..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_IOCTL_H__ -#define __NVGPU_IOCTL_H__ - -struct device; -struct class; - -int gk20a_user_init(struct device *dev, const char *interface_name, - struct class *class); -void gk20a_user_deinit(struct device *dev, struct class *class); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c deleted file mode 100644 index 47f612cc..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * GK20A Address Spaces - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#include -#include -#include - -#include - -#include - -#include -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "ioctl_as.h" -#include "os_linux.h" - -static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) - core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; - if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) - core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; - - return core_flags; -} - -static int gk20a_as_ioctl_bind_channel( - struct gk20a_as_share *as_share, - struct nvgpu_as_bind_channel_args *args) -{ - int err = 0; - struct channel_gk20a *ch; - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - if (gk20a_channel_as_bound(ch)) { - err = -EINVAL; - goto out; - } - - /* this will set channel_gk20a->vm */ - err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); - -out: - gk20a_channel_put(ch); - return err; -} - -static int gk20a_as_ioctl_alloc_space( - struct gk20a_as_share *as_share, - struct nvgpu_as_alloc_space_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, - &args->o_a.offset, - gk20a_as_translate_as_alloc_space_flags(g, - args->flags)); -} - -static int gk20a_as_ioctl_free_space( - struct gk20a_as_share *as_share, - struct nvgpu_as_free_space_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - return nvgpu_vm_area_free(as_share->vm, args->offset); -} - -static int gk20a_as_ioctl_map_buffer_ex( - struct gk20a_as_share *as_share, - struct nvgpu_as_map_buffer_ex_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - /* unsupported, direct kind control must be used */ - if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { - struct gk20a *g = as_share->vm->mm->g; - nvgpu_log_info(g, "Direct kind control must be requested"); - return -EINVAL; - } - - return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, - &args->offset, args->flags, - args->compr_kind, - args->incompr_kind, - args->buffer_offset, - args->mapping_size, - NULL); -} - -static int gk20a_as_ioctl_unmap_buffer( - struct gk20a_as_share *as_share, - struct nvgpu_as_unmap_buffer_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - nvgpu_vm_unmap(as_share->vm, args->offset, NULL); - - return 0; -} - -static int gk20a_as_ioctl_map_buffer_batch( - struct gk20a_as_share *as_share, - struct nvgpu_as_map_buffer_batch_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - u32 i; - int err = 0; - - struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = - (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) - args->unmaps; - struct nvgpu_as_map_buffer_ex_args __user *user_map_args = - (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) - args->maps; - - struct vm_gk20a_mapping_batch batch; - - nvgpu_log_fn(g, " "); - - if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || - args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) - return -EINVAL; - - nvgpu_vm_mapping_batch_start(&batch); - - for (i = 0; i < args->num_unmaps; ++i) { - struct nvgpu_as_unmap_buffer_args unmap_args; - - if (copy_from_user(&unmap_args, &user_unmap_args[i], - sizeof(unmap_args))) { - err = -EFAULT; - break; - } - - 
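/* Each unmap below goes through the shared 'batch' context, so the
 * expensive follow-up work (TLB invalidate / L2 flush) can be deferred
 * to the single nvgpu_vm_mapping_batch_finish() call further down. */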
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); - } - - if (err) { - nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); - - args->num_unmaps = i; - args->num_maps = 0; - return err; - } - - for (i = 0; i < args->num_maps; ++i) { - s16 compressible_kind; - s16 incompressible_kind; - - struct nvgpu_as_map_buffer_ex_args map_args; - memset(&map_args, 0, sizeof(map_args)); - - if (copy_from_user(&map_args, &user_map_args[i], - sizeof(map_args))) { - err = -EFAULT; - break; - } - - if (map_args.flags & - NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { - compressible_kind = map_args.compr_kind; - incompressible_kind = map_args.incompr_kind; - } else { - /* direct kind control must be used */ - err = -EINVAL; - break; - } - - err = nvgpu_vm_map_buffer( - as_share->vm, map_args.dmabuf_fd, - &map_args.offset, map_args.flags, - compressible_kind, incompressible_kind, - map_args.buffer_offset, - map_args.mapping_size, - &batch); - if (err) - break; - } - - nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); - - if (err) - args->num_maps = i; - /* note: args->num_unmaps will be unmodified, which is ok - * since all unmaps are done */ - - return err; -} - -static int gk20a_as_ioctl_get_va_regions( - struct gk20a_as_share *as_share, - struct nvgpu_as_get_va_regions_args *args) -{ - unsigned int i; - unsigned int write_entries; - struct nvgpu_as_va_region __user *user_region_ptr; - struct vm_gk20a *vm = as_share->vm; - struct gk20a *g = gk20a_from_vm(vm); - unsigned int page_sizes = gmmu_page_size_kernel; - - nvgpu_log_fn(g, " "); - - if (!vm->big_pages) - page_sizes--; - - write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); - if (write_entries > page_sizes) - write_entries = page_sizes; - - user_region_ptr = - (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; - - for (i = 0; i < write_entries; ++i) { - struct nvgpu_as_va_region region; - struct nvgpu_allocator *vma = vm->vma[i]; - - memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); - - region.page_size = vm->gmmu_page_sizes[i]; - region.offset = nvgpu_alloc_base(vma); - /* No __aeabi_uldivmod() on some platforms... 
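 * (the kernel provides no 64-bit divide helper on 32-bit ARM), so the
 * page count below is derived with a shift by ilog2(region.page_size)
 * rather than a u64 division.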
*/ - region.pages = (nvgpu_alloc_end(vma) - - nvgpu_alloc_base(vma)) >> ilog2(region.page_size); - - if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) - return -EFAULT; - } - - args->buf_size = - page_sizes * sizeof(struct nvgpu_as_va_region); - - return 0; -} - -static int nvgpu_as_ioctl_get_sync_ro_map( - struct gk20a_as_share *as_share, - struct nvgpu_as_get_sync_ro_map_args *args) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct vm_gk20a *vm = as_share->vm; - struct gk20a *g = gk20a_from_vm(vm); - u64 base_gpuva; - u32 sync_size; - int err = 0; - - if (!g->ops.fifo.get_sync_ro_map) - return -EINVAL; - - if (!gk20a_platform_has_syncpoints(g)) - return -EINVAL; - - err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); - if (err) - return err; - - args->base_gpuva = base_gpuva; - args->sync_size = sync_size; - - return err; -#else - return -EINVAL; -#endif -} - -int gk20a_as_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a_as_share *as_share; - struct gk20a *g; - int err; - - l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); - g = &l->g; - - nvgpu_log_fn(g, " "); - - err = gk20a_as_alloc_share(g, 0, 0, &as_share); - if (err) { - nvgpu_log_fn(g, "failed to alloc share"); - return err; - } - - filp->private_data = as_share; - return 0; -} - -int gk20a_as_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_as_share *as_share = filp->private_data; - - if (!as_share) - return 0; - - return gk20a_as_release_share(as_share); -} - -long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int err = 0; - struct gk20a_as_share *as_share = filp->private_data; - struct gk20a *g = gk20a_from_as(as_share->as); - - u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - err = gk20a_busy(g); - if (err) - return err; - - switch (cmd) { - case NVGPU_AS_IOCTL_BIND_CHANNEL: - trace_gk20a_as_ioctl_bind_channel(g->name); - err = gk20a_as_ioctl_bind_channel(as_share, - (struct nvgpu_as_bind_channel_args *)buf); - - break; - case NVGPU32_AS_IOCTL_ALLOC_SPACE: - { - struct nvgpu32_as_alloc_space_args *args32 = - (struct nvgpu32_as_alloc_space_args *)buf; - struct nvgpu_as_alloc_space_args args; - - args.pages = args32->pages; - args.page_size = args32->page_size; - args.flags = args32->flags; - args.o_a.offset = args32->o_a.offset; - trace_gk20a_as_ioctl_alloc_space(g->name); - err = gk20a_as_ioctl_alloc_space(as_share, &args); - args32->o_a.offset = args.o_a.offset; - break; - } - case NVGPU_AS_IOCTL_ALLOC_SPACE: - trace_gk20a_as_ioctl_alloc_space(g->name); - err = gk20a_as_ioctl_alloc_space(as_share, - (struct nvgpu_as_alloc_space_args *)buf); - break; - case NVGPU_AS_IOCTL_FREE_SPACE: - trace_gk20a_as_ioctl_free_space(g->name); - err = gk20a_as_ioctl_free_space(as_share, - (struct nvgpu_as_free_space_args *)buf); - break; - case NVGPU_AS_IOCTL_MAP_BUFFER_EX: - trace_gk20a_as_ioctl_map_buffer(g->name); - err = gk20a_as_ioctl_map_buffer_ex(as_share, - (struct nvgpu_as_map_buffer_ex_args *)buf); - break; - case NVGPU_AS_IOCTL_UNMAP_BUFFER: - trace_gk20a_as_ioctl_unmap_buffer(g->name); - err = 
gk20a_as_ioctl_unmap_buffer(as_share, - (struct nvgpu_as_unmap_buffer_args *)buf); - break; - case NVGPU_AS_IOCTL_GET_VA_REGIONS: - trace_gk20a_as_ioctl_get_va_regions(g->name); - err = gk20a_as_ioctl_get_va_regions(as_share, - (struct nvgpu_as_get_va_regions_args *)buf); - break; - case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: - err = gk20a_as_ioctl_map_buffer_batch(as_share, - (struct nvgpu_as_map_buffer_batch_args *)buf); - break; - case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: - err = nvgpu_as_ioctl_get_sync_ro_map(as_share, - (struct nvgpu_as_get_sync_ro_map_args *)buf); - break; - default: - err = -ENOTTY; - break; - } - - gk20a_idle(g); - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) - err = -EFAULT; - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.h b/drivers/gpu/nvgpu/common/linux/ioctl_as.h deleted file mode 100644 index b3de3782..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * GK20A Address Spaces - * - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_COMMON_LINUX_AS_H__ -#define __NVGPU_COMMON_LINUX_AS_H__ - -struct inode; -struct file; - -/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and - * num_maps */ -#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 - -/* struct file_operations driver interface */ -int gk20a_as_dev_open(struct inode *inode, struct file *filp); -int gk20a_as_dev_release(struct inode *inode, struct file *filp); -long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c deleted file mode 100644 index b04bb9de..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ /dev/null @@ -1,1388 +0,0 @@ -/* - * GK20A Graphics channel - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" -#include "gk20a/fence_gk20a.h" - -#include "platform_gk20a.h" -#include "ioctl_channel.h" -#include "channel.h" -#include "os_linux.h" -#include "ctxsw_trace.h" - -/* the minimal size of client buffer */ -#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ - (sizeof(struct gk20a_cs_snapshot_fifo) + \ - sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) - -static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: - return "WFI"; - default: - return "?"; - } -} - -static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: - return "WFI"; - case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: - return "CTA"; - default: - return "?"; - } -} - -static void gk20a_channel_trace_sched_param( - void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, - u32 timeout, const char *interleave, - const char *graphics_preempt_mode, - const char *compute_preempt_mode), - struct channel_gk20a *ch) -{ - struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); - - if (!tsg) - return; - - (trace)(ch->chid, ch->tsgid, ch->pid, - tsg_gk20a_from_ch(ch)->timeslice_us, - ch->timeout_ms_max, - gk20a_fifo_interleave_level_name(tsg->interleave_level), - gr_gk20a_graphics_preempt_mode_name( - tsg->gr_ctx.graphics_preempt_mode), - gr_gk20a_compute_preempt_mode_name( - tsg->gr_ctx.compute_preempt_mode)); -} - -/* - * Although channels do have pointers back to the gk20a struct that they were - * created under in cases where the driver is killed that pointer can be bad. - * The channel memory can be freed before the release() function for a given - * channel is called. This happens when the driver dies and userspace doesn't - * get a chance to call release() until after the entire gk20a driver data is - * unloaded and freed. - */ -struct channel_priv { - struct gk20a *g; - struct channel_gk20a *c; -}; - -#if defined(CONFIG_GK20A_CYCLE_STATS) - -void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - /* disable existing cyclestats buffer */ - nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); - if (priv->cyclestate_buffer_handler) { - dma_buf_vunmap(priv->cyclestate_buffer_handler, - ch->cyclestate.cyclestate_buffer); - dma_buf_put(priv->cyclestate_buffer_handler); - priv->cyclestate_buffer_handler = NULL; - ch->cyclestate.cyclestate_buffer = NULL; - ch->cyclestate.cyclestate_buffer_size = 0; - } - nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); -} - -static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, - struct nvgpu_cycle_stats_args *args) -{ - struct dma_buf *dmabuf; - void *virtual_address; - struct nvgpu_channel_linux *priv = ch->os_priv; - - /* is it allowed to handle calls for current GPU? 
*/ - if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) - return -ENOSYS; - - if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) { - - /* set up new cyclestats buffer */ - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - virtual_address = dma_buf_vmap(dmabuf); - if (!virtual_address) { - dma_buf_put(dmabuf); - return -ENOMEM; - } - - priv->cyclestate_buffer_handler = dmabuf; - ch->cyclestate.cyclestate_buffer = virtual_address; - ch->cyclestate.cyclestate_buffer_size = dmabuf->size; - return 0; - - } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) { - gk20a_channel_free_cycle_stats_buffer(ch); - return 0; - - } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) { - /* no request from GL */ - return 0; - - } else { - pr_err("channel already has cyclestats buffer\n"); - return -EINVAL; - } -} - -static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) -{ - int ret; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (ch->cs_client) - ret = gr_gk20a_css_flush(ch, ch->cs_client); - else - ret = -EBADF; - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; -} - -static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, - u32 dmabuf_fd, - u32 perfmon_id_count, - u32 *perfmon_id_start) -{ - int ret = 0; - struct gk20a *g = ch->g; - struct gk20a_cs_snapshot_client_linux *client_linux; - struct gk20a_cs_snapshot_client *client; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (ch->cs_client) { - nvgpu_mutex_release(&ch->cs_client_mutex); - return -EEXIST; - } - - client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); - if (!client_linux) { - ret = -ENOMEM; - goto err; - } - - client_linux->dmabuf_fd = dmabuf_fd; - client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); - if (IS_ERR(client_linux->dma_handler)) { - ret = PTR_ERR(client_linux->dma_handler); - client_linux->dma_handler = NULL; - goto err_free; - } - - client = &client_linux->cs_client; - client->snapshot_size = client_linux->dma_handler->size; - if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { - ret = -ENOMEM; - goto err_put; - } - - client->snapshot = (struct gk20a_cs_snapshot_fifo *) - dma_buf_vmap(client_linux->dma_handler); - if (!client->snapshot) { - ret = -ENOMEM; - goto err_put; - } - - ch->cs_client = client; - - ret = gr_gk20a_css_attach(ch, - perfmon_id_count, - perfmon_id_start, - ch->cs_client); - - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; - -err_put: - dma_buf_put(client_linux->dma_handler); -err_free: - nvgpu_kfree(g, client_linux); -err: - nvgpu_mutex_release(&ch->cs_client_mutex); - return ret; -} - -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) -{ - int ret; - struct gk20a_cs_snapshot_client_linux *client_linux; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (!ch->cs_client) { - nvgpu_mutex_release(&ch->cs_client_mutex); - return 0; - } - - client_linux = container_of(ch->cs_client, - struct gk20a_cs_snapshot_client_linux, - cs_client); - - ret = gr_gk20a_css_detach(ch, ch->cs_client); - - if (client_linux->dma_handler) { - if (ch->cs_client->snapshot) - dma_buf_vunmap(client_linux->dma_handler, - ch->cs_client->snapshot); - dma_buf_put(client_linux->dma_handler); - } - - ch->cs_client = NULL; - nvgpu_kfree(ch->g, client_linux); - - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; -} - -static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, - struct nvgpu_cycle_stats_snapshot_args *args) -{ - int ret; - - /* is it allowed to handle calls for
current GPU? */ - if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) - return -ENOSYS; - - if (!args->dmabuf_fd) - return -EINVAL; - - /* handle the command (most frequent cases first) */ - switch (args->cmd) { - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: - ret = gk20a_flush_cycle_stats_snapshot(ch); - args->extra = 0; - break; - - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: - ret = gk20a_attach_cycle_stats_snapshot(ch, - args->dmabuf_fd, - args->extra, - &args->extra); - break; - - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: - ret = gk20a_channel_free_cycle_stats_snapshot(ch); - args->extra = 0; - break; - - default: - pr_err("cyclestats: unknown command %u\n", args->cmd); - ret = -EINVAL; - break; - } - - return ret; -} -#endif - -static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, - struct nvgpu_channel_wdt_args *args) -{ - u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | - NVGPU_IOCTL_CHANNEL_ENABLE_WDT); - - if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) - ch->timeout.enabled = false; - else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) - ch->timeout.enabled = true; - else - return -EINVAL; - - if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) - ch->timeout.limit_ms = args->timeout_ms; - - ch->timeout.debug_dump = (args->wdt_status & - NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; - - return 0; -} - -static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); - dma_buf_put(priv->error_notifier.dmabuf); - priv->error_notifier.dmabuf = NULL; - priv->error_notifier.notification = NULL; - priv->error_notifier.vaddr = NULL; - } - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -static int gk20a_init_error_notifier(struct channel_gk20a *ch, - struct nvgpu_set_error_notifier *args) -{ - struct dma_buf *dmabuf; - void *va; - u64 end = args->offset + sizeof(struct nvgpu_notification); - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (!args->mem) { - pr_err("gk20a_init_error_notifier: invalid memory handle\n"); - return -EINVAL; - } - - dmabuf = dma_buf_get(args->mem); - - gk20a_channel_free_error_notifiers(ch); - - if (IS_ERR(dmabuf)) { - pr_err("Invalid handle: %d\n", args->mem); - return -EINVAL; - } - - if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { - dma_buf_put(dmabuf); - nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); - return -EINVAL; - } - - nvgpu_speculation_barrier(); - - /* map handle */ - va = dma_buf_vmap(dmabuf); - if (!va) { - dma_buf_put(dmabuf); - pr_err("Cannot map notifier handle\n"); - return -ENOMEM; - } - - priv->error_notifier.notification = va + args->offset; - priv->error_notifier.vaddr = va; - memset(priv->error_notifier.notification, 0, - sizeof(struct nvgpu_notification)); - - /* set channel notifiers pointer */ - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - priv->error_notifier.dmabuf = dmabuf; - nvgpu_mutex_release(&priv->error_notifier.mutex); - - return 0; -} - -/* - * This returns the channel with a reference. The caller must - * gk20a_channel_put() the ref back after use. - * - * NULL is returned if the channel was not found. 
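 *
 * Typical use, as in gk20a_as_ioctl_bind_channel() above:
 *
 *	ch = gk20a_get_channel_from_file(args->channel_fd);
 *	if (!ch)
 *		return -EINVAL;
 *	... operate on the channel, then drop the ref ...
 *	gk20a_channel_put(ch);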
- */ -struct channel_gk20a *gk20a_get_channel_from_file(int fd) -{ - struct channel_gk20a *ch; - struct channel_priv *priv; - struct file *f = fget(fd); - - if (!f) - return NULL; - - if (f->f_op != &gk20a_channel_ops) { - fput(f); - return NULL; - } - - priv = (struct channel_priv *)f->private_data; - ch = gk20a_channel_get(priv->c); - fput(f); - return ch; -} - -int gk20a_channel_release(struct inode *inode, struct file *filp) -{ - struct channel_priv *priv = filp->private_data; - struct channel_gk20a *ch; - struct gk20a *g; - - int err; - - /* We could still end up here even if the channel_open failed, e.g. - * if we ran out of hw channel IDs. - */ - if (!priv) - return 0; - - ch = priv->c; - g = priv->g; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to release a channel!"); - goto channel_release; - } - - trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); - - gk20a_channel_close(ch); - gk20a_channel_free_error_notifiers(ch); - - gk20a_idle(g); - -channel_release: - gk20a_put(g); - nvgpu_kfree(g, filp->private_data); - filp->private_data = NULL; - return 0; -} - -/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ -static int __gk20a_channel_open(struct gk20a *g, - struct file *filp, s32 runlist_id) -{ - int err; - struct channel_gk20a *ch; - struct channel_priv *priv; - - nvgpu_log_fn(g, " "); - - g = gk20a_get(g); - if (!g) - return -ENODEV; - - trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on, %d", err); - goto fail_busy; - } - /* All user space channels should be unprivileged */ - ch = gk20a_open_new_channel(g, runlist_id, false, - nvgpu_current_pid(g), nvgpu_current_tid(g)); - gk20a_idle(g); - if (!ch) { - nvgpu_err(g, - "failed to get channel"); - err = -ENOMEM; - goto fail_busy; - } - - gk20a_channel_trace_sched_param( - trace_gk20a_channel_sched_defaults, ch); - - priv->g = g; - priv->c = ch; - - filp->private_data = priv; - return 0; - -fail_busy: - nvgpu_kfree(g, priv); -free_ref: - gk20a_put(g); - return err; -} - -int gk20a_channel_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, channel.cdev); - struct gk20a *g = &l->g; - int ret; - - nvgpu_log_fn(g, "start"); - ret = __gk20a_channel_open(g, filp, -1); - - nvgpu_log_fn(g, "end"); - return ret; -} - -int gk20a_channel_open_ioctl(struct gk20a *g, - struct nvgpu_channel_open_args *args) -{ - int err; - int fd; - struct file *file; - char name[64]; - s32 runlist_id = args->in.runlist_id; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvhost-%s-fd%d", - dev_name(dev_from_gk20a(g)), fd); - - file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = __gk20a_channel_open(g, file, runlist_id); - if (err) - goto clean_up_file; - - fd_install(fd, file); - args->out.channel_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) - flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR; - - if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) - flags
|= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC; - - if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) - flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE; - - return flags; -} - -static void nvgpu_get_gpfifo_ex_args( - struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, - struct nvgpu_gpfifo_args *gpfifo_args) -{ - gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries; - gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs; - gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( - alloc_gpfifo_ex_args->flags); -} - -static void nvgpu_get_gpfifo_args( - struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, - struct nvgpu_gpfifo_args *gpfifo_args) -{ - /* - * Kernel can insert one extra gpfifo entry before user - * submitted gpfifos and another one after, for internal usage. - * Triple the requested size. - */ - gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3; - gpfifo_args->num_inflight_jobs = 0; - gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( - alloc_gpfifo_args->flags); -} - -static void nvgpu_get_fence_args( - struct nvgpu_fence *fence_args_in, - struct nvgpu_channel_fence *fence_args_out) -{ - fence_args_out->id = fence_args_in->id; - fence_args_out->value = fence_args_in->value; -} - -static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, - ulong id, u32 offset, - u32 payload, u32 timeout) -{ - struct dma_buf *dmabuf; - void *data; - u32 *semaphore; - int ret = 0; - - /* do not wait if channel has timed out */ - if (ch->has_timedout) - return -ETIMEDOUT; - - dmabuf = dma_buf_get(id); - if (IS_ERR(dmabuf)) { - nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); - return -EINVAL; - } - - data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); - if (!data) { - nvgpu_err(ch->g, "failed to map notifier memory"); - ret = -EINVAL; - goto cleanup_put; - } - - semaphore = data + (offset & ~PAGE_MASK); - - ret = NVGPU_COND_WAIT_INTERRUPTIBLE( - &ch->semaphore_wq, - *semaphore == payload || ch->has_timedout, - timeout); - - dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); -cleanup_put: - dma_buf_put(dmabuf); - return ret; -} - -static int gk20a_channel_wait(struct channel_gk20a *ch, - struct nvgpu_wait_args *args) -{ - struct dma_buf *dmabuf; - struct gk20a *g = ch->g; - struct notification *notif; - struct timespec tv; - u64 jiffies; - ulong id; - u32 offset; - int remain, ret = 0; - u64 end; - - nvgpu_log_fn(g, " "); - - if (ch->has_timedout) - return -ETIMEDOUT; - - switch (args->type) { - case NVGPU_WAIT_TYPE_NOTIFIER: - id = args->condition.notifier.dmabuf_fd; - offset = args->condition.notifier.offset; - end = offset + sizeof(struct notification); - - dmabuf = dma_buf_get(id); - if (IS_ERR(dmabuf)) { - nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", - id); - return -EINVAL; - } - - if (end > dmabuf->size || end < sizeof(struct notification)) { - dma_buf_put(dmabuf); - nvgpu_err(g, "invalid notifier offset"); - return -EINVAL; - } - - nvgpu_speculation_barrier(); - - notif = dma_buf_vmap(dmabuf); - if (!notif) { - nvgpu_err(g, "failed to map notifier memory"); - return -ENOMEM; - } - - notif = (struct notification *)((uintptr_t)notif + offset); - - /* user should set status pending before - * calling this ioctl */ - remain = NVGPU_COND_WAIT_INTERRUPTIBLE( - &ch->notifier_wq, - notif->status == 0 || ch->has_timedout, - args->timeout); - - if (remain == 0 && notif->status != 0) { - ret = -ETIMEDOUT; - goto notif_clean_up; - } else if (remain < 0) { - ret = -EINTR; - goto notif_clean_up; - 
} - - /* TBD: fill in correct information */ - jiffies = get_jiffies_64(); - jiffies_to_timespec(jiffies, &tv); - notif->timestamp.nanoseconds[0] = tv.tv_nsec; - notif->timestamp.nanoseconds[1] = tv.tv_sec; - notif->info32 = 0xDEADBEEF; /* should be object name */ - notif->info16 = ch->chid; /* should be method offset */ - -notif_clean_up: - dma_buf_vunmap(dmabuf, notif); - return ret; - - case NVGPU_WAIT_TYPE_SEMAPHORE: - ret = gk20a_channel_wait_semaphore(ch, - args->condition.semaphore.dmabuf_fd, - args->condition.semaphore.offset, - args->condition.semaphore.payload, - args->timeout); - - break; - - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, - struct nvgpu_zcull_bind_args *args) -{ - struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; - - nvgpu_log_fn(gr->g, " "); - - return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, - args->gpu_va, args->mode); -} - -static int gk20a_ioctl_channel_submit_gpfifo( - struct channel_gk20a *ch, - struct nvgpu_submit_gpfifo_args *args) -{ - struct nvgpu_channel_fence fence; - struct gk20a_fence *fence_out; - struct fifo_profile_gk20a *profile = NULL; - u32 submit_flags = 0; - int fd = -1; - struct gk20a *g = ch->g; - - int ret = 0; - nvgpu_log_fn(g, " "); - - profile = gk20a_fifo_profile_acquire(ch->g); - gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); - - if (ch->has_timedout) - return -ETIMEDOUT; - - nvgpu_get_fence_args(&args->fence, &fence); - submit_flags = - nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); - - /* Try and allocate an fd here*/ - if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; - } - - ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, - submit_flags, &fence, - &fence_out, profile); - - if (ret) { - if (fd != -1) - put_unused_fd(fd); - goto clean_up; - } - - /* Convert fence_out to something we can pass back to user space. 
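 * Two forms are possible: with NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE set
 * the fence is installed as a sync-fence fd in args->fence.id, otherwise
 * the raw syncpoint id/value pair is copied into args->fence.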
*/ - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { - ret = gk20a_fence_install_fd(fence_out, fd); - if (ret) - put_unused_fd(fd); - else - args->fence.id = fd; - } else { - args->fence.id = fence_out->syncpt_id; - args->fence.value = fence_out->syncpt_value; - } - } - gk20a_fence_put(fence_out); - - gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); - if (profile) - gk20a_fifo_profile_release(ch->g, profile); - -clean_up: - return ret; -} - -/* - * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* - * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* - */ -u32 nvgpu_get_common_runlist_level(u32 level) -{ - switch (level) { - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; - default: - pr_err("%s: incorrect runlist level\n", __func__); - } - - return level; -} - -static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) - flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; - - if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) - flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; - - return flags; -} - -static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, - u32 class_num, u32 user_flags) -{ - return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, - nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); -} - -/* - * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) -{ - u32 flags = 0; - - if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) - flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; - if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) - flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; - - return flags; -} - -/* - * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) -{ - u32 flags = 0; - - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; - - return flags; -} - -/* - * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: - return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; - case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: - return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; - } - - return graphics_preempt_mode; -} - -/* - * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - */ -u32 
nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: - return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; - case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: - return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; - case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: - return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; - } - - return compute_preempt_mode; -} - -/* - * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - */ -static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: - return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; - case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: - return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; - } - - return graphics_preempt_mode; -} - -/* - * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - */ -static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: - return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; - case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: - return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; - case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: - return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; - } - - return compute_preempt_mode; -} - -static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, - u32 graphics_preempt_mode, u32 compute_preempt_mode) -{ - int err; - - if (ch->g->ops.gr.set_preemption_mode) { - err = gk20a_busy(ch->g); - if (err) { - nvgpu_err(ch->g, "failed to power on, %d", err); - return err; - } - err = ch->g->ops.gr.set_preemption_mode(ch, - nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), - nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); - gk20a_idle(ch->g); - } else { - err = -EINVAL; - } - - return err; -} - -static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, - struct nvgpu_get_user_syncpoint_args *args) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a *g = ch->g; - int err; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { - nvgpu_err(g, "user syncpoints not supported"); - return -EINVAL; - } - - if (!gk20a_platform_has_syncpoints(g)) { - nvgpu_err(g, "syncpoints not supported"); - return -EINVAL; - } - - if (g->aggressive_sync_destroy_thresh) { - nvgpu_err(g, "sufficient syncpoints not available"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&ch->sync_lock); - if (ch->user_sync) { - nvgpu_mutex_release(&ch->sync_lock); - } else { - ch->user_sync = gk20a_channel_sync_create(ch, true); - if (!ch->user_sync) { - nvgpu_mutex_release(&ch->sync_lock); - return -ENOMEM; - } - nvgpu_mutex_release(&ch->sync_lock); - - if (g->ops.fifo.resetup_ramfc) { - err = g->ops.fifo.resetup_ramfc(ch); - if (err) - return err; - } - } - - args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); - args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, - args->syncpoint_id); - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) - args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); - else - args->gpu_va = 0; - - return 0; -#else - return -EINVAL; -#endif -} - -long gk20a_channel_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct channel_priv *priv = filp->private_data; - struct channel_gk20a *ch = priv->c; - struct device 
*dev = dev_from_gk20a(ch->g); - u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; - int err = 0; - struct gk20a *g = ch->g; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) - return -EINVAL; - - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - /* take a ref or return timeout if channel refs can't be taken */ - ch = gk20a_channel_get(ch); - if (!ch) - return -ETIMEDOUT; - - /* protect our sanity for threaded userspace - most of the channel is - * not thread safe */ - nvgpu_mutex_acquire(&ch->ioctl_lock); - - /* this ioctl call keeps a ref to the file which keeps a ref to the - * channel */ - - switch (cmd) { - case NVGPU_IOCTL_CHANNEL_OPEN: - err = gk20a_channel_open_ioctl(ch->g, - (struct nvgpu_channel_open_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: - break; - case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: - { - struct nvgpu_alloc_obj_ctx_args *args = - (struct nvgpu_alloc_obj_ctx_args *)buf; - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: - { - struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = - (struct nvgpu_alloc_gpfifo_ex_args *)buf; - struct nvgpu_gpfifo_args gpfifo_args; - - nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args); - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { - err = -EINVAL; - gk20a_idle(ch->g); - break; - } - err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: - { - struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = - (struct nvgpu_alloc_gpfifo_args *)buf; - struct nvgpu_gpfifo_args gpfifo_args; - - nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args); - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: - err = gk20a_ioctl_channel_submit_gpfifo(ch, - (struct nvgpu_submit_gpfifo_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_WAIT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - /* waiting is thread-safe, not dropping this mutex could - * deadlock in certain conditions */ - nvgpu_mutex_release(&ch->ioctl_lock); - - err = gk20a_channel_wait(ch, - (struct nvgpu_wait_args *)buf); - - nvgpu_mutex_acquire(&ch->ioctl_lock); - - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_zcull_bind(ch, - (struct nvgpu_zcull_bind_args *)buf); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = 
gk20a_init_error_notifier(ch, - (struct nvgpu_set_error_notifier *)buf); - gk20a_idle(ch->g); - break; -#ifdef CONFIG_GK20A_CYCLE_STATS - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_cycle_stats(ch, - (struct nvgpu_cycle_stats_args *)buf); - gk20a_idle(ch->g); - break; -#endif - case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: - { - u32 timeout = - (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; - nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", - timeout, ch->chid); - ch->timeout_ms_max = timeout; - gk20a_channel_trace_sched_param( - trace_gk20a_channel_set_timeout, ch); - break; - } - case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: - { - u32 timeout = - (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; - bool timeout_debug_dump = !((u32) - ((struct nvgpu_set_timeout_ex_args *)buf)->flags & - (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); - nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", - timeout, ch->chid); - ch->timeout_ms_max = timeout; - ch->timeout_debug_dump = timeout_debug_dump; - gk20a_channel_trace_sched_param( - trace_gk20a_channel_set_timeout, ch); - break; - } - case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: - ((struct nvgpu_get_param_args *)buf)->value = - ch->has_timedout; - break; - case NVGPU_IOCTL_CHANNEL_ENABLE: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - if (ch->g->ops.fifo.enable_channel) - ch->g->ops.fifo.enable_channel(ch); - else - err = -ENOSYS; - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_DISABLE: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - if (ch->g->ops.fifo.disable_channel) - ch->g->ops.fifo.disable_channel(ch); - else - err = -ENOSYS; - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_PREEMPT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_fifo_preempt(ch->g, ch); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: - if (!capable(CAP_SYS_NICE)) { - err = -EPERM; - break; - } - if (!ch->g->ops.fifo.reschedule_runlist) { - err = -ENOSYS; - break; - } - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.fifo.reschedule_runlist(ch, - NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & - ((struct nvgpu_reschedule_runlist_args *)buf)->flags); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_FORCE_RESET: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.fifo.force_reset_ch(ch, - NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); - gk20a_idle(ch->g); - break; -#ifdef CONFIG_GK20A_CYCLE_STATS - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_cycle_stats_snapshot(ch, - (struct nvgpu_cycle_stats_snapshot_args *)buf); - gk20a_idle(ch->g); - break; -#endif - case NVGPU_IOCTL_CHANNEL_WDT: - err = gk20a_channel_set_wdt_status(ch, - (struct nvgpu_channel_wdt_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: 
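/* args carry the linux UAPI NVGPU_GRAPHICS/COMPUTE_PREEMPTION_MODE_*
 * values; the helper translates them to the common
 * NVGPU_PREEMPTION_MODE_* values (see
 * nvgpu_get_common_graphics/compute_preempt_mode() above) before
 * calling g->ops.gr.set_preemption_mode(). */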
- err = nvgpu_ioctl_channel_set_preemption_mode(ch, - ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, - ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); - break; - case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: - if (ch->g->ops.gr.set_boosted_ctx) { - bool boost = - ((struct nvgpu_boosted_ctx_args *)buf)->boost; - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.gr.set_boosted_ctx(ch, boost); - gk20a_idle(ch->g); - } else { - err = -EINVAL; - } - break; - case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = nvgpu_ioctl_channel_get_user_syncpoint(ch, - (struct nvgpu_get_user_syncpoint_args *)buf); - gk20a_idle(ch->g); - break; - default: - dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - - nvgpu_mutex_release(&ch->ioctl_lock); - - gk20a_channel_put(ch); - - nvgpu_log_fn(g, "end"); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h deleted file mode 100644 index 48cff1ea..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_IOCTL_CHANNEL_H__ -#define __NVGPU_IOCTL_CHANNEL_H__ - -#include - -#include "gk20a/css_gr_gk20a.h" - -struct inode; -struct file; -struct gk20a; -struct nvgpu_channel_open_args; - -struct gk20a_cs_snapshot_client_linux { - struct gk20a_cs_snapshot_client cs_client; - - u32 dmabuf_fd; - struct dma_buf *dma_handler; -}; - -int gk20a_channel_open(struct inode *inode, struct file *filp); -int gk20a_channel_release(struct inode *inode, struct file *filp); -long gk20a_channel_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -int gk20a_channel_open_ioctl(struct gk20a *g, - struct nvgpu_channel_open_args *args); - -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); -void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); - -extern const struct file_operations gk20a_channel_ops; - -u32 nvgpu_get_common_runlist_level(u32 level); - -u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); -u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); -u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); -u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c deleted file mode 100644 index a7c6a607..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#ifdef CONFIG_DEBUG_FS -#include -#endif -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "clk/clk.h" -#include "pstate/pstate.h" -#include "lpwr/lpwr.h" -#include "volt/volt.h" - -#ifdef CONFIG_DEBUG_FS -#include "common/linux/os_linux.h" -#endif - -static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, - struct file *filp) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_clk_session *session = dev->session; - - - clk_arb_dbg(session->g, " "); - - nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); - nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); - return 0; -} - -static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) -{ - unsigned int poll_mask = 0; - - if (nvgpu_poll_mask & NVGPU_POLLIN) - poll_mask |= POLLIN; - if (nvgpu_poll_mask & NVGPU_POLLPRI) - poll_mask |= POLLPRI; - if (nvgpu_poll_mask & NVGPU_POLLOUT) - poll_mask |= POLLOUT; - if (nvgpu_poll_mask & NVGPU_POLLRDNORM) - poll_mask |= POLLRDNORM; - if (nvgpu_poll_mask & NVGPU_POLLHUP) - poll_mask |= POLLHUP; - - return poll_mask; -} - -static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - - clk_arb_dbg(dev->session->g, " "); - - poll_wait(filp, &dev->readout_wq.wq, wait); - return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); -} - -void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) -{ - nvgpu_cond_broadcast_interruptible(&dev->readout_wq); -} - -static int nvgpu_clk_arb_release_event_dev(struct inode *inode, - struct file *filp) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_clk_session *session = dev->session; - struct nvgpu_clk_arb *arb; - - arb = session->g->clk_arb; - - clk_arb_dbg(session->g, " "); - - if (arb) { - nvgpu_spinlock_acquire(&arb->users_lock); - nvgpu_list_del(&dev->link); - nvgpu_spinlock_release(&arb->users_lock); - nvgpu_clk_notification_queue_free(arb->g, &dev->queue); - } - - nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); - nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); - - return 0; -} - -static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) -{ - u32 nvgpu_gpu_event; - - switch (nvgpu_event) { - case NVGPU_EVENT_VF_UPDATE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; - break; - case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; - break; - case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; - break; - case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; - break; - case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: - nvgpu_gpu_event = 
NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; - break; - case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; - break; - case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; - break; - case NVGPU_EVENT_ALARM_GPU_LOST: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; - break; - default: - /* Control shouldn't come here */ - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; - break; - } - return nvgpu_gpu_event; -} - -static inline u32 __pending_event(struct nvgpu_clk_dev *dev, - struct nvgpu_gpu_event_info *info) { - - u32 tail, head; - u32 events = 0; - struct nvgpu_clk_notification *p_notif; - - tail = nvgpu_atomic_read(&dev->queue.tail); - head = nvgpu_atomic_read(&dev->queue.head); - - head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; - - if (_WRAPGTEQ(tail, head) && info) { - head++; - p_notif = &dev->queue.notifications[head % dev->queue.size]; - events |= nvgpu_convert_gpu_event(p_notif->notification); - info->event_id = ffs(events) - 1; - info->timestamp = p_notif->timestamp; - nvgpu_atomic_set(&dev->queue.head, head); - } - - return events; -} - -static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, - size_t size, loff_t *off) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_gpu_event_info info; - ssize_t err; - - clk_arb_dbg(dev->session->g, - "filp=%p, buf=%p, size=%zu", filp, buf, size); - - if ((size - *off) < sizeof(info)) - return 0; - - memset(&info, 0, sizeof(info)); - /* Get the oldest event from the queue */ - while (!__pending_event(dev, &info)) { - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, - __pending_event(dev, &info), 0); - if (err) - return err; - if (info.timestamp) - break; - } - - if (copy_to_user(buf + *off, &info, sizeof(info))) - return -EFAULT; - - return sizeof(info); -} - -static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, - struct nvgpu_gpu_set_event_filter_args *args) -{ - struct gk20a *g = dev->session->g; - u32 mask; - - nvgpu_log(g, gpu_dbg_fn, " "); - - if (args->flags) - return -EINVAL; - - if (args->size != 1) - return -EINVAL; - - if (copy_from_user(&mask, (void __user *) args->buffer, - args->size * sizeof(u32))) - return -EFAULT; - - /* update alarm mask */ - nvgpu_atomic_set(&dev->enabled_mask, mask); - - return 0; -} - -static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct gk20a *g = dev->session->g; - u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) - || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) - return -EINVAL; - - BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_EVENT_IOCTL_SET_FILTER: - err = nvgpu_clk_arb_set_event_filter(dev, - (struct nvgpu_gpu_set_event_filter_args *)buf); - break; - default: - nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); - - return err; -} - -static const struct 
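/*
 * Note on __pending_event() above: the notification queue is a lossy
 * single-producer/single-consumer ring indexed by free-running atomic
 * counters; the "head = (tail - head) < size ? head : tail - size" step
 * fast-forwards a reader that has fallen a full ring behind, so it never
 * observes slots the writer may have overwritten. A minimal non-atomic
 * sketch of the same consume step (types and names here are
 * illustrative, not the driver's):
 */
#define SKETCH_RING_SIZE 8u

struct sketch_ring {
	unsigned int head, tail;	/* free-running u32 counters; writer bumps tail */
	int slot[SKETCH_RING_SIZE];	/* payload; 8 is an arbitrary example capacity */
};

static int sketch_ring_consume(struct sketch_ring *r, int *out)
{
	unsigned int tail = r->tail;
	unsigned int head = r->head;

	if (tail - head >= SKETCH_RING_SIZE)	/* reader overrun: drop the oldest */
		head = tail - SKETCH_RING_SIZE;
	if (tail == head)
		return 0;			/* queue is empty */
	head++;					/* unread entries live at head+1 .. tail */
	*out = r->slot[head % SKETCH_RING_SIZE];
	r->head = head;
	return 1;
}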
file_operations completion_dev_ops = { - .owner = THIS_MODULE, - .release = nvgpu_clk_arb_release_completion_dev, - .poll = nvgpu_clk_arb_poll_dev, -}; - -static const struct file_operations event_dev_ops = { - .owner = THIS_MODULE, - .release = nvgpu_clk_arb_release_event_dev, - .poll = nvgpu_clk_arb_poll_dev, - .read = nvgpu_clk_arb_read_event_dev, -#ifdef CONFIG_COMPAT - .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, -#endif - .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, -}; - -static int nvgpu_clk_arb_install_fd(struct gk20a *g, - struct nvgpu_clk_session *session, - const struct file_operations *fops, - struct nvgpu_clk_dev **_dev) -{ - struct file *file; - int fd; - int err; - int status; - char name[64]; - struct nvgpu_clk_dev *dev; - - clk_arb_dbg(g, " "); - - dev = nvgpu_kzalloc(g, sizeof(*dev)); - if (!dev) - return -ENOMEM; - - status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, - DEFAULT_EVENT_NUMBER); - if (status < 0) { - err = status; - goto fail; - } - - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) { - err = fd; - goto fail; - } - - snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); - file = anon_inode_getfile(name, fops, dev, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto fail_fd; - } - - fd_install(fd, file); - - nvgpu_cond_init(&dev->readout_wq); - - nvgpu_atomic_set(&dev->poll_mask, 0); - - dev->session = session; - nvgpu_ref_init(&dev->refcount); - - nvgpu_ref_get(&session->refcount); - - *_dev = dev; - - return fd; - -fail_fd: - put_unused_fd(fd); -fail: - nvgpu_kfree(g, dev); - - return err; -} - -int nvgpu_clk_arb_install_event_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) -{ - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_dev *dev; - int fd; - - clk_arb_dbg(g, " "); - - fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); - if (fd < 0) - return fd; - - /* TODO: alarm mask needs to be set to default value to prevent - * failures of legacy tests. 
This will be removed when sanity is - * updated - */ - if (alarm_mask) - nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); - else - nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); - - dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); - - nvgpu_spinlock_acquire(&arb->users_lock); - nvgpu_list_add_tail(&dev->link, &arb->users); - nvgpu_spinlock_release(&arb->users_lock); - - *event_fd = fd; - - return 0; -} - -int nvgpu_clk_arb_install_request_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int *request_fd) -{ - struct nvgpu_clk_dev *dev; - int fd; - - clk_arb_dbg(g, " "); - - fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); - if (fd < 0) - return fd; - - *request_fd = fd; - - return 0; -} - -int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int request_fd) -{ - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_dev *dev; - struct fd fd; - int err = 0; - - clk_arb_dbg(g, " "); - - fd = fdget(request_fd); - if (!fd.file) - return -EINVAL; - - if (fd.file->f_op != &completion_dev_ops) { - err = -EINVAL; - goto fdput_fd; - } - - dev = (struct nvgpu_clk_dev *) fd.file->private_data; - - if (!dev || dev->session != session) { - err = -EINVAL; - goto fdput_fd; - } - nvgpu_ref_get(&dev->refcount); - nvgpu_spinlock_acquire(&session->session_lock); - nvgpu_list_add(&dev->node, &session->targets); - nvgpu_spinlock_release(&session->session_lock); - nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); - -fdput_fd: - fdput(fd); - return err; -} - -int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, - int request_fd, u32 api_domain, u16 target_mhz) -{ - struct nvgpu_clk_dev *dev; - struct fd fd; - int err = 0; - - clk_arb_dbg(session->g, - "domain=0x%08x target_mhz=%u", api_domain, target_mhz); - - fd = fdget(request_fd); - if (!fd.file) - return -EINVAL; - - if (fd.file->f_op != &completion_dev_ops) { - err = -EINVAL; - goto fdput_fd; - } - - dev = fd.file->private_data; - if (!dev || dev->session != session) { - err = -EINVAL; - goto fdput_fd; - } - - switch (api_domain) { - case NVGPU_CLK_DOMAIN_MCLK: - dev->mclk_target_mhz = target_mhz; - break; - - case NVGPU_CLK_DOMAIN_GPCCLK: - dev->gpc2clk_target_mhz = target_mhz * 2ULL; - break; - - default: - err = -EINVAL; - } - -fdput_fd: - fdput(fd); - return err; -} - -u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) -{ - u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); - u32 api_domains = 0; - - if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) - api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); - - if (clk_domains & CTRL_CLK_DOMAIN_MCLK) - api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); - - return api_domains; -} - -#ifdef CONFIG_DEBUG_FS -static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_arb_debug *debug; - - u64 num; - s64 tmp, avg, std, max, min; - - debug = NV_ACCESS_ONCE(arb->debug); - /* Make copy of structure and ensure no reordering */ - nvgpu_smp_rmb(); - if (!debug) - return -EINVAL; - - std = debug->switch_std; - avg = debug->switch_avg; - max = debug->switch_max; - min = debug->switch_min; - num = debug->switch_num; - - tmp = std; - do_div(tmp, num); - seq_printf(s, "Number of transitions: %lld\n", - num); - seq_printf(s, "max / min : %lld / %lld usec\n", - max, min); - seq_printf(s, "avg / std : %lld / %ld usec\n", - avg, int_sqrt(tmp)); - - return 0; -} - -static int 
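/*
 * Note on nvgpu_clk_arb_stats_show() above: switch_std evidently holds
 * the accumulated sum of squared deviations rather than a precomputed
 * deviation, hence the divide-then-int_sqrt() at print time. The
 * do_div() detour is needed because a plain 64-bit '/' is unavailable to
 * 32-bit kernels; do_div(n, base) divides n in place by a 32-bit base
 * and evaluates to the remainder. A hedged sketch of the idiom:
 */
static unsigned long sketch_stddev_usec(u64 sum_sq_dev, u32 nr_samples)
{
	u64 variance = sum_sq_dev;

	do_div(variance, nr_samples);	/* variance = sum_sq_dev / n, in place */
	return int_sqrt(variance);	/* integer square root, <linux/kernel.h> */
}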
nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); -} - -static const struct file_operations nvgpu_clk_arb_stats_fops = { - .open = nvgpu_clk_arb_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -int nvgpu_clk_arb_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - struct dentry *d; - - nvgpu_log(g, gpu_dbg_info, "g=%p", g); - - d = debugfs_create_file( - "arb_stats", - S_IRUGO, - gpu_root, - g, - &nvgpu_clk_arb_stats_fops); - if (!d) - return -ENOMEM; - - return 0; -} -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c deleted file mode 100644 index 73a8131d..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ /dev/null @@ -1,1962 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "ioctl_ctrl.h" -#include "ioctl_dbg.h" -#include "ioctl_as.h" -#include "ioctl_tsg.h" -#include "ioctl_channel.h" -#include "gk20a/gk20a.h" -#include "gk20a/fence_gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "dmabuf.h" -#include "channel.h" - -#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? 
\ - (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) -#define MHZ_TO_HZ(a) ((u64)a * MHZ) - -struct gk20a_ctrl_priv { - struct device *dev; - struct gk20a *g; - struct nvgpu_clk_session *clk_session; -}; - -static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) - core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; - - return core_flags; -} - -int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - struct gk20a_ctrl_priv *priv; - int err = 0; - - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, ctrl.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - nvgpu_log_fn(g, " "); - - priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - filp->private_data = priv; - priv->dev = dev_from_gk20a(g); - /* - * We dont close the arbiter fd's after driver teardown to support - * GPU_LOST events, so we store g here, instead of dereferencing the - * dev structure on teardown - */ - priv->g = g; - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - goto free_ref; - gk20a_idle(g); - } - - err = nvgpu_clk_arb_init_session(g, &priv->clk_session); -free_ref: - if (err) - gk20a_put(g); - return err; -} -int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_ctrl_priv *priv = filp->private_data; - struct gk20a *g = priv->g; - - nvgpu_log_fn(g, " "); - - if (priv->clk_session) - nvgpu_clk_arb_release_session(g, priv->clk_session); - - gk20a_put(g); - nvgpu_kfree(g, priv); - - return 0; -} - -struct nvgpu_flags_mapping { - u64 ioctl_flag; - int enabled_flag; -}; - -static struct nvgpu_flags_mapping flags_mapping[] = { - {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, - NVGPU_HAS_SYNCPOINTS}, - {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, - NVGPU_SUPPORT_PARTIAL_MAPPINGS}, - {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, - NVGPU_SUPPORT_SPARSE_ALLOCS}, - {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, - NVGPU_SUPPORT_SYNC_FENCE_FDS}, - {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, - NVGPU_SUPPORT_CYCLE_STATS}, - {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, - NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, - {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, - NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, - {NVGPU_GPU_FLAGS_SUPPORT_TSG, - NVGPU_SUPPORT_TSG}, - {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, - NVGPU_SUPPORT_CLOCK_CONTROLS}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, - NVGPU_SUPPORT_GET_VOLTAGE}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, - NVGPU_SUPPORT_GET_CURRENT}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, - NVGPU_SUPPORT_GET_POWER}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, - NVGPU_SUPPORT_GET_TEMPERATURE}, - {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, - NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, - {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, - NVGPU_SUPPORT_DEVICE_EVENTS}, - {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, - NVGPU_SUPPORT_FECS_CTXSW_TRACE}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, - NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, - NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, - NVGPU_SUPPORT_DETERMINISTIC_OPTS}, - {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, - NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, - {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, - NVGPU_SUPPORT_USER_SYNCPOINT}, - {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, - NVGPU_SUPPORT_IO_COHERENCE}, - 
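/*
 * Note: this flags_mapping[] table (it continues right below) pairs each
 * UAPI characteristics flag with the driver-internal nvgpu_is_enabled()
 * flag it is derived from; nvgpu_ctrl_ioctl_gpu_characteristics_flags()
 * after the table folds the enabled ones into the .flags word that
 * NVGPU_GPU_IOCTL_GET_CHARACTERISTICS reports. A hedged userspace sketch
 * of testing one flag (the UAPI header path, ctrl device fd and error
 * handling are illustrative assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int gpu_supports_tsg(int ctrl_fd)	/* fd on e.g. /dev/nvhost-ctrl-gpu */
{
	struct nvgpu_gpu_characteristics ch;
	struct nvgpu_gpu_get_characteristics req;

	memset(&ch, 0, sizeof(ch));
	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_size = sizeof(ch);
	req.gpu_characteristics_buf_addr = (uintptr_t)&ch;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) < 0)
		return -1;

	return !!(ch.flags & NVGPU_GPU_FLAGS_SUPPORT_TSG);
}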
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, - NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, - {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, - NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, - NVGPU_ECC_ENABLED_SM_LRF}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, - NVGPU_ECC_ENABLED_SM_SHM}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, - NVGPU_ECC_ENABLED_TEX}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, - NVGPU_ECC_ENABLED_LTC}, - {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, - NVGPU_SUPPORT_TSG_SUBCONTEXTS}, - {NVGPU_GPU_FLAGS_SUPPORT_SCG, - NVGPU_SUPPORT_SCG}, - {NVGPU_GPU_FLAGS_SUPPORT_VPR, - NVGPU_SUPPORT_VPR}, -}; - -static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) -{ - unsigned int i; - u64 ioctl_flags = 0; - - for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { - if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) - ioctl_flags |= flags_mapping[i].ioctl_flag; - } - - return ioctl_flags; -} - -static void nvgpu_set_preemption_mode_flags(struct gk20a *g, - struct nvgpu_gpu_characteristics *gpu) -{ - struct nvgpu_preemption_modes_rec preemption_mode_rec; - - g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); - - gpu->graphics_preemption_mode_flags = - nvgpu_get_ioctl_graphics_preempt_mode_flags( - preemption_mode_rec.graphics_preemption_mode_flags); - gpu->compute_preemption_mode_flags = - nvgpu_get_ioctl_compute_preempt_mode_flags( - preemption_mode_rec.compute_preemption_mode_flags); - - gpu->default_graphics_preempt_mode = - nvgpu_get_ioctl_graphics_preempt_mode( - preemption_mode_rec.default_graphics_preempt_mode); - gpu->default_compute_preempt_mode = - nvgpu_get_ioctl_compute_preempt_mode( - preemption_mode_rec.default_compute_preempt_mode); -} - -static long -gk20a_ctrl_ioctl_gpu_characteristics( - struct gk20a *g, - struct nvgpu_gpu_get_characteristics *request) -{ - struct nvgpu_gpu_characteristics gpu; - long err = 0; - - if (gk20a_busy(g)) { - nvgpu_err(g, "failed to power on gpu"); - return -EINVAL; - } - - memset(&gpu, 0, sizeof(gpu)); - - gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); - gpu.on_board_video_memory_size = 0; /* integrated GPU */ - - gpu.num_gpc = g->gr.gpc_count; - gpu.max_gpc_count = g->gr.max_gpc_count; - - gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; - - gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ - - gpu.compression_page_size = g->ops.fb.compression_page_size(g); - - gpu.gpc_mask = (1 << g->gr.gpc_count)-1; - - gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); - - gpu.arch = g->params.gpu_arch; - gpu.impl = g->params.gpu_impl; - gpu.rev = g->params.gpu_rev; - gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; - gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? 
- NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; - gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); - gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); - gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); - gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); - gpu.inline_to_memory_class = - g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); - gpu.dma_copy_class = - g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - - gpu.vbios_version = g->bios.vbios_version; - gpu.vbios_oem_version = g->bios.vbios_oem_version; - - gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); - gpu.pde_coverage_bit_count = - g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; - gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); - - gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; - gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; - gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; - - gpu.max_css_buffer_size = g->gr.max_css_buffer_size; - - gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; - gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; - gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; - gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; - gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; - gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; - gpu.gpu_va_bit_count = 40; - - strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); - gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); - gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); - gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); - gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); - gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; - gpu.gr_gobs_per_comptagline_per_slice = - g->gr.gobs_per_comptagline_per_slice; - gpu.num_ltc = g->ltc_count; - gpu.lts_per_ltc = g->gr.slices_per_ltc; - gpu.cbc_cache_line_size = g->gr.cacheline_size; - gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; - - if (g->ops.clk.get_maxrate) - gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); - - gpu.local_video_memory_size = g->mm.vidmem.size; - - gpu.pci_vendor_id = g->pci_vendor_id; - gpu.pci_device_id = g->pci_device_id; - gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; - gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; - gpu.pci_class = g->pci_class; - gpu.pci_revision = g->pci_revision; - - nvgpu_set_preemption_mode_flags(g, &gpu); - - if (request->gpu_characteristics_buf_size > 0) { - size_t write_size = sizeof(gpu); - - if (write_size > request->gpu_characteristics_buf_size) - write_size = request->gpu_characteristics_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - request->gpu_characteristics_buf_addr, - &gpu, write_size); - } - - if (err == 0) - request->gpu_characteristics_buf_size = sizeof(gpu); - - gk20a_idle(g); - - return err; -} - -static int gk20a_ctrl_prepare_compressible_read( - struct gk20a *g, - struct nvgpu_gpu_prepare_compressible_read_args *args) -{ - int ret = -ENOSYS; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct nvgpu_channel_fence fence; - struct gk20a_fence *fence_out = NULL; - int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( - args->submit_flags); - int fd = -1; - - fence.id = args->fence.syncpt_id; - fence.value = args->fence.syncpt_value; - - /* Try and allocate an fd here*/ - if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) - && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { - fd = 
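/*
 * Note on the fd handling starting here (and in gk20a_ctrl_alloc_as() /
 * gk20a_ctrl_open_tsg() further down): the sequence is always
 * reserve -> create file -> fd_install(), with put_unused_fd() only on
 * paths where the fd was never published. Once fd_install() has run,
 * userspace can already reach the file, so anything the fops depend on
 * must be initialized before that point. A condensed sketch of the
 * pattern (the object and fops names are illustrative):
 */
static int sketch_export_fd(void *obj, const struct file_operations *fops)
{
	struct file *file;
	int fd = get_unused_fd_flags(O_RDWR);	/* reserve a table slot only */

	if (fd < 0)
		return fd;

	file = anon_inode_getfile("sketch-obj", fops, obj, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);		/* slot never became visible */
		return PTR_ERR(file);
	}

	fd_install(fd, file);		/* publish; nothing may fail after this */
	return fd;
}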
get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; - } - - ret = gk20a_prepare_compressible_read(l, args->handle, - args->request_compbits, args->offset, - args->compbits_hoffset, args->compbits_voffset, - args->scatterbuffer_offset, - args->width, args->height, args->block_height_log2, - submit_flags, &fence, &args->valid_compbits, - &args->zbc_color, &fence_out); - - if (ret) { - if (fd != -1) - put_unused_fd(fd); - return ret; - } - - /* Convert fence_out to something we can pass back to user space. */ - if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { - if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { - if (fence_out) { - ret = gk20a_fence_install_fd(fence_out, fd); - if (ret) - put_unused_fd(fd); - else - args->fence.fd = fd; - } else { - args->fence.fd = -1; - put_unused_fd(fd); - } - } else { - if (fence_out) { - args->fence.syncpt_id = fence_out->syncpt_id; - args->fence.syncpt_value = - fence_out->syncpt_value; - } else { - args->fence.syncpt_id = -1; - args->fence.syncpt_value = 0; - } - } - } - gk20a_fence_put(fence_out); -#endif - - return ret; -} - -static int gk20a_ctrl_mark_compressible_write( - struct gk20a *g, - struct nvgpu_gpu_mark_compressible_write_args *args) -{ - int ret = -ENOSYS; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - ret = gk20a_mark_compressible_write(g, args->handle, - args->valid_compbits, args->offset, args->zbc_color); -#endif - - return ret; -} - -static int gk20a_ctrl_alloc_as( - struct gk20a *g, - struct nvgpu_alloc_as_args *args) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_as_share *as_share; - int err; - int fd; - struct file *file; - char name[64]; - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); - - file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = gk20a_as_alloc_share(g, args->big_page_size, - gk20a_as_translate_as_alloc_flags(g, - args->flags), - &as_share); - if (err) - goto clean_up_file; - - fd_install(fd, file); - file->private_data = as_share; - - args->as_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static int gk20a_ctrl_open_tsg(struct gk20a *g, - struct nvgpu_gpu_open_tsg_args *args) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - int fd; - struct file *file; - char name[64]; - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); - - file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = nvgpu_ioctl_tsg_open(g, file); - if (err) - goto clean_up_file; - - fd_install(fd, file); - args->tsg_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, - struct nvgpu_gpu_get_tpc_masks_args *args) -{ - struct gr_gk20a *gr = &g->gr; - int err = 0; - const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; - - if (args->mask_buf_size > 0) { - size_t write_size = gpc_tpc_mask_size; - - if (write_size > args->mask_buf_size) - write_size = args->mask_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - args->mask_buf_addr, - gr->gpc_tpc_mask, write_size); - } - - if (err == 0) - args->mask_buf_size = gpc_tpc_mask_size; - - return err; -} - -static int 
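/*
 * Note: gk20a_ctrl_get_tpc_masks() above and gk20a_ctrl_get_fbp_l2_masks()
 * below share a two-call convention: invoked with mask_buf_size == 0 they
 * only report the required size, and on success they always write the
 * full size back so the caller can detect truncation. A hedged userspace
 * sketch (the UAPI header path and device fd are assumptions):
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static uint32_t *fetch_tpc_masks(int ctrl_fd, uint32_t *bytes_out)
{
	struct nvgpu_gpu_get_tpc_masks_args args;
	uint32_t *buf;

	memset(&args, 0, sizeof(args));		/* first pass: size query */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return NULL;

	buf = malloc(args.mask_buf_size);
	if (!buf)
		return NULL;

	args.mask_buf_addr = (uintptr_t)buf;	/* second pass: fetch */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0) {
		free(buf);
		return NULL;
	}

	*bytes_out = args.mask_buf_size;	/* one u32 TPC mask per GPC */
	return buf;
}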
gk20a_ctrl_get_fbp_l2_masks( - struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) -{ - struct gr_gk20a *gr = &g->gr; - int err = 0; - const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; - - if (args->mask_buf_size > 0) { - size_t write_size = fbp_l2_mask_size; - - if (write_size > args->mask_buf_size) - write_size = args->mask_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - args->mask_buf_addr, - gr->fbp_rop_l2_en_mask, write_size); - } - - if (err == 0) - args->mask_buf_size = fbp_l2_mask_size; - - return err; -} - -static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, - struct nvgpu_gpu_l2_fb_args *args) -{ - int err = 0; - - if ((!args->l2_flush && !args->fb_flush) || - (!args->l2_flush && args->l2_invalidate)) - return -EINVAL; - - if (args->l2_flush) - g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); - - if (args->fb_flush) - g->ops.mm.fb_flush(g); - - return err; -} - -/* Invalidate i-cache for kepler & maxwell */ -static int nvgpu_gpu_ioctl_inval_icache( - struct gk20a *g, - struct nvgpu_gpu_inval_icache_args *args) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.inval_icache(g, ch); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - return err; -} - -static int nvgpu_gpu_ioctl_set_mmu_debug_mode( - struct gk20a *g, - struct nvgpu_gpu_mmu_debug_mode_args *args) -{ - if (gk20a_busy(g)) { - nvgpu_err(g, "failed to power on gpu"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.fb.set_debug_mode(g, args->state == 1); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - return 0; -} - -static int nvgpu_gpu_ioctl_set_debug_mode( - struct gk20a *g, - struct nvgpu_gpu_sm_debug_mode_args *args) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (g->ops.gr.set_sm_debug_mode) - err = g->ops.gr.set_sm_debug_mode(g, ch, - args->sms, !!args->enable); - else - err = -ENOSYS; - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - return err; -} - -static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.trigger_suspend(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, - struct nvgpu_gpu_wait_pause_args *args) -{ - int err; - struct warpstate *ioctl_w_state; - struct nvgpu_warpstate *w_state = NULL; - u32 sm_count, ioctl_size, size, sm_id; - - sm_count = g->gr.gpc_count * g->gr.tpc_count; - - ioctl_size = sm_count * sizeof(struct warpstate); - ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); - if (!ioctl_w_state) - return -ENOMEM; - - size = sm_count * sizeof(struct nvgpu_warpstate); - w_state = nvgpu_kzalloc(g, size); - if (!w_state) { - err = -ENOMEM; - goto out_free; - } - - err = gk20a_busy(g); - if (err) - goto out_free; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.gr.wait_for_pause(g, w_state); - - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { - ioctl_w_state[sm_id].valid_warps[0] = - w_state[sm_id].valid_warps[0]; - ioctl_w_state[sm_id].valid_warps[1] = - 
w_state[sm_id].valid_warps[1]; - ioctl_w_state[sm_id].trapped_warps[0] = - w_state[sm_id].trapped_warps[0]; - ioctl_w_state[sm_id].trapped_warps[1] = - w_state[sm_id].trapped_warps[1]; - ioctl_w_state[sm_id].paused_warps[0] = - w_state[sm_id].paused_warps[0]; - ioctl_w_state[sm_id].paused_warps[1] = - w_state[sm_id].paused_warps[1]; - } - /* Copy to user space - pointed by "args->pwarpstate" */ - if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, - w_state, ioctl_size)) { - nvgpu_log_fn(g, "copy_to_user failed!"); - err = -EFAULT; - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - -out_free: - nvgpu_kfree(g, w_state); - nvgpu_kfree(g, ioctl_w_state); - - return err; -} - -static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.resume_from_pause(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.gr.clear_sm_errors(g); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_has_any_exception( - struct gk20a *g, - struct nvgpu_gpu_tpc_exception_en_status_args *args) -{ - u32 tpc_exception_en; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - args->tpc_exception_en_sm_mask = tpc_exception_en; - - return 0; -} - -static int gk20a_ctrl_get_num_vsms(struct gk20a *g, - struct nvgpu_gpu_num_vsms *args) -{ - struct gr_gk20a *gr = &g->gr; - args->num_vsms = gr->no_of_sm; - return 0; -} - -static int gk20a_ctrl_vsm_mapping(struct gk20a *g, - struct nvgpu_gpu_vsms_mapping *args) -{ - int err = 0; - struct gr_gk20a *gr = &g->gr; - size_t write_size = gr->no_of_sm * - sizeof(struct nvgpu_gpu_vsms_mapping_entry); - struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; - u32 i; - - vsms_buf = nvgpu_kzalloc(g, write_size); - if (vsms_buf == NULL) - return -ENOMEM; - - for (i = 0; i < gr->no_of_sm; i++) { - vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; - if (g->ops.gr.get_nonpes_aware_tpc) - vsms_buf[i].tpc_index = - g->ops.gr.get_nonpes_aware_tpc(g, - gr->sm_to_cluster[i].gpc_index, - gr->sm_to_cluster[i].tpc_index); - else - vsms_buf[i].tpc_index = - gr->sm_to_cluster[i].tpc_index; - } - - err = copy_to_user((void __user *)(uintptr_t) - args->vsms_map_buf_addr, - vsms_buf, write_size); - nvgpu_kfree(g, vsms_buf); - - return err; -} - -static int nvgpu_gpu_get_cpu_time_correlation_info( - struct gk20a *g, - struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) -{ - struct nvgpu_cpu_time_correlation_sample *samples; - int err; - u32 i; - - if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || - args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) - return -EINVAL; - - samples = nvgpu_kzalloc(g, args->count * - sizeof(struct nvgpu_cpu_time_correlation_sample)); - if (!samples) { - return -ENOMEM; - } - - err = g->ops.ptimer.get_timestamps_zipper(g, - args->source_id, args->count, samples); - if (!err) { - for (i = 0; i < args->count; i++) { - args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; - args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; - } - } - - nvgpu_kfree(g, samples); - - return err; -} - -static int nvgpu_gpu_get_gpu_time( - struct gk20a *g, - struct nvgpu_gpu_get_gpu_time_args 
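/*
 * Note on nvgpu_gpu_ioctl_wait_for_pause() above: the loop repacks
 * w_state[] (struct nvgpu_warpstate) into ioctl_w_state[] (the UAPI
 * struct warpstate), but the copy_to_user() that follows sends the
 * unconverted w_state buffer with ioctl_size, so the repacked array is
 * never consumed -- the two layouts apparently coincide today, which
 * would hide the slip. The intent looks like it should be:
 *
 *	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate,
 *			 ioctl_w_state, ioctl_size)) {
 *		nvgpu_log_fn(g, "copy_to_user failed!");
 *		err = -EFAULT;
 *	}
 */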
*args) -{ - u64 time; - int err; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.ptimer.read_ptimer(g, &time); - if (!err) - args->gpu_timestamp = time; - - gk20a_idle(g); - return err; -} - -static int nvgpu_gpu_get_engine_info( - struct gk20a *g, - struct nvgpu_gpu_get_engine_info_args *args) -{ - int err = 0; - u32 engine_enum = ENGINE_INVAL_GK20A; - u32 report_index = 0; - u32 engine_id_idx; - const u32 max_buffer_engines = args->engine_info_buf_size / - sizeof(struct nvgpu_gpu_get_engine_info_item); - struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = - (void __user *)(uintptr_t)args->engine_info_buf_addr; - - for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; - ++engine_id_idx) { - u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; - const struct fifo_engine_info_gk20a *src_info = - &g->fifo.engine_info[active_engine_id]; - struct nvgpu_gpu_get_engine_info_item dst_info; - - memset(&dst_info, 0, sizeof(dst_info)); - - engine_enum = src_info->engine_enum; - - switch (engine_enum) { - case ENGINE_GR_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; - break; - - case ENGINE_GRCE_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; - break; - - case ENGINE_ASYNC_CE_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; - break; - - default: - nvgpu_err(g, "Unmapped engine enum %u", - engine_enum); - continue; - } - - dst_info.engine_instance = src_info->inst_id; - dst_info.runlist_id = src_info->runlist_id; - - if (report_index < max_buffer_engines) { - err = copy_to_user(&dst_item_list[report_index], - &dst_info, sizeof(dst_info)); - if (err) - goto clean_up; - } - - ++report_index; - } - - args->engine_info_buf_size = - report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); - -clean_up: - return err; -} - -static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, - struct nvgpu_gpu_alloc_vidmem_args *args) -{ - u32 align = args->in.alignment ? 
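/*
 * Note on nvgpu_gpu_alloc_vidmem() here: the checks below require a
 * nonzero size that is a multiple of 4K and a power-of-two alignment no
 * larger than the size rounded up to a power of two (a buddy-allocator
 * limit); alignment 0 selects the 4K default, and the allocation is
 * returned as a dma-buf fd. A hedged userspace sketch (the UAPI header
 * path and device fd are assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int alloc_vidmem_dmabuf(int ctrl_fd, uint64_t size)
{
	struct nvgpu_gpu_alloc_vidmem_args args;

	memset(&args, 0, sizeof(args));
	args.in.size = size;		/* must be a nonzero multiple of 4K */
	args.in.alignment = 0;		/* 0 selects the default 4K alignment */

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args) < 0)
		return -1;

	return args.out.dmabuf_fd;	/* a dma-buf fd; close() drops the ref */
}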
args->in.alignment : SZ_4K; - int fd; - - nvgpu_log_fn(g, " "); - - /* not yet supported */ - if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) - return -EINVAL; - - /* not yet supported */ - if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) - return -EINVAL; - - if (args->in.size & (SZ_4K - 1)) - return -EINVAL; - - if (!args->in.size) - return -EINVAL; - - if (align & (align - 1)) - return -EINVAL; - - if (align > roundup_pow_of_two(args->in.size)) { - /* log this special case, buddy allocator detail */ - nvgpu_warn(g, - "alignment larger than buffer size rounded up to power of 2 is not supported"); - return -EINVAL; - } - - fd = nvgpu_vidmem_export_linux(g, args->in.size); - if (fd < 0) - return fd; - - args->out.dmabuf_fd = fd; - - nvgpu_log_fn(g, "done, fd=%d", fd); - - return 0; -} - -static int nvgpu_gpu_get_memory_state(struct gk20a *g, - struct nvgpu_gpu_get_memory_state_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || - args->reserved[2] || args->reserved[3]) - return -EINVAL; - - err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); - - nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); - - return err; -} - -static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) -{ - u32 domain = 0; - - if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) - domain = NVGPU_CLK_DOMAIN_MCLK; - else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) - domain = NVGPU_CLK_DOMAIN_GPCCLK; - else - domain = NVGPU_CLK_DOMAIN_MAX + 1; - - return domain; -} - -static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_vf_points_args *args) -{ - struct nvgpu_gpu_clk_vf_point clk_point; - struct nvgpu_gpu_clk_vf_point __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - u32 clk_domains = 0; - int err; - u16 last_mhz; - u16 *fpoints; - u32 i; - u32 max_points = 0; - u32 num_points = 0; - u16 min_mhz; - u16 max_mhz; - - nvgpu_log_fn(g, " "); - - if (!session || args->flags) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - args->num_entries = 0; - - if (!nvgpu_clk_arb_is_valid_domain(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain))) - return -EINVAL; - - err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &max_points, NULL); - if (err) - return err; - - if (!args->max_entries) { - args->max_entries = max_points; - return 0; - } - - if (args->max_entries < max_points) - return -EINVAL; - - err = nvgpu_clk_arb_get_arbiter_clk_range(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &min_mhz, &max_mhz); - if (err) - return err; - - fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); - if (!fpoints) - return -ENOMEM; - - err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &max_points, fpoints); - if (err) - goto fail; - - entry = (struct nvgpu_gpu_clk_vf_point __user *) - (uintptr_t)args->clk_vf_point_entries; - - last_mhz = 0; - num_points = 0; - for (i = 0; (i < max_points) && !err; i++) { - - /* filter out duplicate frequencies */ - if (fpoints[i] == last_mhz) - continue; - - /* filter out out-of-range frequencies */ - if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) - continue; - - last_mhz = fpoints[i]; - clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); - - err = copy_to_user((void __user *)entry, &clk_point, - sizeof(clk_point)); - - num_points++; - entry++; - } - - args->num_entries = num_points; - -fail: - 
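/*
 * Note on the copy-out loop above (and several other handlers in this
 * file, e.g. gk20a_ctrl_get_tpc_masks()): copy_to_user() returns the
 * number of bytes it could NOT copy, so storing that in err and
 * returning it hands a positive byte count, not -EFAULT, back to the
 * ioctl core on a partial fault. nvgpu_gpu_clk_get_range() just below
 * shows the conventional mapping:
 *
 *	if (copy_to_user(dst, src, len))
 *		return -EFAULT;
 */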
nvgpu_kfree(g, fpoints); - return err; -} - -static int nvgpu_gpu_clk_get_range(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_range_args *args) -{ - struct nvgpu_gpu_clk_range clk_range; - struct nvgpu_gpu_clk_range __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - - u32 clk_domains = 0; - u32 num_domains; - u32 num_entries; - u32 i; - int bit; - int err; - u16 min_mhz, max_mhz; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - num_domains = hweight_long(clk_domains); - - if (!args->flags) { - if (!args->num_entries) { - args->num_entries = num_domains; - return 0; - } - - if (args->num_entries < num_domains) - return -EINVAL; - - args->num_entries = 0; - num_entries = num_domains; - - } else { - if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) - return -EINVAL; - - num_entries = args->num_entries; - if (num_entries > num_domains) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_range __user *) - (uintptr_t)args->clk_range_entries; - - for (i = 0; i < num_entries; i++, entry++) { - - if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { - if (copy_from_user(&clk_range, (void __user *)entry, - sizeof(clk_range))) - return -EFAULT; - } else { - bit = ffs(clk_domains) - 1; - clk_range.clk_domain = bit; - clk_domains &= ~BIT(bit); - } - - clk_range.flags = 0; - err = nvgpu_clk_arb_get_arbiter_clk_range(g, - nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), - &min_mhz, &max_mhz); - clk_range.min_hz = MHZ_TO_HZ(min_mhz); - clk_range.max_hz = MHZ_TO_HZ(max_mhz); - - if (err) - return err; - - err = copy_to_user(entry, &clk_range, sizeof(clk_range)); - if (err) - return -EFAULT; - } - - args->num_entries = num_entries; - - return 0; -} - -static int nvgpu_gpu_clk_set_info(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_set_info_args *args) -{ - struct nvgpu_gpu_clk_info clk_info; - struct nvgpu_gpu_clk_info __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - - int fd; - u32 clk_domains = 0; - u16 freq_mhz; - int i; - int ret; - - nvgpu_log_fn(g, " "); - - if (!session || args->flags) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - if (!clk_domains) - return -EINVAL; - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - for (i = 0; i < args->num_entries; i++, entry++) { - - if (copy_from_user(&clk_info, entry, sizeof(clk_info))) - return -EFAULT; - - if (!nvgpu_clk_arb_is_valid_domain(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); - if (ret < 0) - return ret; - - for (i = 0; i < args->num_entries; i++, entry++) { - - if (copy_from_user(&clk_info, (void __user *)entry, - sizeof(clk_info))) - return -EFAULT; - freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); - - nvgpu_clk_arb_set_session_target_mhz(session, fd, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); - } - - ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); - if (ret < 0) - return ret; - - args->completion_fd = fd; - - return ret; -} - -static int nvgpu_gpu_clk_get_info(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_get_info_args *args) -{ - struct nvgpu_gpu_clk_info clk_info; - struct nvgpu_gpu_clk_info __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - 
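/*
 * Note on nvgpu_gpu_clk_set_info() above: a frequency request is an
 * fd-based transaction -- install a request fd, stage per-domain targets
 * against it, commit, and hand the fd back as completion_fd so userspace
 * can poll() for the arbiter to act on it. The HZ_TO_MHZ() conversion it
 * relies on dodges a 64-bit division: 0x10C8 == 4296 ~ 2^32 / 10^6, so
 * (a * 0x10C8) >> 32 ~ a / 1000000 for inputs wider than 32 bits, and
 * the 0xF414F9CD7 (~65.52 GHz) guard saturates to 0xffff MHz before the
 * u16 result could wrap. A hedged userspace sketch of the set/commit
 * flow (the UAPI header path and device fd are assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int set_gpcclk_hz(int ctrl_fd, uint64_t freq_hz)
{
	struct nvgpu_gpu_clk_info info;
	struct nvgpu_gpu_clk_set_info_args args;

	memset(&info, 0, sizeof(info));
	info.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPCCLK;
	info.freq_hz = freq_hz;

	memset(&args, 0, sizeof(args));		/* args.flags must stay 0 */
	args.num_entries = 1;
	args.clk_info_entries = (uintptr_t)&info;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &args) < 0)
		return -1;

	return args.completion_fd;	/* poll()able; close() when done */
}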
u32 clk_domains = 0; - u32 num_domains; - u32 num_entries; - u32 i; - u16 freq_mhz; - int err; - int bit; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - num_domains = hweight_long(clk_domains); - - if (!args->flags) { - if (!args->num_entries) { - args->num_entries = num_domains; - return 0; - } - - if (args->num_entries < num_domains) - return -EINVAL; - - args->num_entries = 0; - num_entries = num_domains; - - } else { - if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) - return -EINVAL; - - num_entries = args->num_entries; - if (num_entries > num_domains * 3) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - for (i = 0; i < num_entries; i++, entry++) { - - if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { - if (copy_from_user(&clk_info, (void __user *)entry, - sizeof(clk_info))) - return -EFAULT; - } else { - bit = ffs(clk_domains) - 1; - clk_info.clk_domain = bit; - clk_domains &= ~BIT(bit); - clk_info.clk_type = args->clk_type; - } - - switch (clk_info.clk_type) { - case NVGPU_GPU_CLK_TYPE_TARGET: - err = nvgpu_clk_arb_get_session_target_mhz(session, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - case NVGPU_GPU_CLK_TYPE_ACTUAL: - err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - case NVGPU_GPU_CLK_TYPE_EFFECTIVE: - err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - default: - freq_mhz = 0; - err = -EINVAL; - break; - } - if (err) - return err; - - clk_info.flags = 0; - clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); - - err = copy_to_user((void __user *)entry, &clk_info, - sizeof(clk_info)); - if (err) - return -EFAULT; - } - - args->num_entries = num_entries; - - return 0; -} - -static int nvgpu_gpu_get_event_fd(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_get_event_fd_args *args) -{ - struct nvgpu_clk_session *session = priv->clk_session; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, - args->flags); -} - -static int nvgpu_gpu_get_voltage(struct gk20a *g, - struct nvgpu_gpu_get_voltage_args *args) -{ - int err = -EINVAL; - - nvgpu_log_fn(g, " "); - - if (args->reserved) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - switch (args->which) { - case NVGPU_GPU_VOLTAGE_CORE: - err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); - break; - case NVGPU_GPU_VOLTAGE_SRAM: - err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); - break; - case NVGPU_GPU_VOLTAGE_BUS: - err = pmgr_pwr_devices_get_voltage(g, &args->voltage); - break; - default: - err = -EINVAL; - } - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_current(struct gk20a *g, - struct nvgpu_gpu_get_current_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = pmgr_pwr_devices_get_current(g, &args->currnt); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_power(struct gk20a *g, - struct nvgpu_gpu_get_power_args *args) -{ - int err; - - 
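/*
 * Note: every sensor/power query in this block wraps its hardware access
 * in the same bracket -- gk20a_busy() takes a usage reference and powers
 * the GPU up if needed (and can fail, in which case no register may be
 * touched), then gk20a_idle() drops the reference so railgating can
 * resume. A minimal sketch of the shape; sketch_read_sensor() stands in
 * for the real payload and is purely hypothetical:
 */
static int sketch_query(struct gk20a *g, u32 *out)
{
	int err = gk20a_busy(g);	/* power up + take a usage refcount */

	if (err)
		return err;		/* GPU not available: bail out early */

	*out = sketch_read_sensor(g);	/* hypothetical hardware access */

	gk20a_idle(g);			/* drop the ref; railgate may resume */
	return 0;
}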
nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = pmgr_pwr_devices_get_power(g, &args->power); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_temperature(struct gk20a *g, - struct nvgpu_gpu_get_temperature_args *args) -{ - int err; - u32 temp_f24_8; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) - return -EINVAL; - - if (!g->ops.therm.get_internal_sensor_curr_temp) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); - - gk20a_idle(g); - - args->temp_f24_8 = (s32)temp_f24_8; - - return err; -} - -static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, - struct nvgpu_gpu_set_therm_alert_limit_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!g->ops.therm.configure_therm_alert) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, - u32 flags) -{ - int err = 0; - bool allow; - bool disallow; - - allow = flags & - NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; - - disallow = flags & - NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; - - /* Can't be both at the same time */ - if (allow && disallow) - return -EINVAL; - - /* Nothing to do */ - if (!allow && !disallow) - return 0; - - /* - * Moving into explicit idle or back from it? A call that doesn't - * change the status is a no-op. 
- */ - if (!ch->deterministic_railgate_allowed && - allow) { - gk20a_idle(ch->g); - } else if (ch->deterministic_railgate_allowed && - !allow) { - err = gk20a_busy(ch->g); - if (err) { - nvgpu_warn(ch->g, - "cannot busy to restore deterministic ch"); - return err; - } - } - ch->deterministic_railgate_allowed = allow; - - return err; -} - -static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) -{ - if (!ch->deterministic) - return -EINVAL; - - return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); -} - -static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, - struct nvgpu_gpu_set_deterministic_opts_args *args) -{ - int __user *user_channels; - u32 i = 0; - int err = 0; - - nvgpu_log_fn(g, " "); - - user_channels = (int __user *)(uintptr_t)args->channels; - - /* Upper limit; prevent holding deterministic_busy for long */ - if (args->num_channels > g->fifo.num_channels) { - err = -EINVAL; - goto out; - } - - /* Trivial sanity check first */ - if (!access_ok(VERIFY_READ, user_channels, - args->num_channels * sizeof(int))) { - err = -EFAULT; - goto out; - } - - nvgpu_rwsem_down_read(&g->deterministic_busy); - - /* note: we exit at the first failure */ - for (; i < args->num_channels; i++) { - int ch_fd = 0; - struct channel_gk20a *ch; - - if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { - /* User raced with above access_ok */ - err = -EFAULT; - break; - } - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) { - err = -EINVAL; - break; - } - - err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); - - gk20a_channel_put(ch); - - if (err) - break; - } - - nvgpu_rwsem_up_read(&g->deterministic_busy); - -out: - args->num_channels = i; - return err; -} - -static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, - struct nvgpu_gpu_read_single_sm_error_state_args *args) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gr_sm_error_state *sm_error_state; - struct nvgpu_gpu_sm_error_state_record sm_error_state_record; - u32 sm_id; - int err = 0; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - sm_error_state = gr->sm_error_states + sm_id; - sm_error_state_record.global_esr = - sm_error_state->hww_global_esr; - sm_error_state_record.warp_esr = - sm_error_state->hww_warp_esr; - sm_error_state_record.warp_esr_pc = - sm_error_state->hww_warp_esr_pc; - sm_error_state_record.global_esr_report_mask = - sm_error_state->hww_global_esr_report_mask; - sm_error_state_record.warp_esr_report_mask = - sm_error_state->hww_warp_esr_report_mask; - - if (args->record_size > 0) { - size_t write_size = sizeof(*sm_error_state); - - if (write_size > args->record_size) - write_size = args->record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_to_user((void __user *)(uintptr_t) - args->record_mem, - &sm_error_state_record, - write_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) { - nvgpu_err(g, "copy_to_user failed!"); - return err; - } - - args->record_size = write_size; - } - - return 0; -} - -long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct gk20a_ctrl_priv *priv = filp->private_data; - struct gk20a *g = priv->g; - struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; - struct nvgpu_gpu_zcull_get_info_args *get_info_args; - struct nvgpu_gpu_zbc_set_table_args *set_table_args; - struct nvgpu_gpu_zbc_query_table_args *query_table_args; - u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; - struct gr_zcull_info *zcull_info; - struct 
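/*
 * Note: gk20a_ctrl_dev_ioctl() here follows the canonical nvgpu ioctl
 * skeleton shared by the channel, dbg and clk-arb event nodes: validate
 * the _IOC_ magic/number/size, copy the argument into a stack buffer
 * sized for the largest command, dispatch on cmd, then copy the buffer
 * back only when the command carries the _IOC_READ direction bit and the
 * handler succeeded. Condensed to its skeleton:
 */
static long sketch_dev_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg)
{
	u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
	long err = 0;

	if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if ((_IOC_DIR(cmd) & _IOC_WRITE) &&
	    copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
		return -EFAULT;

	switch (cmd) {
	/* per-command handlers operate on buf in place */
	default:
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
	    copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
		err = -EFAULT;

	return err;
}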
zbc_entry *zbc_val; - struct zbc_query_params *zbc_tbl; - int i, err = 0; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - switch (cmd) { - case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: - get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; - - get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); - - break; - case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: - get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; - - memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); - - zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); - if (zcull_info == NULL) - return -ENOMEM; - - err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); - if (err) { - nvgpu_kfree(g, zcull_info); - break; - } - - get_info_args->width_align_pixels = zcull_info->width_align_pixels; - get_info_args->height_align_pixels = zcull_info->height_align_pixels; - get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; - get_info_args->aliquot_total = zcull_info->aliquot_total; - get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; - get_info_args->region_header_size = zcull_info->region_header_size; - get_info_args->subregion_header_size = zcull_info->subregion_header_size; - get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; - get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; - get_info_args->subregion_count = zcull_info->subregion_count; - - nvgpu_kfree(g, zcull_info); - break; - case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: - set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; - - zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); - if (zbc_val == NULL) - return -ENOMEM; - - zbc_val->format = set_table_args->format; - zbc_val->type = set_table_args->type; - - switch (zbc_val->type) { - case GK20A_ZBC_TYPE_COLOR: - for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { - zbc_val->color_ds[i] = set_table_args->color_ds[i]; - zbc_val->color_l2[i] = set_table_args->color_l2[i]; - } - break; - case GK20A_ZBC_TYPE_DEPTH: - case T19X_ZBC: - zbc_val->depth = set_table_args->depth; - break; - default: - err = -EINVAL; - } - - if (!err) { - err = gk20a_busy(g); - if (!err) { - err = g->ops.gr.zbc_set_table(g, &g->gr, - zbc_val); - gk20a_idle(g); - } - } - - if (zbc_val) - nvgpu_kfree(g, zbc_val); - break; - case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: - query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; - - zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); - if (zbc_tbl == NULL) - return -ENOMEM; - - zbc_tbl->type = query_table_args->type; - zbc_tbl->index_size = query_table_args->index_size; - - err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); - - if (!err) { - switch (zbc_tbl->type) { - case GK20A_ZBC_TYPE_COLOR: - for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { - query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; - query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; - } - break; - case GK20A_ZBC_TYPE_DEPTH: - case T19X_ZBC: - query_table_args->depth = zbc_tbl->depth; - break; - case 
GK20A_ZBC_TYPE_INVALID: - query_table_args->index_size = zbc_tbl->index_size; - break; - default: - err = -EINVAL; - } - if (!err) { - query_table_args->format = zbc_tbl->format; - query_table_args->ref_cnt = zbc_tbl->ref_cnt; - } - } - - if (zbc_tbl) - nvgpu_kfree(g, zbc_tbl); - break; - - case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: - err = gk20a_ctrl_ioctl_gpu_characteristics( - g, (struct nvgpu_gpu_get_characteristics *)buf); - break; - case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: - err = gk20a_ctrl_prepare_compressible_read(g, - (struct nvgpu_gpu_prepare_compressible_read_args *)buf); - break; - case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: - err = gk20a_ctrl_mark_compressible_write(g, - (struct nvgpu_gpu_mark_compressible_write_args *)buf); - break; - case NVGPU_GPU_IOCTL_ALLOC_AS: - err = gk20a_ctrl_alloc_as(g, - (struct nvgpu_alloc_as_args *)buf); - break; - case NVGPU_GPU_IOCTL_OPEN_TSG: - err = gk20a_ctrl_open_tsg(g, - (struct nvgpu_gpu_open_tsg_args *)buf); - break; - case NVGPU_GPU_IOCTL_GET_TPC_MASKS: - err = gk20a_ctrl_get_tpc_masks(g, - (struct nvgpu_gpu_get_tpc_masks_args *)buf); - break; - case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: - err = gk20a_ctrl_get_fbp_l2_masks(g, - (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); - break; - case NVGPU_GPU_IOCTL_OPEN_CHANNEL: - /* this arg type here, but ..gpu_open_channel_args in nvgpu.h - * for consistency - they are the same */ - err = gk20a_channel_open_ioctl(g, - (struct nvgpu_channel_open_args *)buf); - break; - case NVGPU_GPU_IOCTL_FLUSH_L2: - err = nvgpu_gpu_ioctl_l2_fb_ops(g, - (struct nvgpu_gpu_l2_fb_args *)buf); - break; - case NVGPU_GPU_IOCTL_INVAL_ICACHE: - err = gr_gk20a_elpg_protected_call(g, - nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf)); - break; - - case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: - err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, - (struct nvgpu_gpu_mmu_debug_mode_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: - err = gr_gk20a_elpg_protected_call(g, - nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); - break; - - case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: - err = nvgpu_gpu_ioctl_trigger_suspend(g); - break; - - case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: - err = nvgpu_gpu_ioctl_wait_for_pause(g, - (struct nvgpu_gpu_wait_pause_args *)buf); - break; - - case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: - err = nvgpu_gpu_ioctl_resume_from_pause(g); - break; - - case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: - err = nvgpu_gpu_ioctl_clear_sm_errors(g); - break; - - case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: - err = nvgpu_gpu_ioctl_has_any_exception(g, - (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); - break; - - case NVGPU_GPU_IOCTL_NUM_VSMS: - err = gk20a_ctrl_get_num_vsms(g, - (struct nvgpu_gpu_num_vsms *)buf); - break; - case NVGPU_GPU_IOCTL_VSMS_MAPPING: - err = gk20a_ctrl_vsm_mapping(g, - (struct nvgpu_gpu_vsms_mapping *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: - err = nvgpu_gpu_get_cpu_time_correlation_info(g, - (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_GPU_TIME: - err = nvgpu_gpu_get_gpu_time(g, - (struct nvgpu_gpu_get_gpu_time_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: - err = nvgpu_gpu_get_engine_info(g, - (struct nvgpu_gpu_get_engine_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: - err = nvgpu_gpu_alloc_vidmem(g, - (struct nvgpu_gpu_alloc_vidmem_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: - err = 
nvgpu_gpu_get_memory_state(g, - (struct nvgpu_gpu_get_memory_state_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_RANGE: - err = nvgpu_gpu_clk_get_range(g, priv, - (struct nvgpu_gpu_clk_range_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: - err = nvgpu_gpu_clk_get_vf_points(g, priv, - (struct nvgpu_gpu_clk_vf_points_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_SET_INFO: - err = nvgpu_gpu_clk_set_info(g, priv, - (struct nvgpu_gpu_clk_set_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_INFO: - err = nvgpu_gpu_clk_get_info(g, priv, - (struct nvgpu_gpu_clk_get_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_EVENT_FD: - err = nvgpu_gpu_get_event_fd(g, priv, - (struct nvgpu_gpu_get_event_fd_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_VOLTAGE: - err = nvgpu_gpu_get_voltage(g, - (struct nvgpu_gpu_get_voltage_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_CURRENT: - err = nvgpu_gpu_get_current(g, - (struct nvgpu_gpu_get_current_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_POWER: - err = nvgpu_gpu_get_power(g, - (struct nvgpu_gpu_get_power_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_TEMPERATURE: - err = nvgpu_gpu_get_temperature(g, - (struct nvgpu_gpu_get_temperature_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: - err = nvgpu_gpu_set_therm_alert_limit(g, - (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: - err = nvgpu_gpu_set_deterministic_opts(g, - (struct nvgpu_gpu_set_deterministic_opts_args *)buf); - break; - - case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: - err = nvgpu_gpu_read_single_sm_error_state(g, - (struct nvgpu_gpu_read_single_sm_error_state_args *)buf); - break; - - default: - nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h deleted file mode 100644 index 8b4a5e59..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __NVGPU_IOCTL_CTRL_H__ -#define __NVGPU_IOCTL_CTRL_H__ - -int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); -int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); -long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c deleted file mode 100644 index 31e7e2cb..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ /dev/null @@ -1,2003 +0,0 @@ -/* - * Tegra GK20A GPU Debugger/Profiler Driver - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "gk20a/regops_gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" -#include "os_linux.h" -#include "platform_gk20a.h" -#include "ioctl_dbg.h" - -/* turn seriously unwieldy names -> something shorter */ -#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x - -/* silly allocator - just increment id */ -static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); -static int generate_unique_id(void) -{ - return nvgpu_atomic_add_return(1, &unique_id); -} - -static int alloc_profiler(struct gk20a *g, - struct dbg_profiler_object_data **_prof) -{ - struct dbg_profiler_object_data *prof; - *_prof = NULL; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - prof = nvgpu_kzalloc(g, sizeof(*prof)); - if (!prof) - return -ENOMEM; - - prof->prof_handle = generate_unique_id(); - *_prof = prof; - return 0; -} - -static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) -{ - struct dbg_session_gk20a_linux *dbg_s_linux; - *_dbg_s_linux = NULL; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); - if (!dbg_s_linux) - return -ENOMEM; - - dbg_s_linux->dbg_s.id = generate_unique_id(); - *_dbg_s_linux = dbg_s_linux; - return 0; -} - -static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, - struct gr_gk20a *gr); - -static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); - -static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_exec_reg_ops_args *args); - -static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_powergate_args *args); - -static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); - -static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); - -static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); - -static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); - -static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); - -static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_profiler_reserve_args *args); - -static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_map_args *args); - -static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); - -static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, - int
timeout_mode); - -static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle); - -static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); - -static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); - -static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle); - -static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); - -static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, - struct file *filp, bool is_profiler); - -unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) -{ - unsigned int mask = 0; - struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - if (dbg_s->dbg_events.events_enabled && - dbg_s->dbg_events.num_pending_events > 0) { - nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", - dbg_s->id); - nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", - dbg_s->dbg_events.num_pending_events); - mask = (POLLPRI | POLLIN); - } - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); - - return mask; -} - -int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) -{ - struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - - nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); - - /* unbind channels */ - dbg_unbind_all_channels_gk20a(dbg_s); - - /* Powergate/Timeout enable is called here because a dbg session that - * called the powergate/timeout disable ioctl may be killed without - * ever calling the powergate/timeout enable ioctl - */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); - nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); - - /* If this session owned the perf buffer, release it */ - if (g->perfbuf.owner == dbg_s) - gk20a_perfbuf_release_locked(g, g->perfbuf.offset); - - /* Per-context profiler objects were released when we called - * dbg_unbind_all_channels. We could still have global ones. - */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->session_id == dbg_s->id) { - if (prof_obj->has_reservation) - g->ops.dbg_session_ops.
- release_profiler_reservation(dbg_s, prof_obj); - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - } - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - - nvgpu_mutex_destroy(&dbg_s->ch_list_lock); - nvgpu_mutex_destroy(&dbg_s->ioctl_lock); - - nvgpu_kfree(g, dbg_session_linux); - gk20a_put(g); - - return 0; -} - -int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, prof.cdev); - struct gk20a *g = &l->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); -} - -static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_timeout_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return err; -} - -static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - u32 sm_id; - struct channel_gk20a *ch; - struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; - struct nvgpu_gr_sm_error_state sm_error_state; - int err = 0; - - /* Not currently supported in the virtual case */ - if (g->is_virtual) - return -ENOSYS; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - if (args->sm_error_state_record_size > 0) { - size_t read_size = sizeof(sm_error_state_record); - - if (read_size > args->sm_error_state_record_size) - read_size = args->sm_error_state_record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_from_user(&sm_error_state_record, - (void __user *)(uintptr_t) - args->sm_error_state_record_mem, - read_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) - return -ENOMEM; - } - - err = gk20a_busy(g); - if (err) - return err; - - sm_error_state.hww_global_esr = - sm_error_state_record.hww_global_esr; - sm_error_state.hww_warp_esr = - sm_error_state_record.hww_warp_esr; - sm_error_state.hww_warp_esr_pc = - sm_error_state_record.hww_warp_esr_pc; - sm_error_state.hww_global_esr_report_mask = - sm_error_state_record.hww_global_esr_report_mask; - sm_error_state.hww_warp_esr_report_mask = - sm_error_state_record.hww_warp_esr_report_mask; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.update_sm_error_state(g, ch, - sm_id, &sm_error_state)); - - gk20a_idle(g); - - return err; -} - - -static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gr_sm_error_state *sm_error_state; - struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; - u32 sm_id; - int err = 0; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - sm_error_state = gr->sm_error_states + sm_id; - sm_error_state_record.hww_global_esr = - sm_error_state->hww_global_esr; - sm_error_state_record.hww_warp_esr = - sm_error_state->hww_warp_esr; - sm_error_state_record.hww_warp_esr_pc = - 
sm_error_state->hww_warp_esr_pc; - sm_error_state_record.hww_global_esr_report_mask = - sm_error_state->hww_global_esr_report_mask; - sm_error_state_record.hww_warp_esr_report_mask = - sm_error_state->hww_warp_esr_report_mask; - - if (args->sm_error_state_record_size > 0) { - size_t write_size = sizeof(*sm_error_state); - - if (write_size > args->sm_error_state_record_size) - write_size = args->sm_error_state_record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_to_user((void __user *)(uintptr_t) - args->sm_error_state_record_mem, - &sm_error_state_record, - write_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) { - nvgpu_err(g, "copy_to_user failed!"); - return err; - } - - args->sm_error_state_record_size = write_size; - } - - return 0; -} - - -static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->broadcast_stop_trigger = (args->broadcast != 0); - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); - - return 0; -} - -static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, - int timeout_mode) -{ - struct gk20a *g = dbg_s->g; - int err = 0; - - nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", - timeout_mode); - - switch (timeout_mode) { - case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: - if (dbg_s->is_timeout_disabled == true) - nvgpu_atomic_dec(&g->timeouts_disabled_refcount); - dbg_s->is_timeout_disabled = false; - break; - - case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: - if (dbg_s->is_timeout_disabled == false) - nvgpu_atomic_inc(&g->timeouts_disabled_refcount); - dbg_s->is_timeout_disabled = true; - break; - - default: - nvgpu_err(g, - "unrecognized dbg gpu timeout mode : 0x%x", - timeout_mode); - err = -EINVAL; - break; - } - - if (!err) - nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " - "timeouts disabled refcount %d", - dbg_s->is_timeout_disabled ? 
"true" : "false", - nvgpu_atomic_read(&g->timeouts_disabled_refcount)); - return err; -} - -static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, - struct file *filp, bool is_profiler) -{ - struct nvgpu_os_linux *l; - struct dbg_session_gk20a_linux *dbg_session_linux; - struct dbg_session_gk20a *dbg_s; - struct gk20a *g; - - struct device *dev; - - int err; - - if (!is_profiler) - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, dbg.cdev); - else - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, prof.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - dev = dev_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); - - err = alloc_session(g, &dbg_session_linux); - if (err) - goto free_ref; - - dbg_s = &dbg_session_linux->dbg_s; - - filp->private_data = dbg_session_linux; - dbg_session_linux->dev = dev; - dbg_s->g = g; - dbg_s->is_profiler = is_profiler; - dbg_s->is_pg_disabled = false; - dbg_s->is_timeout_disabled = false; - - nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); - nvgpu_init_list_node(&dbg_s->ch_list); - err = nvgpu_mutex_init(&dbg_s->ch_list_lock); - if (err) - goto err_free_session; - err = nvgpu_mutex_init(&dbg_s->ioctl_lock); - if (err) - goto err_destroy_lock; - dbg_s->dbg_events.events_enabled = false; - dbg_s->dbg_events.num_pending_events = 0; - - return 0; - -err_destroy_lock: - nvgpu_mutex_destroy(&dbg_s->ch_list_lock); -err_free_session: - nvgpu_kfree(g, dbg_session_linux); -free_ref: - gk20a_put(g); - return err; -} - -void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) -{ - nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); -} - -static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct dbg_session_channel_data *ch_data) -{ - struct gk20a *g = dbg_s->g; - int chid; - struct dbg_session_data *session_data; - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - struct dbg_session_channel_data_linux *ch_data_linux; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - chid = ch_data->chid; - - /* If there's a profiler ctx reservation record associated with this - * session/channel pair, release it. - */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if ((prof_obj->session_id == dbg_s->id) && - (prof_obj->ch->chid == chid)) { - if (prof_obj->has_reservation) { - g->ops.dbg_session_ops. - release_profiler_reservation(dbg_s, prof_obj); - } - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - } - } - - nvgpu_list_del(&ch_data->ch_entry); - - session_data = ch_data->session_data; - nvgpu_list_del(&session_data->dbg_s_entry); - nvgpu_kfree(dbg_s->g, session_data); - - ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, - ch_data); - - fput(ch_data_linux->ch_f); - nvgpu_kfree(dbg_s->g, ch_data_linux); - - return 0; -} - -static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_bind_channel_args *args) -{ - struct file *f; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - struct dbg_session_channel_data_linux *ch_data_linux; - struct dbg_session_data *session_data; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", - g->name, args->channel_fd); - - /* - * Although gk20a_get_channel_from_file gives us a channel ref, need to - * hold a ref to the file during the session lifetime. See comment in - * struct dbg_session_channel_data. 
- */ - f = fget(args->channel_fd); - if (!f) - return -ENODEV; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) { - nvgpu_log_fn(g, "no channel found for fd"); - err = -EINVAL; - goto out_fput; - } - - nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&ch->dbg_s_lock); - - ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); - if (!ch_data_linux) { - err = -ENOMEM; - goto out_chput; - } - ch_data_linux->ch_f = f; - ch_data_linux->ch_data.channel_fd = args->channel_fd; - ch_data_linux->ch_data.chid = ch->chid; - ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; - nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); - - session_data = nvgpu_kzalloc(g, sizeof(*session_data)); - if (!session_data) { - err = -ENOMEM; - goto out_kfree; - } - session_data->dbg_s = dbg_s; - nvgpu_init_list_node(&session_data->dbg_s_entry); - ch_data_linux->ch_data.session_data = session_data; - - nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); - - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - - nvgpu_mutex_release(&ch->dbg_s_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - - return 0; - -out_kfree: - nvgpu_kfree(g, ch_data_linux); -out_chput: - gk20a_channel_put(ch); - nvgpu_mutex_release(&ch->dbg_s_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); -out_fput: - fput(f); - return err; -} - -static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) -{ - struct dbg_session_channel_data *ch_data, *tmp; - struct gk20a *g = dbg_s->g; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) - ch_data->unbind_single_channel(dbg_s, ch_data); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return 0; -} - -/* - * Convert common regops op values of the form of NVGPU_DBG_REG_OP_* - * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* - */ -static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) -{ - switch (regops_op) { - case REGOP(READ_32): - return REGOP_LINUX(READ_32); - case REGOP(WRITE_32): - return REGOP_LINUX(WRITE_32); - case REGOP(READ_64): - return REGOP_LINUX(READ_64); - case REGOP(WRITE_64): - return REGOP_LINUX(WRITE_64); - case REGOP(READ_08): - return REGOP_LINUX(READ_08); - case REGOP(WRITE_08): - return REGOP_LINUX(WRITE_08); - } - - return regops_op; -} - -/* - * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* - * into common regops op values of the form of NVGPU_DBG_REG_OP_* - */ -static u32 nvgpu_get_regops_op_values_common(u32 regops_op) -{ - switch (regops_op) { - case REGOP_LINUX(READ_32): - return REGOP(READ_32); - case REGOP_LINUX(WRITE_32): - return REGOP(WRITE_32); - case REGOP_LINUX(READ_64): - return REGOP(READ_64); - case REGOP_LINUX(WRITE_64): - return REGOP(WRITE_64); - case REGOP_LINUX(READ_08): - return REGOP(READ_08); - case REGOP_LINUX(WRITE_08): - return REGOP(WRITE_08); - } - - return regops_op; -} - -/* - * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* - * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* - */ -static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) -{ - switch (regops_type) { - case 
REGOP(TYPE_GLOBAL): - return REGOP_LINUX(TYPE_GLOBAL); - case REGOP(TYPE_GR_CTX): - return REGOP_LINUX(TYPE_GR_CTX); - case REGOP(TYPE_GR_CTX_TPC): - return REGOP_LINUX(TYPE_GR_CTX_TPC); - case REGOP(TYPE_GR_CTX_SM): - return REGOP_LINUX(TYPE_GR_CTX_SM); - case REGOP(TYPE_GR_CTX_CROP): - return REGOP_LINUX(TYPE_GR_CTX_CROP); - case REGOP(TYPE_GR_CTX_ZROP): - return REGOP_LINUX(TYPE_GR_CTX_ZROP); - case REGOP(TYPE_GR_CTX_QUAD): - return REGOP_LINUX(TYPE_GR_CTX_QUAD); - } - - return regops_type; -} - -/* - * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* - * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* - */ -static u32 nvgpu_get_regops_type_values_common(u32 regops_type) -{ - switch (regops_type) { - case REGOP_LINUX(TYPE_GLOBAL): - return REGOP(TYPE_GLOBAL); - case REGOP_LINUX(TYPE_GR_CTX): - return REGOP(TYPE_GR_CTX); - case REGOP_LINUX(TYPE_GR_CTX_TPC): - return REGOP(TYPE_GR_CTX_TPC); - case REGOP_LINUX(TYPE_GR_CTX_SM): - return REGOP(TYPE_GR_CTX_SM); - case REGOP_LINUX(TYPE_GR_CTX_CROP): - return REGOP(TYPE_GR_CTX_CROP); - case REGOP_LINUX(TYPE_GR_CTX_ZROP): - return REGOP(TYPE_GR_CTX_ZROP); - case REGOP_LINUX(TYPE_GR_CTX_QUAD): - return REGOP(TYPE_GR_CTX_QUAD); - } - - return regops_type; -} - -/* - * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_* - * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* - */ -static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) -{ - switch (regops_status) { - case REGOP(STATUS_SUCCESS): - return REGOP_LINUX(STATUS_SUCCESS); - case REGOP(STATUS_INVALID_OP): - return REGOP_LINUX(STATUS_INVALID_OP); - case REGOP(STATUS_INVALID_TYPE): - return REGOP_LINUX(STATUS_INVALID_TYPE); - case REGOP(STATUS_INVALID_OFFSET): - return REGOP_LINUX(STATUS_INVALID_OFFSET); - case REGOP(STATUS_UNSUPPORTED_OP): - return REGOP_LINUX(STATUS_UNSUPPORTED_OP); - case REGOP(STATUS_INVALID_MASK ): - return REGOP_LINUX(STATUS_INVALID_MASK); - } - - return regops_status; -} - -/* - * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* - * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_* - */ -static u32 nvgpu_get_regops_status_values_common(u32 regops_status) -{ - switch (regops_status) { - case REGOP_LINUX(STATUS_SUCCESS): - return REGOP(STATUS_SUCCESS); - case REGOP_LINUX(STATUS_INVALID_OP): - return REGOP(STATUS_INVALID_OP); - case REGOP_LINUX(STATUS_INVALID_TYPE): - return REGOP(STATUS_INVALID_TYPE); - case REGOP_LINUX(STATUS_INVALID_OFFSET): - return REGOP(STATUS_INVALID_OFFSET); - case REGOP_LINUX(STATUS_UNSUPPORTED_OP): - return REGOP(STATUS_UNSUPPORTED_OP); - case REGOP_LINUX(STATUS_INVALID_MASK ): - return REGOP(STATUS_INVALID_MASK); - } - - return regops_status; -} - -static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, - struct nvgpu_dbg_reg_op *out, u32 num_ops) -{ - u32 i; - - if(in == NULL || out == NULL) - return -ENOMEM; - - for (i = 0; i < num_ops; i++) { - out[i].op = nvgpu_get_regops_op_values_common(in[i].op); - out[i].type = nvgpu_get_regops_type_values_common(in[i].type); - out[i].status = nvgpu_get_regops_status_values_common(in[i].status); - out[i].quad = in[i].quad; - out[i].group_mask = in[i].group_mask; - out[i].sub_group_mask = in[i].sub_group_mask; - out[i].offset = in[i].offset; - out[i].value_lo = in[i].value_lo; - out[i].value_hi = in[i].value_hi; - out[i].and_n_mask_lo = in[i].and_n_mask_lo; - out[i].and_n_mask_hi = in[i].and_n_mask_hi; - } - - return 0; -} - 
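/*
 * For orientation only: a minimal user-space sketch of driving
 * NVGPU_DBG_GPU_IOCTL_REG_OPS through the translation helpers in this
 * file. The uapi header install path and an already-open, already-bound
 * dbg session fd are assumptions here; error handling is elided.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install location of the nvgpu uapi header */

static int read_one_gpu_reg(int dbg_fd, uint32_t offset, uint32_t *value)
{
	struct nvgpu_dbg_gpu_reg_op op;
	struct nvgpu_dbg_gpu_exec_reg_ops_args args;

	memset(&op, 0, sizeof(op));
	op.op = NVGPU_DBG_GPU_REG_OP_READ_32;	/* translated in-kernel to REGOP(READ_32) */
	op.type = NVGPU_DBG_GPU_REG_OP_TYPE_GLOBAL;
	op.offset = offset;

	memset(&args, 0, sizeof(args));
	args.ops = (uint64_t)(uintptr_t)&op;	/* user pointer; the kernel copies it in fragments */
	args.num_ops = 1;

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_REG_OPS, &args) != 0)
		return -1;
	/* per-op status comes back translated to NVGPU_DBG_GPU_REG_OP_STATUS_* */
	if (op.status != NVGPU_DBG_GPU_REG_OP_STATUS_SUCCESS)
		return -1;
	*value = op.value_lo;
	return 0;
}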
-static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, - struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) -{ - u32 i; - - if(in == NULL || out == NULL) - return -ENOMEM; - - for (i = 0; i < num_ops; i++) { - out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); - out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); - out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); - out[i].quad = in[i].quad; - out[i].group_mask = in[i].group_mask; - out[i].sub_group_mask = in[i].sub_group_mask; - out[i].offset = in[i].offset; - out[i].value_lo = in[i].value_lo; - out[i].value_hi = in[i].value_hi; - out[i].and_n_mask_lo = in[i].and_n_mask_lo; - out[i].and_n_mask_hi = in[i].and_n_mask_hi; - } - - return 0; -} - -static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_exec_reg_ops_args *args) -{ - int err = 0, powergate_err = 0; - bool is_pg_disabled = false; - - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - - nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); - - if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { - nvgpu_err(g, "regops limit exceeded"); - return -EINVAL; - } - - if (args->num_ops == 0) { - /* Nothing to do */ - return 0; - } - - if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { - nvgpu_err(g, "reg ops work buffer not allocated"); - return -ENODEV; - } - - if (!dbg_s->id) { - nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); - return -EINVAL; - } - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!dbg_s->is_profiler && !ch) { - nvgpu_err(g, "bind a channel before regops for a debugging session"); - return -EINVAL; - } - - /* be sure that ctx info is in place */ - if (!g->is_virtual && - !gr_context_info_available(dbg_s, &g->gr)) { - nvgpu_err(g, "gr context data not available"); - return -ENODEV; - } - - /* since exec_reg_ops sends methods to the ucode, it must take the - * global gpu lock to protect against mixing methods from debug sessions - * on other channels */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - if (!dbg_s->is_pg_disabled && !g->is_virtual) { - /* In the virtual case, the server will handle - * disabling/enabling powergating when processing reg ops - */ - powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, - true); - is_pg_disabled = true; - } - - if (!powergate_err) { - u64 ops_offset = 0; /* index offset */ - - struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; - - linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * - sizeof(struct nvgpu_dbg_gpu_reg_op)); - - if (!linux_fragment) - return -ENOMEM; - - while (ops_offset < args->num_ops && !err) { - const u64 num_ops = - min(args->num_ops - ops_offset, - (u64)(g->dbg_regops_tmp_buf_ops)); - const u64 fragment_size = - num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); - - void __user *const fragment = - (void __user *)(uintptr_t) - (args->ops + - ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); - - nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", - ops_offset, num_ops); - - nvgpu_log_fn(g, "Copying regops from userspace"); - - if (copy_from_user(linux_fragment, - fragment, fragment_size)) { - nvgpu_err(g, "copy_from_user failed!"); - err = -EFAULT; - break; - } - - err = nvgpu_get_regops_data_common(linux_fragment, - g->dbg_regops_tmp_buf, num_ops); - - if (err) - break; - - err = g->ops.dbg_session_ops.exec_reg_ops( - dbg_s, g->dbg_regops_tmp_buf, num_ops); - - err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, - 
linux_fragment, num_ops); - - if (err) - break; - - nvgpu_log_fn(g, "Copying result to userspace"); - - if (copy_to_user(fragment, linux_fragment, - fragment_size)) { - nvgpu_err(g, "copy_to_user failed!"); - err = -EFAULT; - break; - } - - ops_offset += num_ops; - } - - nvgpu_kfree(g, linux_fragment); - - /* enable powergate, if previously disabled */ - if (is_pg_disabled) { - powergate_err = - g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, - false); - } - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - if (!err && powergate_err) - err = powergate_err; - - if (err) - nvgpu_err(g, "dbg regops failed"); - - return err; -} - -static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_powergate_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - nvgpu_log_fn(g, "%s powergate mode = %d", - g->name, args->mode); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) { - err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true); - } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) { - err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); - } else { - nvgpu_err(g, "invalid powergate mode"); - err = -EINVAL; - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch_gk20a; - - nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", - g->name, args->mode); - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch_gk20a) { - nvgpu_err(g, - "no bound channel for smpc ctxsw mode update"); - err = -EINVAL; - goto clean_up; - } - - err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, - args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); - if (err) { - nvgpu_err(g, - "error (%d) during smpc ctxsw mode update", err); - goto clean_up; - } - - err = g->ops.regops.apply_smpc_war(dbg_s); - clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch_gk20a; - - nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); - - /* Must have a valid reservation to enable/disable hwpm ctxsw. - * Just print an error message for now, but eventually this should - * return an error, at the point where all client sw has been - * cleaned up.
- */ - if (!dbg_s->has_profiler_reservation) { - nvgpu_err(g, - "session doesn't have a valid reservation"); - } - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch_gk20a) { - nvgpu_err(g, - "no bound channel for pm ctxsw mode update"); - err = -EINVAL; - goto clean_up; - } - if (!dbg_s->is_pg_disabled) { - nvgpu_err(g, "powergate is not disabled"); - err = -ENOSYS; - goto clean_up; - } - err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, - args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); - if (err) - nvgpu_err(g, - "error (%d) during pm ctxsw mode update", err); - /* gk20a would require a WAR to set the core PM_ENABLE bit, not - * added here with gk20a being deprecated - */ - clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) -{ - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - int err = 0, action = args->mode; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Suspend GPU context switching */ - err = gr_gk20a_disable_ctxsw(g); - if (err) { - nvgpu_err(g, "unable to stop gr ctxsw"); - /* this should probably be ctx-fatal... */ - goto clean_up; - } - - switch (action) { - case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: - gr_gk20a_suspend_context(ch); - break; - - case NVGPU_DBG_GPU_RESUME_ALL_SMS: - gr_gk20a_resume_context(ch); - break; - } - - err = gr_gk20a_enable_ctxsw(g); - if (err) - nvgpu_err(g, "unable to restart ctxsw!"); - -clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - - return err; -} - -static int nvgpu_ioctl_allocate_profiler_object( - struct dbg_session_gk20a_linux *dbg_session_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) -{ - int err = 0; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = get_gk20a(dbg_session_linux->dev); - struct dbg_profiler_object_data *prof_obj; - - nvgpu_log_fn(g, "%s", g->name); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - err = alloc_profiler(g, &prof_obj); - if (err) - goto clean_up; - - prof_obj->session_id = dbg_s->id; - - if (dbg_s->is_profiler) - prof_obj->ch = NULL; - else { - prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (prof_obj->ch == NULL) { - nvgpu_err(g, - "bind a channel for dbg session"); - nvgpu_kfree(g, prof_obj); - err = -EINVAL; - goto clean_up; - } - } - - /* Return handle to client */ - args->profiler_handle = prof_obj->prof_handle; - - nvgpu_init_list_node(&prof_obj->prof_obj_entry); - - nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); -clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_ioctl_free_profiler_object( - struct dbg_session_gk20a_linux *dbg_s_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) -{ - int err = 0; - struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; - struct gk20a *g = get_gk20a(dbg_s_linux->dev); - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - bool obj_found = false; - - 
nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", - g->name, dbg_s->id, args->profiler_handle); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Remove profiler object from the list, if a match is found */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->prof_handle == args->profiler_handle) { - if (prof_obj->session_id != dbg_s->id) { - nvgpu_err(g, - "invalid handle %x", - args->profiler_handle); - err = -EINVAL; - break; - } - if (prof_obj->has_reservation) - g->ops.dbg_session_ops. - release_profiler_reservation(dbg_s, prof_obj); - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - obj_found = true; - break; - } - } - if (!obj_found) { - nvgpu_err(g, "profiler %x not found", - args->profiler_handle); - err = -EINVAL; - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static struct dbg_profiler_object_data *find_matching_prof_obj( - struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->prof_handle == profiler_handle) { - if (prof_obj->session_id != dbg_s->id) { - nvgpu_err(g, - "invalid handle %x", - profiler_handle); - return NULL; - } - return prof_obj; - } - } - return NULL; -} - -/* used in scenarios where the debugger session can take just the inter-session - * lock for performance, but the profiler session must take the per-gpu lock - * since it might not have an associated channel. */ -static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) -{ - struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - - if (dbg_s->is_profiler || !ch) - nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); - else - nvgpu_mutex_acquire(&ch->dbg_s_lock); -} - -static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) -{ - struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - - if (dbg_s->is_profiler || !ch) - nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); - else - nvgpu_mutex_release(&ch->dbg_s_lock); -} - -static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->dbg_events.events_enabled = true; - dbg_s->dbg_events.num_pending_events = 0; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - -static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->dbg_events.events_enabled = false; - dbg_s->dbg_events.num_pending_events = 0; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - -static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - if (dbg_s->dbg_events.events_enabled && - dbg_s->dbg_events.num_pending_events > 0) - dbg_s->dbg_events.num_pending_events--; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - - -static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_events_ctrl_args *args) -{ - int ret = 0; - struct channel_gk20a *ch; - struct gk20a *g = 
dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) { - nvgpu_err(g, "no channel bound to dbg session"); - return -EINVAL; - } - - switch (args->cmd) { - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: - gk20a_dbg_gpu_events_enable(dbg_s); - break; - - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: - gk20a_dbg_gpu_events_disable(dbg_s); - break; - - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: - gk20a_dbg_gpu_events_clear(dbg_s); - break; - - default: - nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", - args->cmd); - ret = -EINVAL; - break; - } - - return ret; -} - -static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_map_args *args) -{ - struct gk20a *g = dbg_s->g; - struct mm_gk20a *mm = &g->mm; - int err; - u32 virt_size; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - if (g->perfbuf.owner) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -EBUSY; - } - - mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "perfbuf"); - if (!mm->perfbuf.vm) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -ENOMEM; - } - - err = nvgpu_vm_map_buffer(mm->perfbuf.vm, - args->dmabuf_fd, - &args->offset, - 0, - 0, - 0, - 0, - args->mapping_size, - NULL); - if (err) - goto err_remove_vm; - - /* perf output buffer may not cross a 4GB boundary */ - virt_size = u64_lo32(args->mapping_size); - if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { - err = -EINVAL; - goto err_unmap; - } - - err = g->ops.dbg_session_ops.perfbuffer_enable(g, - args->offset, virt_size); - if (err) - goto err_unmap; - - g->perfbuf.owner = dbg_s; - g->perfbuf.offset = args->offset; - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return 0; - -err_unmap: - nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); -err_remove_vm: - nvgpu_vm_put(mm->perfbuf.vm); - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) -{ - struct gk20a *g = dbg_s->g; - int err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if ((g->perfbuf.owner != dbg_s) || - (g->perfbuf.offset != args->offset)) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -EINVAL; - } - - err = gk20a_perfbuf_release_locked(g, args->offset); - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return err; -} - -static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_pc_sampling_args *args) -{ - struct channel_gk20a *ch; - struct gk20a *g = dbg_s->g; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - nvgpu_log_fn(g, " "); - - return g->ops.gr.update_pc_sampling ? 
- g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; -} - -static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - u32 sm_id; - struct channel_gk20a *ch; - int err = 0; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.clear_sm_error_state(g, ch, sm_id)); - - gk20a_idle(g); - - return err; -} - -static int -nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) -{ - struct gk20a *g = dbg_s->g; - int err = 0; - int ctx_resident_ch_fd = -1; - - err = gk20a_busy(g); - if (err) - return err; - - switch (args->action) { - case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: - err = g->ops.gr.suspend_contexts(g, dbg_s, - &ctx_resident_ch_fd); - break; - - case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: - err = g->ops.gr.resume_contexts(g, dbg_s, - &ctx_resident_ch_fd); - break; - } - - if (ctx_resident_ch_fd < 0) { - args->is_resident_context = 0; - } else { - args->is_resident_context = 1; - args->resident_context_fd = ctx_resident_ch_fd; - } - - gk20a_idle(g); - - return err; -} - -static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_access_fb_memory_args *args) -{ - struct gk20a *g = dbg_s->g; - struct dma_buf *dmabuf; - void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; - void *buffer; - u64 size, access_size, offset; - u64 access_limit_size = SZ_4K; - int err = 0; - - if ((args->offset & 3) || (!args->size) || (args->size & 3)) - return -EINVAL; - - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return -EINVAL; - - if ((args->offset > dmabuf->size) || - (args->size > dmabuf->size) || - (args->offset + args->size > dmabuf->size)) { - err = -EINVAL; - goto fail_dmabuf_put; - } - - buffer = nvgpu_big_zalloc(g, access_limit_size); - if (!buffer) { - err = -ENOMEM; - goto fail_dmabuf_put; - } - - size = args->size; - offset = 0; - - err = gk20a_busy(g); - if (err) - goto fail_free_buffer; - - while (size) { - /* Max access size of access_limit_size in one loop */ - access_size = min(access_limit_size, size); - - if (args->cmd == - NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { - err = copy_from_user(buffer, user_buffer + offset, - access_size); - if (err) - goto fail_idle; - } - - err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, - args->offset + offset, access_size, - args->cmd); - if (err) - goto fail_idle; - - if (args->cmd == - NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { - err = copy_to_user(user_buffer + offset, - buffer, access_size); - if (err) - goto fail_idle; - } - - size -= access_size; - offset += access_size; - } - -fail_idle: - gk20a_idle(g); -fail_free_buffer: - nvgpu_big_free(g, buffer); -fail_dmabuf_put: - dma_buf_put(dmabuf); - - return err; -} - -static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_profiler_reserve_args *args) -{ - if (args->acquire) - return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); - - return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); -} - -static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, 
- struct nvgpu_dbg_gpu_timeout_args *args) -{ - bool status; - struct gk20a *g = dbg_s->g; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - status = nvgpu_is_timeouts_enabled(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - if (status) - args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; - else - args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE; -} - -/* In order to perform a context relative op the context has - * to be created already... which would imply that the - * context switch mechanism has already been put in place. - * So by the time we perform such an operation it should always - * be possible to query for the appropriate context offsets, etc. - * - * But note: while the dbg_gpu bind requires a channel fd, - * it doesn't require an allocated gr/compute obj at that point... - */ -static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, - struct gr_gk20a *gr) -{ - int err; - - nvgpu_mutex_acquire(&gr->ctx_mutex); - err = !gr->ctx_vars.golden_image_initialized; - nvgpu_mutex_release(&gr->ctx_mutex); - if (err) - return false; - return true; - -} - -static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) -{ - struct mm_gk20a *mm = &g->mm; - struct vm_gk20a *vm = mm->perfbuf.vm; - int err; - - err = g->ops.dbg_session_ops.perfbuffer_disable(g); - - nvgpu_vm_unmap(vm, offset, NULL); - nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); - nvgpu_vm_put(vm); - - g->perfbuf.owner = NULL; - g->perfbuf.offset = 0; - return err; -} - -static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj; - int err = 0; - - nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Find matching object. */ - prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); - - if (!prof_obj) { - nvgpu_err(g, "object not found"); - err = -EINVAL; - goto exit; - } - - if (prof_obj->has_reservation) - g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj); - else { - nvgpu_err(g, "No reservation found"); - err = -EINVAL; - goto exit; - } -exit: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj, *my_prof_obj; - int err = 0; - - nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); - - if (g->profiler_reservation_count < 0) { - nvgpu_err(g, "Negative reservation count!"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Find matching object. */ - my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); - - if (!my_prof_obj) { - nvgpu_err(g, "object not found"); - err = -EINVAL; - goto exit; - } - - /* If we already have the reservation, we're done */ - if (my_prof_obj->has_reservation) { - err = 0; - goto exit; - } - - if (my_prof_obj->ch == NULL) { - /* Global reservations are only allowed if there are no other - * global or per-context reservations currently held - */ - if (!g->ops.dbg_session_ops.check_and_set_global_reservation( - dbg_s, my_prof_obj)) { - nvgpu_err(g, - "global reserve: have existing reservation"); - err = -EBUSY; - } - } else if (g->global_profiler_reservation_held) { - /* If there's a global reservation, - * we can't take a per-context one.
- */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) { - /* TSG: check that another channel in the TSG - * doesn't already have the reservation - */ - int my_tsgid = my_prof_obj->ch->tsgid; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->has_reservation && - (prof_obj->ch->tsgid == my_tsgid)) { - nvgpu_err(g, - "per-ctxt reserve (tsg): already reserved"); - err = -EBUSY; - goto exit; - } - } - - if (!g->ops.dbg_session_ops.check_and_set_context_reservation( - dbg_s, my_prof_obj)) { - /* Another guest OS has the global reservation */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } - } else { - /* channel: check that some other profiler object doesn't - * already have the reservation. - */ - struct channel_gk20a *my_ch = my_prof_obj->ch; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->has_reservation && - (prof_obj->ch == my_ch)) { - nvgpu_err(g, - "per-ctxt reserve (ch): already reserved"); - err = -EBUSY; - goto exit; - } - } - - if (!g->ops.dbg_session_ops.check_and_set_context_reservation( - dbg_s, my_prof_obj)) { - /* Another guest OS has the global reservation */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } - } -exit: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_unbind_channel_args *args) -{ - struct dbg_session_channel_data *ch_data; - struct gk20a *g = dbg_s->g; - bool channel_found = false; - struct channel_gk20a *ch; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", - g->name, args->channel_fd); - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) { - nvgpu_log_fn(g, "no channel found for fd"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) { - if (ch->chid == ch_data->chid) { - channel_found = true; - break; - } - } - nvgpu_mutex_release(&dbg_s->ch_list_lock); - - if (!channel_found) { - nvgpu_log_fn(g, "channel not bound, fd=%d\n", args->channel_fd); - err = -EINVAL; - goto out; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - -out: - gk20a_channel_put(ch); - return err; -} - -int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, dbg.cdev); - struct gk20a *g = &l->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); -} - -long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; - struct gk20a *g = dbg_s->g; - u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) || - (_IOC_SIZE(cmd) >
NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - /* protect from threaded user space calls */ - nvgpu_mutex_acquire(&dbg_s->ioctl_lock); - - switch (cmd) { - case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: - err = dbg_bind_channel_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_bind_channel_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_REG_OPS: - err = nvgpu_ioctl_channel_reg_ops(dbg_s, - (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_POWERGATE: - err = nvgpu_ioctl_powergate_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_powergate_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: - err = gk20a_dbg_gpu_events_ctrl(dbg_s, - (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: - err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, - (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: - err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, - (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: - err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, - (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: - err = gk20a_perfbuf_map(dbg_s, - (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: - err = gk20a_perfbuf_unmap(dbg_s, - (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: - err = gk20a_dbg_pc_sampling(dbg_s, - (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: - err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, - (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_TIMEOUT: - err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, - (struct nvgpu_dbg_gpu_timeout_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: - nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, - (struct nvgpu_dbg_gpu_timeout_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: - err = dbg_unbind_channel_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: - err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, - (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: - err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, - (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: - err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, - (struct 
nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: - err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, - (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: - err = nvgpu_ioctl_profiler_reserve(dbg_s, - (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); - break; - - default: - nvgpu_err(g, - "unrecognized dbg gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - break; - } - - nvgpu_mutex_release(&dbg_s->ioctl_lock); - - nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, - buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h deleted file mode 100644 index bd76045b..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Tegra GK20A GPU Debugger Driver - * - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef DBG_GPU_IOCTL_GK20A_H -#define DBG_GPU_IOCTL_GK20A_H -#include - -#include "gk20a/dbg_gpu_gk20a.h" - -/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number - * of regops */ -#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 - -struct dbg_session_gk20a_linux { - struct device *dev; - struct dbg_session_gk20a dbg_s; -}; - -struct dbg_session_channel_data_linux { - /* - * We have to keep a ref to the _file_, not the channel, because - * close(channel_fd) is synchronous and would deadlock if we had an - * open debug session fd holding a channel ref at that time. Holding a - * ref to the file makes close(channel_fd) just drop a kernel ref to - * the file; the channel will close when the last file ref is dropped. - */ - struct file *ch_f; - struct dbg_session_channel_data ch_data; -}; - -/* module debug driver interface */ -int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); -int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); -long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); - -/* used by profiler driver interface */ -int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); - -#endif \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c deleted file mode 100644 index 4ef99ded..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation.
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/tsg_gk20a.h" -#include "gv11b/fifo_gv11b.h" -#include "platform_gk20a.h" -#include "ioctl_tsg.h" -#include "ioctl_channel.h" -#include "os_linux.h" - -struct tsg_private { - struct gk20a *g; - struct tsg_gk20a *tsg; -}; - -static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) - return -EINVAL; - - err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); - - gk20a_channel_put(ch); - return err; -} - -static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - struct channel_gk20a *ch; - struct gr_gk20a *gr = &g->gr; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto mutex_release; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto mutex_release; - } - - ch = gk20a_get_channel_from_file(arg->channel_fd); - if (!ch) { - err = -EINVAL; - goto idle; - } - - if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { - if ((arg->num_active_tpcs > gr->max_tpc_count) || - !(arg->num_active_tpcs)) { - nvgpu_err(g, "Invalid num of active TPCs"); - err = -EINVAL; - goto ch_put; - } - tsg->tpc_num_initialized = true; - tsg->num_active_tpcs = arg->num_active_tpcs; - tsg->tpc_pg_enabled = true; - } else { - tsg->tpc_pg_enabled = false; - nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); - } - - if (arg->subcontext_id < g->fifo.max_subctx_count) { - ch->subctx_id = arg->subcontext_id; - } else { - err = -EINVAL; - goto ch_put; - } - - nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", - ch->chid, ch->subctx_id); - - /* Use runqueue selector 1 for all ASYNC ids */ - if (ch->subctx_id > CHANNEL_INFO_VEID0) - ch->runqueue_sel = 1; - - err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); -ch_put: - gk20a_channel_put(ch); -idle: - gk20a_idle(g); -mutex_release: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) -{ - struct channel_gk20a *ch; - int err = 0; - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) - return -EINVAL; - - if (ch->tsgid != tsg->tsgid) { - err = -EINVAL; - goto out; - } - - err = gk20a_tsg_unbind_channel(ch); - - /* - * Mark the channel as timed out, since a channel unbound from the TSG - * has no context of its own and so can't serve any job - */ - ch->has_timedout = true; - -out: - gk20a_channel_put(ch); - return err; -} - -static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, - unsigned int event_id, - struct gk20a_event_id_data **event_id_data) -{ - struct gk20a_event_id_data *local_event_id_data; - bool event_found = false; - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); -
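/* Walk the per-TSG event list under event_id_list_lock; entries are - * added by the event-id ENABLE ioctl (gk20a_tsg_event_id_enable) and - * removed in gk20a_event_id_release(). - */ -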
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, - gk20a_event_id_data, event_id_node) { - if (local_event_id_data->event_id == event_id) { - event_found = true; - break; - } - } - nvgpu_mutex_release(&tsg->event_id_list_lock); - - if (event_found) { - *event_id_data = local_event_id_data; - return 0; - } else { - return -1; - } -} - -/* - * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific - * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs - */ -static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) -{ - switch (event_id) { - case NVGPU_EVENT_ID_BPT_INT: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; - case NVGPU_EVENT_ID_BPT_PAUSE: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; - case NVGPU_EVENT_ID_BLOCKING_SYNC: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; - case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; - case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; - case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; - } - - return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; -} - -void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, - int __event_id) -{ - struct gk20a_event_id_data *event_id_data; - u32 event_id; - int err = 0; - struct gk20a *g = tsg->g; - - event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); - if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) - return; - - err = gk20a_tsg_get_event_data_from_id(tsg, event_id, - &event_id_data); - if (err) - return; - - nvgpu_mutex_acquire(&event_id_data->lock); - - nvgpu_log_info(g, - "posting event for event_id=%d on tsg=%d\n", - event_id, tsg->tsgid); - event_id_data->event_posted = true; - - nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); - - nvgpu_mutex_release(&event_id_data->lock); -} - -static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) -{ - unsigned int mask = 0; - struct gk20a_event_id_data *event_id_data = filep->private_data; - struct gk20a *g = event_id_data->g; - u32 event_id = event_id_data->event_id; - struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); - - poll_wait(filep, &event_id_data->event_id_wq.wq, wait); - - nvgpu_mutex_acquire(&event_id_data->lock); - - if (event_id_data->event_posted) { - nvgpu_log_info(g, - "found pending event_id=%d on TSG=%d\n", - event_id, tsg->tsgid); - mask = (POLLPRI | POLLIN); - event_id_data->event_posted = false; - } - - nvgpu_mutex_release(&event_id_data->lock); - - return mask; -} - -static int gk20a_event_id_release(struct inode *inode, struct file *filp) -{ - struct gk20a_event_id_data *event_id_data = filp->private_data; - struct gk20a *g = event_id_data->g; - struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); - nvgpu_list_del(&event_id_data->event_id_node); - nvgpu_mutex_release(&tsg->event_id_list_lock); - - nvgpu_mutex_destroy(&event_id_data->lock); - gk20a_put(g); - nvgpu_kfree(g, event_id_data); - filp->private_data = NULL; - - return 0; -} - -const struct file_operations gk20a_event_id_ops = { - .owner = THIS_MODULE, - .poll = gk20a_event_id_poll, - .release = gk20a_event_id_release, -}; - -static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, - int event_id, - int *fd) -{ - int err = 0; - int local_fd; - struct file *file; - char name[64]; - 
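[Editor's note: gk20a_tsg_event_id_post_event() and gk20a_event_id_poll() form a classic producer/consumer handshake — the poster sets event_posted under the lock and broadcasts, the poll side checks and clears the flag. A userspace analogue with pthreads, assuming nothing nvgpu-specific, shows the same shape; pthread_cond_broadcast stands in for nvgpu_cond_broadcast_interruptible().]

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static bool event_posted;

static void *post_event(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	event_posted = true;		/* analogue of event_id_data->event_posted */
	pthread_cond_broadcast(&wq);	/* wake every waiter, as the poster does */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, post_event, NULL);
	pthread_mutex_lock(&lock);
	while (!event_posted)
		pthread_cond_wait(&wq, &lock);
	event_posted = false;		/* consume, as the poll handler does */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	puts("event consumed");
	return 0;
}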
struct gk20a_event_id_data *event_id_data; - struct gk20a *g; - - g = gk20a_get(tsg->g); - if (!g) - return -ENODEV; - - err = gk20a_tsg_get_event_data_from_id(tsg, - event_id, &event_id_data); - if (err == 0) { - /* We already have event enabled */ - err = -EINVAL; - goto free_ref; - } - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - goto free_ref; - local_fd = err; - - snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", - event_id, local_fd); - - file = anon_inode_getfile(name, &gk20a_event_id_ops, - NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); - if (!event_id_data) { - err = -ENOMEM; - goto clean_up_file; - } - event_id_data->g = g; - event_id_data->id = tsg->tsgid; - event_id_data->event_id = event_id; - - nvgpu_cond_init(&event_id_data->event_id_wq); - err = nvgpu_mutex_init(&event_id_data->lock); - if (err) - goto clean_up_free; - - nvgpu_init_list_node(&event_id_data->event_id_node); - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); - nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); - nvgpu_mutex_release(&tsg->event_id_list_lock); - - fd_install(local_fd, file); - file->private_data = event_id_data; - - *fd = local_fd; - - return 0; - -clean_up_free: - nvgpu_kfree(g, event_id_data); -clean_up_file: - fput(file); -clean_up: - put_unused_fd(local_fd); -free_ref: - gk20a_put(g); - return err; -} - -static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, - struct nvgpu_event_id_ctrl_args *args) -{ - int err = 0; - int fd = -1; - - if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) - return -EINVAL; - - switch (args->cmd) { - case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: - err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); - if (!err) - args->event_fd = fd; - break; - - default: - nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", - args->cmd); - err = -EINVAL; - break; - } - - return err; -} - -int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) -{ - struct tsg_private *priv; - struct tsg_gk20a *tsg; - struct device *dev; - int err; - - g = gk20a_get(g); - if (!g) - return -ENODEV; - - dev = dev_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); - - priv = nvgpu_kmalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - - tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); - if (!tsg) { - nvgpu_kfree(g, priv); - err = -ENOMEM; - goto free_ref; - } - - priv->g = g; - priv->tsg = tsg; - filp->private_data = priv; - - gk20a_sched_ctrl_tsg_added(g, tsg); - - return 0; - -free_ref: - gk20a_put(g); - return err; -} - -int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - int ret; - - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, tsg.cdev); - g = &l->g; - - nvgpu_log_fn(g, " "); - - ret = gk20a_busy(g); - if (ret) { - nvgpu_err(g, "failed to power on, %d", ret); - return ret; - } - - ret = nvgpu_ioctl_tsg_open(&l->g, filp); - - gk20a_idle(g); - nvgpu_log_fn(g, "done"); - return ret; -} - -void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) -{ - struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); - struct gk20a *g = tsg->g; - - gk20a_sched_ctrl_tsg_removed(g, tsg); - - gk20a_tsg_release(ref); - gk20a_put(g); -} - -int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) -{ - struct tsg_private *priv = filp->private_data; - struct tsg_gk20a *tsg = priv->tsg; - - 
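[Editor's note: gk20a_tsg_event_id_enable() below is a textbook goto-ladder — fd, file, allocation and mutex are acquired in order and unwound in reverse through labeled exits. A generic, compilable sketch of the idiom, with hypothetical acquire_a/b/c helpers in place of the real resources:]

#include <stdio.h>
#include <stdlib.h>

static int acquire_a(void) { return 0; }
static int acquire_b(void) { return 0; }
static int acquire_c(void) { return -1; }	/* force the unwind path */

static int setup(void)
{
	int err;

	err = acquire_a();
	if (err)
		goto fail;
	err = acquire_b();
	if (err)
		goto undo_a;
	err = acquire_c();
	if (err)
		goto undo_b;
	return 0;

undo_b:					/* labels release in reverse order */
	puts("release b");
undo_a:
	puts("release a");
fail:
	return err;
}

int main(void) { return setup() ? EXIT_FAILURE : EXIT_SUCCESS; }

The payoff is the same as in the TSG code: each failure site names exactly one label, and the cleanup order can be audited by reading the labels top to bottom.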
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - nvgpu_kfree(tsg->g, priv); - return 0; -} - -static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - u32 level = arg->level; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto done; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto done; - } - - level = nvgpu_get_common_runlist_level(level); - err = gk20a_tsg_set_runlist_interleave(tsg, level); - - gk20a_idle(g); -done: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto done; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto done; - } - err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); - gk20a_idle(g); -done: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) -{ - arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); - return 0; -} - -long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct tsg_private *priv = filp->private_data; - struct tsg_gk20a *tsg = priv->tsg; - struct gk20a *g = tsg->g; - u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - switch (cmd) { - case NVGPU_TSG_IOCTL_BIND_CHANNEL: - { - int ch_fd = *(int *)buf; - if (ch_fd < 0) { - err = -EINVAL; - break; - } - err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); - break; - } - - case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: - { - err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, - (struct nvgpu_tsg_bind_channel_ex_args *)buf); - break; - } - - case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: - { - int ch_fd = *(int *)buf; - - if (ch_fd < 0) { - err = -EINVAL; - break; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - break; - } - err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_ENABLE: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - g->ops.fifo.enable_tsg(tsg); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_DISABLE: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - g->ops.fifo.disable_tsg(tsg); 
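[Editor's note: nearly every TSG ioctl case below brackets its hardware access with gk20a_busy()/gk20a_idle(), a usage counter that is incremented optimistically and rolled back on failure. A minimal C11 sketch of that guard, assuming only a boolean stand-in for the can-busy state:]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int usage_count;
static bool powered_on = true;	/* stand-in for gk20a_can_busy() */

static int busy(void)
{
	atomic_fetch_add(&usage_count, 1);
	if (!powered_on) {
		/* roll the count back, as gk20a_busy() does on failure */
		atomic_fetch_sub(&usage_count, 1);
		return -1;
	}
	return 0;
}

static void idle(void)
{
	atomic_fetch_sub(&usage_count, 1);
}

int main(void)
{
	if (busy() == 0) {
		printf("usage=%d\n", atomic_load(&usage_count));
		idle();
	}
	printf("usage=%d\n", atomic_load(&usage_count));
	return 0;
}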
- gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_PREEMPT: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - /* preempt TSG */ - err = g->ops.fifo.preempt_tsg(g, tsg->tsgid); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: - { - err = gk20a_tsg_event_id_ctrl(g, tsg, - (struct nvgpu_event_id_ctrl_args *)buf); - break; - } - - case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: - err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, - (struct nvgpu_runlist_interleave_args *)buf); - break; - - case NVGPU_IOCTL_TSG_SET_TIMESLICE: - { - err = gk20a_tsg_ioctl_set_timeslice(g, tsg, - (struct nvgpu_timeslice_args *)buf); - break; - } - case NVGPU_IOCTL_TSG_GET_TIMESLICE: - { - err = gk20a_tsg_ioctl_get_timeslice(g, tsg, - (struct nvgpu_timeslice_args *)buf); - break; - } - - default: - nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, - buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h deleted file mode 100644 index 67399fd4..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef NVGPU_IOCTL_TSG_H -#define NVGPU_IOCTL_TSG_H - -struct inode; -struct file; -struct gk20a; -struct nvgpu_ref; - -int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); -int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); -int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); -long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c deleted file mode 100644 index 10946a08..00000000 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "gk20a/gk20a.h" - -#include "kmem_priv.h" - -/* - * Statically declared because this needs to be shared across all nvgpu driver - * instances. 
This makes sure that all kmem caches are _definitely_ uniquely - * named. - */ -static atomic_t kmem_cache_id; - -void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) -{ - void *p; - - if (size > PAGE_SIZE) { - if (clear) - p = nvgpu_vzalloc(g, size); - else - p = nvgpu_vmalloc(g, size); - } else { - if (clear) - p = nvgpu_kzalloc(g, size); - else - p = nvgpu_kmalloc(g, size); - } - - return p; -} - -void nvgpu_big_free(struct gk20a *g, void *p) -{ - /* - * This will have to be fixed eventually. Allocs that use - * nvgpu_big_[mz]alloc() will need to remember the size of the alloc - * when freeing. - */ - if (is_vmalloc_addr(p)) - nvgpu_vfree(g, p); - else - nvgpu_kfree(g, p); -} - -void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kmalloc(g, size, ip); -#else - alloc = kmalloc(size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", - size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kzalloc(g, size, ip); -#else - alloc = kzalloc(size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", - size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kcalloc(g, n, size, ip); -#else - alloc = kcalloc(n, size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", - n * size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_vmalloc(g, size, ip); -#else - alloc = vmalloc(size); -#endif - - kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); - - return alloc; -} - -void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_vzalloc(g, size, ip); -#else - alloc = vzalloc(size); -#endif - - kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); - - return alloc; -} - -void __nvgpu_kfree(struct gk20a *g, void *addr) -{ - kmem_dbg(g, "kfree: addr=0x%p", addr); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - __nvgpu_track_kfree(g, addr); -#else - kfree(addr); -#endif -} - -void __nvgpu_vfree(struct gk20a *g, void *addr) -{ - kmem_dbg(g, "vfree: addr=0x%p", addr); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - __nvgpu_track_vfree(g, addr); -#else - vfree(addr); -#endif -} - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - -void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) -{ - nvgpu_mutex_acquire(&tracker->lock); -} - -void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) -{ - nvgpu_mutex_release(&tracker->lock); -} - -void kmem_print_mem_alloc(struct gk20a *g, - struct nvgpu_mem_alloc *alloc, - struct seq_file *s) -{ -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - int i; - - __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", - alloc->addr, alloc->size); - for (i = 0; i < alloc->stack_length; i++) - __pstat(s, " %3d [<%p>] %pS\n", i, - (void *)alloc->stack[i], - (void *)alloc->stack[i]); - __pstat(s, "\n"); -#else - __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", - alloc->addr, alloc->size, alloc->ip); -#endif -} - -static int nvgpu_add_alloc(struct 
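[Editor's note: __nvgpu_big_alloc() routes requests above PAGE_SIZE to vmalloc and everything else to kmalloc, and nvgpu_big_free() later distinguishes the two with is_vmalloc_addr(). That kernel facility has no userspace equivalent, so the analogue below tags each allocation with a header byte instead — the tag is purely illustrative, not how the kernel code works.]

#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE 4096u

struct big_hdr { unsigned char is_big; };	/* stands in for is_vmalloc_addr() */

static void *big_alloc(size_t size, int clear)
{
	struct big_hdr *h = clear ? calloc(1, sizeof(*h) + size)
				  : malloc(sizeof(*h) + size);
	if (!h)
		return NULL;
	h->is_big = size > FAKE_PAGE_SIZE;	/* route large allocs differently */
	return h + 1;
}

static void big_free(void *p)
{
	struct big_hdr *h = (struct big_hdr *)p - 1;

	printf("freeing a %s alloc\n", h->is_big ? "big" : "small");
	free(h);
}

int main(void)
{
	void *p = big_alloc(2 * FAKE_PAGE_SIZE, 1);

	if (p)
		big_free(p);
	return 0;
}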
nvgpu_mem_alloc_tracker *tracker, - struct nvgpu_mem_alloc *alloc) -{ - alloc->allocs_entry.key_start = alloc->addr; - alloc->allocs_entry.key_end = alloc->addr + alloc->size; - - nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); - return 0; -} - -static struct nvgpu_mem_alloc *nvgpu_rem_alloc( - struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) -{ - struct nvgpu_mem_alloc *alloc; - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); - if (!node) - return NULL; - - alloc = nvgpu_mem_alloc_from_rbtree_node(node); - - nvgpu_rbtree_unlink(node, &tracker->allocs); - - return alloc; -} - -static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, - unsigned long size, unsigned long real_size, - u64 addr, unsigned long ip) -{ - int ret; - struct nvgpu_mem_alloc *alloc; -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - struct stack_trace stack_trace; -#endif - - alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); - if (!alloc) - return -ENOMEM; - - alloc->owner = tracker; - alloc->size = size; - alloc->real_size = real_size; - alloc->addr = addr; - alloc->ip = (void *)(uintptr_t)ip; - -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - stack_trace.max_entries = MAX_STACK_TRACE; - stack_trace.nr_entries = 0; - stack_trace.entries = alloc->stack; - /* - * This 4 here skips the 2 function calls that happen for all traced - * allocs due to nvgpu: - * - * __nvgpu_save_kmem_alloc+0x7c/0x128 - * __nvgpu_track_kzalloc+0xcc/0xf8 - * - * And the function calls that get made by the stack trace code itself. - * If the trace savings code changes this will likely have to change - * as well. - */ - stack_trace.skip = 4; - save_stack_trace(&stack_trace); - alloc->stack_length = stack_trace.nr_entries; -#endif - - nvgpu_lock_tracker(tracker); - tracker->bytes_alloced += size; - tracker->bytes_alloced_real += real_size; - tracker->nr_allocs++; - - /* Keep track of this for building a histogram later on. */ - if (tracker->max_alloc < size) - tracker->max_alloc = size; - if (tracker->min_alloc > size) - tracker->min_alloc = size; - - ret = nvgpu_add_alloc(tracker, alloc); - if (ret) { - WARN(1, "Duplicate alloc??? 0x%llx\n", addr); - kfree(alloc); - nvgpu_unlock_tracker(tracker); - return ret; - } - nvgpu_unlock_tracker(tracker); - - return 0; -} - -static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, - u64 addr) -{ - struct nvgpu_mem_alloc *alloc; - - nvgpu_lock_tracker(tracker); - alloc = nvgpu_rem_alloc(tracker, addr); - if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { - nvgpu_unlock_tracker(tracker); - return -EINVAL; - } - - memset((void *)alloc->addr, 0, alloc->size); - - tracker->nr_frees++; - tracker->bytes_freed += alloc->size; - tracker->bytes_freed_real += alloc->real_size; - nvgpu_unlock_tracker(tracker); - - return 0; -} - -static void __nvgpu_check_valloc_size(unsigned long size) -{ - WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); -} - -static void __nvgpu_check_kalloc_size(size_t size) -{ - WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); -} - -void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, - unsigned long ip) -{ - void *alloc = vmalloc(size); - - if (!alloc) - return NULL; - - __nvgpu_check_valloc_size(size); - - /* - * Ignore the return message. If this fails let's not cause any issues - * for the rest of the driver. 
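[Editor's note: the tracker keys live allocations by address so that a free can be matched to its record, and flags a miss as a possible double-free. The real code uses an interval rbtree; the compilable sketch below substitutes a flat table with swap-removal, which preserves the add/remove/miss semantics but not the O(log n) lookup.]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct alloc_rec { uint64_t addr; size_t size; };
static struct alloc_rec table[64];	/* stands in for tracker->allocs */
static size_t nr_recs;

static void track_add(void *p, size_t size)
{
	table[nr_recs].addr = (uint64_t)(uintptr_t)p;
	table[nr_recs].size = size;
	nr_recs++;
}

static int track_remove(void *p)
{
	uint64_t addr = (uint64_t)(uintptr_t)p;

	for (size_t i = 0; i < nr_recs; i++) {
		if (table[i].addr == addr) {
			table[i] = table[--nr_recs];	/* swap-remove */
			return 0;
		}
	}
	return -1;	/* analogue of the double-free warning */
}

int main(void)
{
	void *p = malloc(32);

	track_add(p, 32);
	printf("remove once: %d, twice: %d\n",
	       track_remove(p), track_remove(p));
	free(p);
	return 0;
}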
- */ - __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, - unsigned long ip) -{ - void *alloc = vzalloc(size); - - if (!alloc) - return NULL; - - __nvgpu_check_valloc_size(size); - - /* - * Ignore the return message. If this fails let's not cause any issues - * for the rest of the driver. - */ - __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc = kmalloc(size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(size); - - __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc = kzalloc(size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(size); - - __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, - unsigned long ip) -{ - void *alloc = kcalloc(n, size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(n * size); - - __nvgpu_save_kmem_alloc(g->kmallocs, n * size, - roundup_pow_of_two(n * size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void __nvgpu_track_vfree(struct gk20a *g, void *addr) -{ - /* - * Often it is accepted practice to pass NULL pointers into free - * functions to save code. - */ - if (!addr) - return; - - __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); - - vfree(addr); -} - -void __nvgpu_track_kfree(struct gk20a *g, void *addr) -{ - if (!addr) - return; - - __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); - - kfree(addr); -} - -static int __do_check_for_outstanding_allocs( - struct gk20a *g, - struct nvgpu_mem_alloc_tracker *tracker, - const char *type, bool silent) -{ - struct nvgpu_rbtree_node *node; - int count = 0; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - if (!silent) - kmem_print_mem_alloc(g, alloc, NULL); - - count++; - nvgpu_rbtree_enum_next(&node, node); - } - - return count; -} - -/** - * check_for_outstanding_allocs - Count and display outstanding allocs - * - * @g - The GPU. - * @silent - If set don't print anything about the allocs. - * - * Dump (or just count) the number of allocations left outstanding. - */ -static int check_for_outstanding_allocs(struct gk20a *g, bool silent) -{ - int count = 0; - - count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", - silent); - count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", - silent); - - return count; -} - -static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, - void (*force_free_func)(const void *)) -{ - struct nvgpu_rbtree_node *node; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - if (force_free_func) - force_free_func((void *)alloc->addr); - - nvgpu_rbtree_unlink(node, &tracker->allocs); - kfree(alloc); - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - } -} - -/** - * nvgpu_kmem_cleanup - Cleanup the kmem tracking - * - * @g - The GPU. 
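[Editor's note: the tracking wrappers record both the requested size and roundup_pow_of_two(size) as the "real" size, since the slab allocator rounds kmalloc requests up to a power-of-two bucket. A portable equivalent of that rounding, valid for n >= 1:]

#include <stdio.h>

static unsigned long roundup_pow2(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)		/* double until we cover the request */
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long sizes[] = { 24, 100, 512, 4097 };

	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%lu -> %lu\n", sizes[i], roundup_pow2(sizes[i]));
	return 0;
}

For the sample inputs this prints 32, 128, 512 and 8192 — the gap between size and real_size is exactly the slab overhead the tracker is accounting for.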
- * @force_free - If set will also free leaked objects if possible. - * - * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free - * is non-zero then the allocation made by nvgpu is also freed. This is risky, - * though, as it is possible that the memory is still in use by other parts of - * the GPU driver not aware that this has happened. - * - * In theory it should be fine if the GPU driver has been deinitialized and - * there are no bugs in that code. However, if there are any bugs in that code - * then they could likely manifest as odd crashes indeterminate amounts of time - * in the future. So use @force_free at your own risk. - */ -static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) -{ - do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); - do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); -} - -void nvgpu_kmem_fini(struct gk20a *g, int flags) -{ - int count; - bool silent, force_free; - - if (!flags) - return; - - silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); - force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); - - count = check_for_outstanding_allocs(g, silent); - nvgpu_kmem_cleanup(g, force_free); - - /* - * If we leak objects we can either BUG() out or just WARN(). In general - * it doesn't make sense to BUG() on here since leaking a few objects - * won't crash the kernel but it can be helpful for development. - * - * If neither flag is set then we just silently do nothing. - */ - if (count > 0) { - if (flags & NVGPU_KMEM_FINI_WARN) { - WARN(1, "Letting %d allocs leak!!\n", count); - } else if (flags & NVGPU_KMEM_FINI_BUG) { - nvgpu_err(g, "Letting %d allocs leak!!", count); - BUG(); - } - } -} - -int nvgpu_kmem_init(struct gk20a *g) -{ - int err; - - g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); - g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); - - if (!g->vmallocs || !g->kmallocs) { - err = -ENOMEM; - goto fail; - } - - g->vmallocs->name = "vmalloc"; - g->kmallocs->name = "kmalloc"; - - g->vmallocs->allocs = NULL; - g->kmallocs->allocs = NULL; - - nvgpu_mutex_init(&g->vmallocs->lock); - nvgpu_mutex_init(&g->kmallocs->lock); - - g->vmallocs->min_alloc = PAGE_SIZE; - g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; - - /* - * This needs to go after all the other initialization since they use - * the nvgpu_kzalloc() API. 
- */ - g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, - sizeof(struct nvgpu_mem_alloc)); - g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, - sizeof(struct nvgpu_mem_alloc)); - - if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { - err = -ENOMEM; - if (g->vmallocs->allocs_cache) - nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); - if (g->kmallocs->allocs_cache) - nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); - goto fail; - } - - return 0; - -fail: - if (g->vmallocs) - kfree(g->vmallocs); - if (g->kmallocs) - kfree(g->kmallocs); - return err; -} - -#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ - -int nvgpu_kmem_init(struct gk20a *g) -{ - return 0; -} - -void nvgpu_kmem_fini(struct gk20a *g, int flags) -{ -} -#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ - -struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) -{ - struct nvgpu_kmem_cache *cache = - nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); - - if (!cache) - return NULL; - - cache->g = g; - - snprintf(cache->name, sizeof(cache->name), - "nvgpu-cache-0x%p-%d-%d", g, (int)size, - atomic_inc_return(&kmem_cache_id)); - cache->cache = kmem_cache_create(cache->name, - size, size, 0, NULL); - if (!cache->cache) { - nvgpu_kfree(g, cache); - return NULL; - } - - return cache; -} - -void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) -{ - struct gk20a *g = cache->g; - - kmem_cache_destroy(cache->cache); - nvgpu_kfree(g, cache); -} - -void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) -{ - return kmem_cache_alloc(cache->cache, GFP_KERNEL); -} - -void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) -{ - kmem_cache_free(cache->cache, ptr); -} diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h deleted file mode 100644 index a41762af..00000000 --- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __KMEM_PRIV_H__ -#define __KMEM_PRIV_H__ - -#include -#include - -struct seq_file; - -#define __pstat(s, fmt, msg...) \ - do { \ - if (s) \ - seq_printf(s, fmt, ##msg); \ - else \ - pr_info(fmt, ##msg); \ - } while (0) - -#define MAX_STACK_TRACE 20 - -/* - * Linux specific version of the nvgpu_kmem_cache struct. This type is - * completely opaque to the rest of the driver. - */ -struct nvgpu_kmem_cache { - struct gk20a *g; - struct kmem_cache *cache; - - /* - * Memory to hold the kmem_cache unique name. Only necessary on our - * k3.10 kernel when not using the SLUB allocator but it's easier to - * just carry this on to newer kernels. 
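[Editor's note: nvgpu_kmem_cache_create() below builds a unique cache name by folding an atomically incremented counter into snprintf(), because kmem_cache names must be unique across all driver instances. A userspace sketch of the same naming scheme, with atomic_fetch_add + 1 mirroring the kernel's atomic_inc_return():]

#include <stdatomic.h>
#include <stdio.h>

static atomic_int cache_id;	/* analogue of the static kmem_cache_id */

static void make_cache_name(char *buf, size_t len, void *owner, int size)
{
	int id = atomic_fetch_add(&cache_id, 1) + 1;

	snprintf(buf, len, "nvgpu-cache-%p-%d-%d", owner, size, id);
}

int main(void)
{
	char a[64], b[64];
	int dummy;

	make_cache_name(a, sizeof(a), &dummy, 32);
	make_cache_name(b, sizeof(b), &dummy, 32);
	printf("%s\n%s\n", a, b);	/* same owner and size, distinct ids */
	return 0;
}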
- */ - char name[128]; -}; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - -struct nvgpu_mem_alloc { - struct nvgpu_mem_alloc_tracker *owner; - - void *ip; -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - unsigned long stack[MAX_STACK_TRACE]; - int stack_length; -#endif - - u64 addr; - - unsigned long size; - unsigned long real_size; - - struct nvgpu_rbtree_node allocs_entry; -}; - -static inline struct nvgpu_mem_alloc * -nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) -{ - return (struct nvgpu_mem_alloc *) - ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); -}; - -/* - * Linux specific tracking of vmalloc, kmalloc, etc. - */ -struct nvgpu_mem_alloc_tracker { - const char *name; - struct nvgpu_kmem_cache *allocs_cache; - struct nvgpu_rbtree_node *allocs; - struct nvgpu_mutex lock; - - u64 bytes_alloced; - u64 bytes_freed; - u64 bytes_alloced_real; - u64 bytes_freed_real; - u64 nr_allocs; - u64 nr_frees; - - unsigned long min_alloc; - unsigned long max_alloc; -}; - -void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); -void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); - -void kmem_print_mem_alloc(struct gk20a *g, - struct nvgpu_mem_alloc *alloc, - struct seq_file *s); -#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ - -#endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/log.c b/drivers/gpu/nvgpu/common/linux/log.c deleted file mode 100644 index ca29e0f3..00000000 --- a/drivers/gpu/nvgpu/common/linux/log.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -/* - * Define a length for log buffers. This is the buffer that the 'fmt, ...' part - * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it - * needs to not be overly sized since we have limited kernel stack space. But at - * the same time we don't want it to be restrictive either. - */ -#define LOG_BUFFER_LENGTH 160 - -/* - * Annoying quirk of Linux: this has to be a string literal since the printk() - * function and friends use the preprocessor to concatenate stuff to the start - * of this string when printing. 
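[Editor's note: nvgpu_mem_alloc_from_rbtree_node() above recovers the containing struct from an embedded rbtree node by subtracting offsetof() — the container_of idiom. A self-contained sketch with hypothetical struct names:]

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct node { struct node *left, *right; };
struct item { int value; struct node entry; };

/* Step back from the embedded member to the enclosing struct. */
static struct item *item_from_node(struct node *n)
{
	return (struct item *)((uintptr_t)n - offsetof(struct item, entry));
}

int main(void)
{
	struct item it = { .value = 42 };

	printf("%d\n", item_from_node(&it.entry)->value);
	return 0;
}

This is why the tree code never needs to know about struct nvgpu_mem_alloc: it links nodes, and callers translate back with pointer arithmetic.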
- */ -#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" - -static const char *log_types[] = { - "ERR", - "WRN", - "DBG", - "INFO", -}; - -int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) -{ - return !!(g->log_mask & log_mask); -} - -static inline const char *nvgpu_log_name(struct gk20a *g) -{ - return dev_name(dev_from_gk20a(g)); -} - -#ifdef CONFIG_GK20A_TRACE_PRINTK -static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, - const char *func_name, int line, - const char *log_type, const char *log) -{ - trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); -} -#endif - -static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, - const char *func_name, int line, - enum nvgpu_log_type type, const char *log) -{ - const char *name = gpu_name ? gpu_name : ""; - const char *log_type = log_types[type]; - -#ifdef CONFIG_GK20A_TRACE_PRINTK - if (trace) - return __nvgpu_trace_printk_log(trace, name, func_name, - line, log_type, log); -#endif - switch (type) { - case NVGPU_DEBUG: - /* - * We could use pr_debug() here but we control debug enablement - * separately from the Linux kernel. Perhaps this is a bug in - * nvgpu. - */ - pr_info(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_INFO: - pr_info(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_WARNING: - pr_warn(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_ERROR: - pr_err(LOG_FMT, name, func_name, line, log_type, log); - break; - } -} - -__attribute__((format (printf, 5, 6))) -void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, - enum nvgpu_log_type type, const char *fmt, ...) -{ - char log[LOG_BUFFER_LENGTH]; - va_list args; - - va_start(args, fmt); - vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); - va_end(args); - - __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", - func_name, line, type, log); -} - -__attribute__((format (printf, 5, 6))) -void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, - const char *func_name, int line, - const char *fmt, ...) -{ - char log[LOG_BUFFER_LENGTH]; - va_list args; - - if ((log_mask & g->log_mask) == 0) - return; - - va_start(args, fmt); - vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); - va_end(args); - - __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), - func_name, line, NVGPU_DEBUG, log); -} diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c deleted file mode 100644 index af71cc81..00000000 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ /dev/null @@ -1,1365 +0,0 @@ -/* - * GK20A Graphics - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
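[Editor's note: the log path just removed gates on a per-class debug mask, formats once into a small fixed stack buffer with vsnprintf(), and only then hands the string to the print backend. A userspace sketch of that flow, with printf standing in for pr_info() and a hypothetical enabled_mask:]

#include <stdarg.h>
#include <stdio.h>

#define LOG_BUF_LEN 160		/* small: the real buffer lives on the kernel stack */
static unsigned long enabled_mask = 0x2;

static void log_dbg(unsigned long mask, const char *func, int line,
		    const char *fmt, ...)
{
	char buf[LOG_BUF_LEN];
	va_list args;

	if ((mask & enabled_mask) == 0)	/* cheap early-out, as in __nvgpu_log_dbg() */
		return;

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printf("demo: %s:%d [DBG] %s\n", func, line, buf);
}

int main(void)
{
	log_dbg(0x1, __func__, __LINE__, "filtered out %d", 1);
	log_dbg(0x2, __func__, __LINE__, "printed, err=%d", -22);
	return 0;
}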
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "platform_gk20a.h" -#include "sysfs.h" -#include "vgpu/vgpu_linux.h" -#include "scale.h" -#include "pci.h" -#include "module.h" -#include "module_usermode.h" -#include "intr.h" -#include "ioctl.h" - -#include "os_linux.h" -#include "ctxsw_trace.h" -#include "driver_common.h" -#include "channel.h" - -#ifdef CONFIG_NVGPU_SUPPORT_CDE -#include "cde.h" -#endif - -#define CLASS_NAME "nvidia-gpu" -/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ - -#define GK20A_WAIT_FOR_IDLE_MS 2000 - -#define CREATE_TRACE_POINTS -#include - - -struct device_node *nvgpu_get_node(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (dev_is_pci(dev)) { - struct pci_bus *bus = to_pci_dev(dev)->bus; - - while (!pci_is_root_bus(bus)) - bus = bus->parent; - - return bus->bridge->parent->of_node; - } - - return dev->of_node; -} - -void gk20a_busy_noresume(struct gk20a *g) -{ - pm_runtime_get_noresume(dev_from_gk20a(g)); -} - -int gk20a_busy(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int ret = 0; - struct device *dev; - - if (!g) - return -ENODEV; - - atomic_inc(&g->usage_count.atomic_var); - - down_read(&l->busy_lock); - - if (!gk20a_can_busy(g)) { - ret = -ENODEV; - atomic_dec(&g->usage_count.atomic_var); - goto fail; - } - - dev = dev_from_gk20a(g); - - if (pm_runtime_enabled(dev)) { - /* Increment usage count and attempt to resume device */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - /* Mark suspended so runtime pm will retry later */ - pm_runtime_set_suspended(dev); - pm_runtime_put_noidle(dev); - atomic_dec(&g->usage_count.atomic_var); - goto fail; - } - } else { - nvgpu_mutex_acquire(&g->poweron_lock); - if (!g->power_on) { - ret = gk20a_gpu_is_virtual(dev) ? - vgpu_pm_finalize_poweron(dev) - : gk20a_pm_finalize_poweron(dev); - if (ret) { - atomic_dec(&g->usage_count.atomic_var); - nvgpu_mutex_release(&g->poweron_lock); - goto fail; - } - } - nvgpu_mutex_release(&g->poweron_lock); - } - -fail: - up_read(&l->busy_lock); - - return ret < 0 ? ret : 0; -} - -void gk20a_idle_nosuspend(struct gk20a *g) -{ - pm_runtime_put_noidle(dev_from_gk20a(g)); -} - -void gk20a_idle(struct gk20a *g) -{ - struct device *dev; - - atomic_dec(&g->usage_count.atomic_var); - - dev = dev_from_gk20a(g); - - if (!(dev && gk20a_can_busy(g))) - return; - - if (pm_runtime_enabled(dev)) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_sync_autosuspend(dev); - } -} - -/* - * Undoes gk20a_lockout_registers(). 
- */ -static int gk20a_restore_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = l->regs_saved; - l->bar1 = l->bar1_saved; - - nvgpu_restore_usermode_registers(g); - - return 0; -} - -static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l) -{ - int err = 0; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - err = nvgpu_cde_init_ops(l); -#endif - - return err; -} - -int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - int err; - - if (l->init_done) - return 0; - - err = nvgpu_init_channel_support_linux(l); - if (err) { - nvgpu_err(g, "failed to init linux channel support"); - return err; - } - - l->init_done = true; - - return 0; -} - -int gk20a_pm_finalize_poweron(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - nvgpu_log_fn(g, " "); - - if (g->power_on) - return 0; - - trace_gk20a_finalize_poweron(dev_name(dev)); - - /* Increment platform power refcount */ - if (platform->busy) { - err = platform->busy(dev); - if (err < 0) { - nvgpu_err(g, "failed to poweron platform dependency"); - return err; - } - } - - err = gk20a_restore_registers(g); - if (err) - return err; - - /* Enable interrupt workqueue */ - if (!l->nonstall_work_queue) { - l->nonstall_work_queue = alloc_workqueue("%s", - WQ_HIGHPRI, 1, "mc_nonstall"); - INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb); - } - - err = gk20a_detect_chip(g); - if (err) - return err; - - if (g->sim) { - if (g->sim->sim_init_late) - g->sim->sim_init_late(g); - } - - err = gk20a_finalize_poweron(g); - if (err) - goto done; - - err = nvgpu_finalize_poweron_linux(l); - if (err) - goto done; - - nvgpu_init_mm_ce_context(g); - - nvgpu_vidmem_thread_unpause(&g->mm); - - /* Initialise scaling: it will initialize scaling drive only once */ - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && - nvgpu_platform_is_silicon(g)) { - gk20a_scale_init(dev); - if (platform->initscale) - platform->initscale(dev); - } - - trace_gk20a_finalize_poweron_done(dev_name(dev)); - - err = nvgpu_init_os_linux_ops(l); - if (err) - goto done; - - enable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - enable_irq(g->irq_nonstall); - g->irqs_enabled = 1; - - gk20a_scale_resume(dev_from_gk20a(g)); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - if (platform->has_cde) - gk20a_init_cde_support(l); -#endif - - err = gk20a_sched_ctrl_init(g); - if (err) { - nvgpu_err(g, "failed to init sched control"); - return err; - } - - g->sw_ready = true; - -done: - if (err) - g->power_on = false; - - return err; -} - -/* - * Locks out the driver from accessing GPU registers. This prevents access to - * thse registers after the GPU has been clock or power gated. This should help - * find annoying bugs where register reads and writes are silently dropped - * after the GPU has been turned off. On older chips these reads and writes can - * also lock the entire CPU up. 
- */ -static int gk20a_lockout_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = NULL; - l->bar1 = NULL; - - nvgpu_lockout_usermode_registers(g); - - return 0; -} - -static int gk20a_pm_prepare_poweroff(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); -#endif - int ret = 0; - struct gk20a_platform *platform = gk20a_get_platform(dev); - bool irqs_enabled; - - nvgpu_log_fn(g, " "); - - nvgpu_mutex_acquire(&g->poweroff_lock); - - if (!g->power_on) - goto done; - - /* disable IRQs and wait for completion */ - irqs_enabled = g->irqs_enabled; - if (irqs_enabled) { - disable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - disable_irq(g->irq_nonstall); - g->irqs_enabled = 0; - } - - gk20a_scale_suspend(dev); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - gk20a_cde_suspend(l); -#endif - - ret = gk20a_prepare_poweroff(g); - if (ret) - goto error; - - /* Decrement platform power refcount */ - if (platform->idle) - platform->idle(dev); - - /* Stop CPU from accessing the GPU registers. */ - gk20a_lockout_registers(g); - - nvgpu_mutex_release(&g->poweroff_lock); - return 0; - -error: - /* re-enabled IRQs if previously enabled */ - if (irqs_enabled) { - enable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - enable_irq(g->irq_nonstall); - g->irqs_enabled = 1; - } - - gk20a_scale_resume(dev); -done: - nvgpu_mutex_release(&g->poweroff_lock); - - return ret; -} - -static struct of_device_id tegra_gk20a_of_match[] = { -#ifdef CONFIG_TEGRA_GK20A - { .compatible = "nvidia,tegra210-gm20b", - .data = &gm20b_tegra_platform }, - { .compatible = "nvidia,tegra186-gp10b", - .data = &gp10b_tegra_platform }, - { .compatible = "nvidia,gv11b", - .data = &gv11b_tegra_platform }, -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - { .compatible = "nvidia,gv11b-vgpu", - .data = &gv11b_vgpu_tegra_platform}, -#endif -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - { .compatible = "nvidia,tegra124-gk20a-vgpu", - .data = &vgpu_tegra_platform }, -#endif -#endif - - { }, -}; - -#ifdef CONFIG_PM -/** - * __gk20a_do_idle() - force the GPU to idle and railgate - * - * In success, this call MUST be balanced by caller with __gk20a_do_unidle() - * - * Acquires two locks : &l->busy_lock and &platform->railgate_lock - * In success, we hold these locks and return - * In failure, we release these locks and return - */ -int __gk20a_do_idle(struct gk20a *g, bool force_reset) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct nvgpu_timeout timeout; - int ref_cnt; - int target_ref_cnt = 0; - bool is_railgated; - int err = 0; - - /* - * Hold back deterministic submits and changes to deterministic - * channels - this must be outside the power busy locks. - */ - gk20a_channel_deterministic_idle(g); - - /* acquire busy lock to block other busy() calls */ - down_write(&l->busy_lock); - - /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ - nvgpu_mutex_acquire(&platform->railgate_lock); - - /* check if it is already railgated ? 
*/ - if (platform->is_railgated(dev)) - return 0; - - /* - * release railgate_lock, prevent suspend by incrementing usage counter, - * re-acquire railgate_lock - */ - nvgpu_mutex_release(&platform->railgate_lock); - pm_runtime_get_sync(dev); - - /* - * One refcount taken in this API - * If User disables rail gating, we take one more - * extra refcount - */ - if (g->can_railgate) - target_ref_cnt = 1; - else - target_ref_cnt = 2; - nvgpu_mutex_acquire(&platform->railgate_lock); - - nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, - NVGPU_TIMER_CPU_TIMER); - - /* check and wait until GPU is idle (with a timeout) */ - do { - nvgpu_usleep_range(1000, 1100); - ref_cnt = atomic_read(&dev->power.usage_count); - } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); - - if (ref_cnt != target_ref_cnt) { - nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", - ref_cnt); - goto fail_drop_usage_count; - } - - /* check if global force_reset flag is set */ - force_reset |= platform->force_reset_in_do_idle; - - nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, - NVGPU_TIMER_CPU_TIMER); - - if (g->can_railgate && !force_reset) { - /* - * Case 1 : GPU railgate is supported - * - * if GPU is now idle, we will have only one ref count, - * drop this ref which will rail gate the GPU - */ - pm_runtime_put_sync(dev); - - /* add sufficient delay to allow GPU to rail gate */ - nvgpu_msleep(g->railgate_delay); - - /* check in loop if GPU is railgated or not */ - do { - nvgpu_usleep_range(1000, 1100); - is_railgated = platform->is_railgated(dev); - } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); - - if (is_railgated) { - return 0; - } else { - nvgpu_err(g, "failed to idle in timeout"); - goto fail_timeout; - } - } else { - /* - * Case 2 : GPU railgate is not supported or we explicitly - * do not want to depend on runtime PM - * - * if GPU is now idle, call prepare_poweroff() to save the - * state and then do explicit railgate - * - * __gk20a_do_unidle() needs to unrailgate, call - * finalize_poweron(), and then call pm_runtime_put_sync() - * to balance the GPU usage counter - */ - - /* Save the GPU state */ - err = gk20a_pm_prepare_poweroff(dev); - if (err) - goto fail_drop_usage_count; - - /* railgate GPU */ - platform->railgate(dev); - - nvgpu_udelay(10); - - g->forced_reset = true; - return 0; - } - -fail_drop_usage_count: - pm_runtime_put_noidle(dev); -fail_timeout: - nvgpu_mutex_release(&platform->railgate_lock); - up_write(&l->busy_lock); - gk20a_channel_deterministic_unidle(g); - return -EBUSY; -} - -/** - * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called - * from outside of GPU driver - * - * In success, this call MUST be balanced by caller with gk20a_do_unidle() - */ -static int gk20a_do_idle(void *_g) -{ - struct gk20a *g = (struct gk20a *)_g; - - return __gk20a_do_idle(g, true); -} - -/** - * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() - */ -int __gk20a_do_unidle(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err; - - if (g->forced_reset) { - /* - * If we did a forced-reset/railgate - * then unrailgate the GPU here first - */ - platform->unrailgate(dev); - - /* restore the GPU state */ - err = gk20a_pm_finalize_poweron(dev); - if (err) - return err; - - /* balance GPU usage counter */ - pm_runtime_put_sync(dev); - - g->forced_reset = false; - } - - /* release the lock and 
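[Editor's note: __gk20a_do_idle() waits for the runtime-PM usage count and for railgating with the same recipe — sleep ~1 ms per iteration and give up when a CPU timer expires. A portable sketch of that deadline-bounded polling loop, using clock_gettime in place of nvgpu_timeout_init/nvgpu_timeout_expired:]

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

static bool poll_until(bool (*cond)(void), int timeout_ms)
{
	long long deadline = now_ms() + timeout_ms;
	struct timespec nap = { 0, 1000 * 1000 };	/* ~1 ms, like usleep_range(1000, 1100) */

	while (!cond()) {
		if (now_ms() >= deadline)
			return false;	/* caller reports "failed to idle" */
		nanosleep(&nap, NULL);
	}
	return true;
}

static bool always_false(void) { return false; }

int main(void)
{
	printf("idled: %s\n", poll_until(always_false, 10) ? "yes" : "no");
	return 0;
}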
open up all other busy() calls */ - nvgpu_mutex_release(&platform->railgate_lock); - up_write(&l->busy_lock); - - gk20a_channel_deterministic_unidle(g); - - return 0; -} - -/** - * gk20a_do_unidle() - wrap up for __gk20a_do_unidle() - */ -static int gk20a_do_unidle(void *_g) -{ - struct gk20a *g = (struct gk20a *)_g; - - return __gk20a_do_unidle(g); -} -#endif - -void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, - struct resource **out) -{ - struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); - - if (!r) - return NULL; - if (out) - *out = r; - return devm_ioremap_resource(&dev->dev, r); -} - -static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_stall(g); -} - -static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_nonstall(g); -} - -static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_thread_stall(g); -} - -void gk20a_remove_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct sim_nvgpu_linux *sim_linux; - - tegra_unregister_idle_unidle(gk20a_do_idle); - - nvgpu_kfree(g, g->dbg_regops_tmp_buf); - - nvgpu_remove_channel_support_linux(l); - - if (g->pmu.remove_support) - g->pmu.remove_support(&g->pmu); - - if (g->gr.remove_support) - g->gr.remove_support(&g->gr); - - if (g->mm.remove_ce_support) - g->mm.remove_ce_support(&g->mm); - - if (g->fifo.remove_support) - g->fifo.remove_support(&g->fifo); - - if (g->mm.remove_support) - g->mm.remove_support(&g->mm); - - if (g->sim) { - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - if (g->sim->remove_support) - g->sim->remove_support(g); - if (sim_linux->remove_support_linux) - sim_linux->remove_support_linux(g); - } - - /* free mappings to registers, etc */ - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - nvgpu_remove_usermode_support(g); - - nvgpu_free_enabled_flags(g); -} - -static int gk20a_init_support(struct platform_device *dev) -{ - int err = -ENOMEM; - struct gk20a *g = get_gk20a(&dev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); - - l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, - &l->reg_mem); - if (IS_ERR(l->regs)) { - nvgpu_err(g, "failed to remap gk20a registers"); - err = PTR_ERR(l->regs); - goto fail; - } - - l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, - &l->bar1_mem); - if (IS_ERR(l->bar1)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(l->bar1); - goto fail; - } - - err = nvgpu_init_sim_support_linux(g, dev); - if (err) - goto fail; - err = nvgpu_init_sim_support(g); - if (err) - goto fail_sim; - - nvgpu_init_usermode_support(g); - return 0; - -fail_sim: - nvgpu_remove_sim_support_linux(g); -fail: - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - return err; -} - -static int gk20a_pm_railgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - struct gk20a *g = get_gk20a(dev); - - /* if platform is already railgated, then just return */ - if (platform->is_railgated && platform->is_railgated(dev)) - return ret; - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_gate_start = jiffies; - - if 
(g->pstats.railgating_cycle_count >= 1) - g->pstats.total_rail_ungate_time_ms = - g->pstats.total_rail_ungate_time_ms + - jiffies_to_msecs(g->pstats.last_rail_gate_start - - g->pstats.last_rail_ungate_complete); -#endif - - if (platform->railgate) - ret = platform->railgate(dev); - if (ret) { - nvgpu_err(g, "failed to railgate platform, err=%d", ret); - return ret; - } - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_gate_complete = jiffies; -#endif - ret = tegra_fuse_clock_disable(); - if (ret) - nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); - - return ret; -} - -static int gk20a_pm_unrailgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - struct gk20a *g = get_gk20a(dev); - - ret = tegra_fuse_clock_enable(); - if (ret) { - nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); - return ret; - } -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_ungate_start = jiffies; - if (g->pstats.railgating_cycle_count >= 1) - g->pstats.total_rail_gate_time_ms = - g->pstats.total_rail_gate_time_ms + - jiffies_to_msecs(g->pstats.last_rail_ungate_start - - g->pstats.last_rail_gate_complete); - - g->pstats.railgating_cycle_count++; -#endif - - trace_gk20a_pm_unrailgate(dev_name(dev)); - - if (platform->unrailgate) { - nvgpu_mutex_acquire(&platform->railgate_lock); - ret = platform->unrailgate(dev); - nvgpu_mutex_release(&platform->railgate_lock); - } - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_ungate_complete = jiffies; -#endif - - return ret; -} - -/* - * Remove association of the driver with OS interrupt handler - */ -void nvgpu_free_irq(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - devm_free_irq(dev, g->irq_stall, g); - if (g->irq_stall != g->irq_nonstall) - devm_free_irq(dev, g->irq_nonstall, g); -} - -/* - * Idle the GPU in preparation of shutdown/remove. - * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW - * state to prevent further activity on the driver SW side. 
- * On driver removal quiesce() should be called after start_unload() - */ -int nvgpu_quiesce(struct gk20a *g) -{ - int err; - struct device *dev = dev_from_gk20a(g); - - if (g->power_on) { - err = gk20a_wait_for_idle(g); - if (err) { - nvgpu_err(g, "failed to idle GPU, err=%d", err); - return err; - } - - err = gk20a_fifo_disable_all_engine_activity(g, true); - if (err) { - nvgpu_err(g, - "failed to disable engine activity, err=%d", - err); - return err; - } - - err = gk20a_fifo_wait_engine_idle(g); - if (err) { - nvgpu_err(g, "failed to idle engines, err=%d", - err); - return err; - } - } - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_prepare_poweroff(dev); - else - err = gk20a_pm_prepare_poweroff(dev); - - if (err) - nvgpu_err(g, "failed to prepare for poweroff, err=%d", - err); - - return err; -} - -static void gk20a_pm_shutdown(struct platform_device *pdev) -{ - struct gk20a_platform *platform = platform_get_drvdata(pdev); - struct gk20a *g = platform->g; - int err; - - nvgpu_info(g, "shutting down"); - - /* vgpu has nothing to clean up currently */ - if (gk20a_gpu_is_virtual(&pdev->dev)) - return; - - if (!g->power_on) - goto finish; - - gk20a_driver_start_unload(g); - - /* If GPU is already railgated, - * just prevent more requests, and return */ - if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { - __pm_runtime_disable(&pdev->dev, false); - nvgpu_info(g, "already railgated, shut down complete"); - return; - } - - /* Prevent more requests by disabling Runtime PM */ - __pm_runtime_disable(&pdev->dev, false); - - err = nvgpu_quiesce(g); - if (err) - goto finish; - - err = gk20a_pm_railgate(&pdev->dev); - if (err) - nvgpu_err(g, "failed to railgate, err=%d", err); - -finish: - nvgpu_info(g, "shut down complete"); -} - -#ifdef CONFIG_PM -static int gk20a_pm_runtime_resume(struct device *dev) -{ - int err = 0; - - err = gk20a_pm_unrailgate(dev); - if (err) - goto fail; - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_finalize_poweron(dev); - else - err = gk20a_pm_finalize_poweron(dev); - if (err) - goto fail_poweron; - - return 0; - -fail_poweron: - gk20a_pm_railgate(dev); -fail: - return err; -} - -static int gk20a_pm_runtime_suspend(struct device *dev) -{ - int err = 0; - struct gk20a *g = get_gk20a(dev); - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_prepare_poweroff(dev); - else - err = gk20a_pm_prepare_poweroff(dev); - if (err) { - nvgpu_err(g, "failed to power off, err=%d", err); - goto fail; - } - - err = gk20a_pm_railgate(dev); - if (err) - goto fail; - - return 0; - -fail: - gk20a_pm_finalize_poweron(dev); - pm_runtime_mark_last_busy(dev); - return err; -} - -static int gk20a_pm_suspend(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - int ret = 0; - int idle_usage_count = 0; - - if (!g->power_on) { - if (!pm_runtime_enabled(dev)) - gk20a_pm_railgate(dev); - return 0; - } - - if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count) - return -EBUSY; - - ret = gk20a_pm_runtime_suspend(dev); - if (ret) - return ret; - - if (platform->suspend) - platform->suspend(dev); - - g->suspended = true; - - return 0; -} - -static int gk20a_pm_resume(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int ret = 0; - - if (!g->suspended) { - if (!pm_runtime_enabled(dev)) - gk20a_pm_unrailgate(dev); - return 0; - } - - ret = gk20a_pm_runtime_resume(dev); - - g->suspended = false; - - return ret; -} - -static const struct dev_pm_ops gk20a_pm_ops = { - .runtime_resume = 
gk20a_pm_runtime_resume, - .runtime_suspend = gk20a_pm_runtime_suspend, - .resume = gk20a_pm_resume, - .suspend = gk20a_pm_suspend, -}; -#endif - -static int gk20a_pm_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int err = 0; - - nvgpu_log_fn(g, " "); - - /* - * Initialise runtime PM. When railgating is disabled, - * set the autosuspend delay to a negative value, which - * disables runtime-PM autosuspend. - */ - if (g->railgate_delay && g->can_railgate) - pm_runtime_set_autosuspend_delay(dev, - g->railgate_delay); - else - pm_runtime_set_autosuspend_delay(dev, -1); - - pm_runtime_use_autosuspend(dev); - pm_runtime_enable(dev); - - return err; -} - -/* - * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. - */ -void gk20a_driver_start_unload(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); - - down_write(&l->busy_lock); - __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); - /* GR SW ready needs to be invalidated at this time with the busy lock - * held to prevent a race condition in the gr/mm code */ - g->gr.sw_ready = false; - g->sw_ready = false; - up_write(&l->busy_lock); - - if (g->is_virtual) - return; - - gk20a_wait_for_idle(g); - - nvgpu_wait_for_deferred_interrupts(g); - - if (l->nonstall_work_queue) { - cancel_work_sync(&l->nonstall_fn_work); - destroy_workqueue(l->nonstall_work_queue); - l->nonstall_work_queue = NULL; - } -} - -static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) -{ - gk20a_get_platform(&pdev->dev)->g = gk20a; -} - -static int nvgpu_read_fuse_overrides(struct gk20a *g) -{ - struct device_node *np = nvgpu_get_node(g); - u32 *fuses; - int count, i; - - if (!np) /* may be a PCIe device */ - return 0; - - count = of_property_count_elems_of_size(np, "fuse-overrides", 8); - if (count <= 0) - return count; - - fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); - if (!fuses) - return -ENOMEM; - of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); - for (i = 0; i < count; i++) { - u32 fuse, value; - - fuse = fuses[2 * i]; - value = fuses[2 * i + 1]; - switch (fuse) { - case GM20B_FUSE_OPT_TPC_DISABLE: - g->tpc_fs_mask_user = ~value; - break; - case GP10B_FUSE_OPT_ECC_EN: - g->gr.fecs_feature_override_ecc_val = value; - break; - default: - nvgpu_err(g, "ignoring unknown fuse override %08x", fuse); - break; - } - } - - nvgpu_kfree(g, fuses); - - return 0; -} - -static int gk20a_probe(struct platform_device *dev) -{ - struct nvgpu_os_linux *l = NULL; - struct gk20a *gk20a; - int err; - struct gk20a_platform *platform = NULL; - struct device_node *np; - - if (dev->dev.of_node) { - const struct of_device_id *match; - - match = of_match_device(tegra_gk20a_of_match, &dev->dev); - if (match) - platform = (struct gk20a_platform *)match->data; - } else - platform = (struct gk20a_platform *)dev->dev.platform_data; - - if (!platform) { - dev_err(&dev->dev, "no platform data\n"); - return -ENODATA; - } - - platform_set_drvdata(dev, platform); - - if (gk20a_gpu_is_virtual(&dev->dev)) - return vgpu_probe(dev); - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(&dev->dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - - hash_init(l->ecc_sysfs_stats_htable); - - gk20a = &l->g; - - nvgpu_log_fn(gk20a, " "); - - nvgpu_init_gk20a(gk20a); - set_gk20a(dev, gk20a); - l->dev = &dev->dev; - gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; - - nvgpu_kmem_init(gk20a); - - err = nvgpu_init_enabled_flags(gk20a); - if (err) - goto
return_err; - - np = nvgpu_get_node(gk20a); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); - } - - if (nvgpu_platform_is_simulation(gk20a)) - __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); - - gk20a->irq_stall = platform_get_irq(dev, 0); - gk20a->irq_nonstall = platform_get_irq(dev, 1); - if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { - err = -ENXIO; - goto return_err; - } - - err = devm_request_threaded_irq(&dev->dev, - gk20a->irq_stall, - gk20a_intr_isr_stall, - gk20a_intr_thread_stall, - 0, "gk20a_stall", gk20a); - if (err) { - dev_err(&dev->dev, - "failed to request stall intr irq @ %d\n", - gk20a->irq_stall); - goto return_err; - } - err = devm_request_irq(&dev->dev, - gk20a->irq_nonstall, - gk20a_intr_isr_nonstall, - 0, "gk20a_nonstall", gk20a); - if (err) { - dev_err(&dev->dev, - "failed to request non-stall intr irq @ %d\n", - gk20a->irq_nonstall); - goto return_err; - } - disable_irq(gk20a->irq_stall); - if (gk20a->irq_stall != gk20a->irq_nonstall) - disable_irq(gk20a->irq_nonstall); - - err = gk20a_init_support(dev); - if (err) - goto return_err; - - err = nvgpu_read_fuse_overrides(gk20a); - -#ifdef CONFIG_RESET_CONTROLLER - platform->reset_control = devm_reset_control_get(&dev->dev, NULL); - if (IS_ERR(platform->reset_control)) - platform->reset_control = NULL; -#endif - - err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); - if (err) - goto return_err; - - err = gk20a_pm_init(&dev->dev); - if (err) { - dev_err(&dev->dev, "pm init failed"); - goto return_err; - } - - gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); - - return 0; - -return_err: - nvgpu_free_enabled_flags(gk20a); - - /* - * Last since the above allocs may use data structures in here. 
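- * nvgpu_free_enabled_flags() above releases its memory through the kmem - * layer, and NVGPU_KMEM_FINI_FORCE_CLEANUP is expected to reclaim any - * allocations that are still outstanding at this point.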
- */ - nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); - - kfree(l); - - return err; -} - -int nvgpu_remove(struct device *dev, struct class *class) -{ - struct gk20a *g = get_gk20a(dev); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); -#endif - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - nvgpu_log_fn(g, " "); - - err = nvgpu_quiesce(g); - WARN(err, "gpu failed to idle during driver removal"); - - if (nvgpu_mem_is_valid(&g->syncpt_mem)) - nvgpu_dma_free(g, &g->syncpt_mem); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - if (platform->has_cde) - gk20a_cde_destroy(l); -#endif - -#ifdef CONFIG_GK20A_CTXSW_TRACE - gk20a_ctxsw_trace_cleanup(g); -#endif - - gk20a_sched_ctrl_cleanup(g); - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) - gk20a_scale_exit(dev); - - nvgpu_clk_arb_cleanup_arbiter(g); - - gk20a_user_deinit(dev, class); - - gk20a_debug_deinit(g); - - nvgpu_remove_sysfs(dev); - - if (platform->secure_buffer.destroy) - platform->secure_buffer.destroy(g, - &platform->secure_buffer); - - if (pm_runtime_enabled(dev)) - pm_runtime_disable(dev); - - if (platform->remove) - platform->remove(dev); - - nvgpu_log_fn(g, "removed"); - - return err; -} - -static int __exit gk20a_remove(struct platform_device *pdev) -{ - int err; - struct device *dev = &pdev->dev; - struct gk20a *g = get_gk20a(dev); - - if (gk20a_gpu_is_virtual(dev)) - return vgpu_remove(pdev); - - err = nvgpu_remove(dev, &nvgpu_class); - - set_gk20a(pdev, NULL); - gk20a_put(g); - - return err; -} - -static struct platform_driver gk20a_driver = { - .probe = gk20a_probe, - .remove = __exit_p(gk20a_remove), - .shutdown = gk20a_pm_shutdown, - .driver = { - .owner = THIS_MODULE, - .name = "gk20a", - .probe_type = PROBE_PREFER_ASYNCHRONOUS, -#ifdef CONFIG_OF - .of_match_table = tegra_gk20a_of_match, -#endif -#ifdef CONFIG_PM - .pm = &gk20a_pm_ops, -#endif - .suppress_bind_attrs = true, - } -}; - -struct class nvgpu_class = { - .owner = THIS_MODULE, - .name = CLASS_NAME, -}; - -static int __init gk20a_init(void) -{ - - int ret; - - ret = class_register(&nvgpu_class); - if (ret) - return ret; - - ret = nvgpu_pci_init(); - if (ret) - return ret; - - return platform_driver_register(&gk20a_driver); -} - -static void __exit gk20a_exit(void) -{ - nvgpu_pci_exit(); - platform_driver_unregister(&gk20a_driver); - class_unregister(&nvgpu_class); -} - -MODULE_LICENSE("GPL v2"); -module_init(gk20a_init); -module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h deleted file mode 100644 index ab4bca03..00000000 --- a/drivers/gpu/nvgpu/common/linux/module.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ -#ifndef __NVGPU_COMMON_LINUX_MODULE_H__ -#define __NVGPU_COMMON_LINUX_MODULE_H__ - -struct gk20a; -struct device; -struct nvgpu_os_linux; - -int gk20a_pm_finalize_poweron(struct device *dev); -int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l); -void gk20a_remove_support(struct gk20a *g); -void gk20a_driver_start_unload(struct gk20a *g); -int nvgpu_quiesce(struct gk20a *g); -int nvgpu_remove(struct device *dev, struct class *class); -void nvgpu_free_irq(struct gk20a *g); -struct device_node *nvgpu_get_node(struct gk20a *g); -void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, - struct resource **out); -extern struct class nvgpu_class; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.c b/drivers/gpu/nvgpu/common/linux/module_usermode.c deleted file mode 100644 index 61cb4e87..00000000 --- a/drivers/gpu/nvgpu/common/linux/module_usermode.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -/* - * Locks out the driver from accessing GPU registers. This prevents access to - * these registers after the GPU has been clock or power gated. This should help - * find annoying bugs where register reads and writes are silently dropped - * after the GPU has been turned off. On older chips these reads and writes can - * also lock the entire CPU up. - */ -void nvgpu_lockout_usermode_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = NULL; -} - -/* - * Undoes nvgpu_lockout_usermode_registers(). - */ -void nvgpu_restore_usermode_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = l->usermode_regs_saved; -} - -void nvgpu_remove_usermode_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (l->usermode_regs) { - l->usermode_regs = NULL; - } -} - -void nvgpu_init_usermode_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = l->regs + usermode_cfg0_r(); - l->usermode_regs_saved = l->usermode_regs; -} diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.h b/drivers/gpu/nvgpu/common/linux/module_usermode.h deleted file mode 100644 index b17053ca..00000000 --- a/drivers/gpu/nvgpu/common/linux/module_usermode.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "os_linux.h" - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" -#include "platform_gk20a.h" - -static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - u64 ipa = sg_phys((struct scatterlist *)sgl); - - if (platform->phys_addr) - return platform->phys_addr(g, ipa); - - return ipa; -} - -int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) -{ - void *cpu_va; - pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? - PAGE_KERNEL : - pgprot_writecombine(PAGE_KERNEL); - - if (mem->aperture != APERTURE_SYSMEM) - return 0; - - /* - * WAR for bug 2040115: all sysmem buffers already have a coherent - * vmap(). The prot settings are left alone since eventually this - * should be deleted. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return 0; - - /* - * A CPU mapping is implicitly made for all SYSMEM DMA allocations that - * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make - * another CPU mapping. - */ - if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) - return 0; - - if (WARN_ON(mem->cpu_va)) { - nvgpu_warn(g, "nested"); - return -EBUSY; - } - - cpu_va = vmap(mem->priv.pages, - PAGE_ALIGN(mem->size) >> PAGE_SHIFT, - 0, prot); - - if (WARN_ON(!cpu_va)) - return -ENOMEM; - - mem->cpu_va = cpu_va; - return 0; -} - -void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) -{ - if (mem->aperture != APERTURE_SYSMEM) - return; - - /* - * WAR for bug 2040115: skip this since the map will be taken care of - * during the free in the DMA API. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return; - - /* - * Similar to nvgpu_mem_begin(), we don't need to unmap the CPU mapping - * already made by the DMA API.
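- * - * As a rough usage sketch (illustrative only), a SYSMEM buffer that was - * allocated with NVGPU_DMA_NO_KERNEL_MAPPING is accessed via the - * begin/end pairing: - * - *	if (nvgpu_mem_begin(g, mem) == 0) { - *		u32 v = nvgpu_mem_rd32(g, mem, 0); - * - *		nvgpu_mem_wr32(g, mem, 0, v); - *		nvgpu_mem_end(g, mem); - *	}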
- */ - if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) - return; - - vunmap(mem->cpu_va); - mem->cpu_va = NULL; -} - -static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, *dest_u32 = *arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - *dest_u32++ = gk20a_readl(g, r); - r += sizeof(u32); - } - - *arg = dest_u32; -} - -u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) -{ - u32 data = 0; - - if (mem->aperture == APERTURE_SYSMEM) { - u32 *ptr = mem->cpu_va; - - WARN_ON(!ptr); - data = ptr[w]; -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); -#endif - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 value; - u32 *p = &value; - - nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), - sizeof(u32), pramin_access_batch_rd_n, &p); - - data = value; - - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } - - return data; -} - -u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) -{ - WARN_ON(offset & 3); - return nvgpu_mem_rd32(g, mem, offset / sizeof(u32)); -} - -void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, - u32 offset, void *dest, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *src = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); - memcpy(dest, src, size); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", - src, *dest, size); -#endif - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 *dest_u32 = dest; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_rd_n, &dest_u32); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, *src_u32 = *arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - writel_relaxed(*src_u32++, l->regs + r); - r += sizeof(u32); - } - - *arg = src_u32; -} - -void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) -{ - if (mem->aperture == APERTURE_SYSMEM) { - u32 *ptr = mem->cpu_va; - - WARN_ON(!ptr); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); -#endif - ptr[w] = data; - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 value = data; - u32 *p = &value; - - nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), - sizeof(u32), pramin_access_batch_wr_n, &p); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) -{ - WARN_ON(offset & 3); - nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data); -} - -void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - void *src, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *dest = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... 
[%d bytes]", - dest, *src, size); -#endif - memcpy(dest, src, size); - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 *src_u32 = src; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_wr_n, &src_u32); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, repeat = **arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - writel_relaxed(repeat, l->regs + r); - r += sizeof(u32); - } -} - -void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - u32 c, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - WARN_ON(c & ~0xff); - - c &= 0xff; - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *dest = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]", - dest, c, size); -#endif - memset(dest, c, size); - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); - u32 *p = &repeat_value; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_set, &p); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -/* - * Obtain a SYSMEM address from a Linux SGL. This should eventually go away - * and/or become private to this file once all bad usages of Linux SGLs are - * cleaned up in the driver. - */ -u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) -{ - if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || - !nvgpu_iommuable(g)) - return g->ops.mm.gpu_phys_addr(g, NULL, - __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); - - if (sg_dma_address(sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, NULL, - __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); - - if (sg_dma_address(sgl) == DMA_ERROR_CODE) - return 0; - - return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); -} - -/* - * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM - * allocation. - */ -static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) -{ - return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); -} - -/* - * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM - * allocation. - * - * Note: this API does not make sense to use for _VIDMEM_ buffers with greater - * than one scatterlist chunk. If there's more than one scatterlist chunk then - * the buffer will not be contiguous. As such the base address probably isn't - * very useful. This is true for SYSMEM as well, if there's no IOMMU. - * - * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's - * an IOMMU present and enabled for the GPU. - * - * %attrs can be NULL. If it is not NULL then it may be inspected to determine - * if the address needs to be modified before writing into a PTE. - */ -u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) -{ - struct nvgpu_page_alloc *alloc; - - if (mem->aperture == APERTURE_SYSMEM) - return nvgpu_mem_get_addr_sysmem(g, mem); - - /* - * Otherwise get the vidmem address. 
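- * VIDMEM buffers record their backing nvgpu_page_alloc in - * mem->vidmem_alloc; only a single-chunk allocation has one meaningful - * base address, hence the WARN_ON() below.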
- */ - alloc = mem->vidmem_alloc; - - /* This API should not be used with > 1 chunks */ - WARN_ON(alloc->nr_chunks != 1); - - return alloc->base; -} - -/* - * This should only be used on contiguous buffers regardless of whether - * there's an IOMMU present/enabled. This applies to both SYSMEM and - * VIDMEM. - */ -u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) -{ - /* - * For a VIDMEM buf, this is identical to simply get_addr() so just fall - * back to that. - */ - if (mem->aperture == APERTURE_VIDMEM) - return nvgpu_mem_get_addr(g, mem); - - return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); -} - -/* - * Be careful how you use this! You are responsible for correctly freeing this - * memory. - */ -int nvgpu_mem_create_from_mem(struct gk20a *g, - struct nvgpu_mem *dest, struct nvgpu_mem *src, - int start_page, int nr_pages) -{ - int ret; - u64 start = start_page * PAGE_SIZE; - u64 size = nr_pages * PAGE_SIZE; - dma_addr_t new_iova; - - if (src->aperture != APERTURE_SYSMEM) - return -EINVAL; - - /* Some silly things a caller might do... */ - if (size > src->size) - return -EINVAL; - if ((start + size) > src->size) - return -EINVAL; - - dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; - dest->aperture = src->aperture; - dest->skip_wmb = src->skip_wmb; - dest->size = size; - - /* - * Re-use the CPU mapping only if the mapping was made by the DMA API. - * - * Bug 2040115: the DMA API wrapper makes the mapping that we should - * re-use. - */ - if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || - nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); - - dest->priv.pages = src->priv.pages + start_page; - dest->priv.flags = src->priv.flags; - - new_iova = sg_dma_address(src->priv.sgt->sgl) ? - sg_dma_address(src->priv.sgt->sgl) + start : 0; - - /* - * Make a new SG table that is based only on the subset of pages that - * is passed to us. This table gets freed by the dma free routines. - */ - if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) - ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, - src->priv.pages + start_page, - new_iova, size); - else - ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, - new_iova, size); - - return ret; -} - -int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, - struct page **pages, int nr_pages) -{ - struct sg_table *sgt; - struct page **our_pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - - if (!our_pages) - return -ENOMEM; - - memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); - - if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, - nr_pages * PAGE_SIZE)) { - nvgpu_kfree(g, our_pages); - return -ENOMEM; - } - - /* - * If we are making an SGT from physical pages we can be reasonably - * certain that this should bypass the SMMU - thus we set the DMA (aka - * IOVA) address to 0. This tells the GMMU mapping code to not make a - * mapping directed to the SMMU. 
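- * (The nvgpu_sgt ops below honour this convention: sgl_gpu_addr() - * falls back to the physical address whenever sg_dma_address() is 0.)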
- */ - sg_dma_address(sgt->sgl) = 0; - - dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; - dest->aperture = APERTURE_SYSMEM; - dest->skip_wmb = 0; - dest->size = PAGE_SIZE * nr_pages; - - dest->priv.flags = 0; - dest->priv.pages = our_pages; - dest->priv.sgt = sgt; - - return 0; -} - -#ifdef CONFIG_TEGRA_GK20A_NVHOST -int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, - u64 src_phys, int nr_pages) -{ - struct page **pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - int i, ret = 0; - - if (!pages) - return -ENOMEM; - - for (i = 0; i < nr_pages; i++) - pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); - - ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); - nvgpu_kfree(g, pages); - - return ret; -} -#endif - -static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) -{ - return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); -} - -static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) -{ - return (u64)__nvgpu_sgl_phys(g, sgl); -} - -static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) -{ - return (u64)sg_dma_address((struct scatterlist *)sgl); -} - -static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) -{ - return (u64)((struct scatterlist *)sgl)->length; -} - -static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, - struct nvgpu_sgl *sgl, - struct nvgpu_gmmu_attrs *attrs) -{ - if (sg_dma_address((struct scatterlist *)sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, attrs, - __nvgpu_sgl_phys(g, sgl)); - - if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) - return 0; - - return nvgpu_mem_iommu_translate(g, - sg_dma_address((struct scatterlist *)sgl)); -} - -static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, - struct nvgpu_sgt *sgt) -{ - if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) - return false; - return true; -} - -static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) -{ - /* - * Free this SGT. All we do is free the passed SGT. The actual Linux - * SGT/SGL needs to be freed separately. 
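- * - * As a rough sketch of the expected create/use/free pairing (error - * handling elided): - * - *	struct nvgpu_sgt *sgt = nvgpu_linux_sgt_create(g, mem->priv.sgt); - * - *	... walk sgt via sgt->ops ... - * - *	sgt->ops->sgt_free(g, sgt); - * - * The underlying struct sg_table remains owned by the caller.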
- */ - nvgpu_kfree(g, sgt); -} - -static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { - .sgl_next = nvgpu_mem_linux_sgl_next, - .sgl_phys = nvgpu_mem_linux_sgl_phys, - .sgl_dma = nvgpu_mem_linux_sgl_dma, - .sgl_length = nvgpu_mem_linux_sgl_length, - .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, - .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, - .sgt_free = nvgpu_mem_linux_sgl_free, -}; - -static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( - struct gk20a *g, - struct scatterlist *linux_sgl) -{ - struct nvgpu_page_alloc *vidmem_alloc; - - vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); - if (!vidmem_alloc) - return NULL; - - return &vidmem_alloc->sgt; -} - -struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) -{ - struct nvgpu_sgt *nvgpu_sgt; - struct scatterlist *linux_sgl = sgt->sgl; - - if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) - return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); - - nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); - if (!nvgpu_sgt) - return NULL; - - nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); - - nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; - nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; - - return nvgpu_sgt; -} - -struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, - struct nvgpu_mem *mem) -{ - return nvgpu_linux_sgt_create(g, mem->priv.sgt); -} diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c deleted file mode 100644 index 6ab60248..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvhost.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include - -#include "nvhost_priv.h" - -#include "gk20a/gk20a.h" -#include "os_linux.h" -#include "module.h" - -int nvgpu_get_nvhost_dev(struct gk20a *g) -{ - struct device_node *np = nvgpu_get_node(g); - struct platform_device *host1x_pdev = NULL; - const __be32 *host1x_ptr; - - host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); - if (host1x_ptr) { - struct device_node *host1x_node = - of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); - - host1x_pdev = of_find_device_by_node(host1x_node); - if (!host1x_pdev) { - nvgpu_warn(g, "host1x device not available"); - return -EPROBE_DEFER; - } - - } else { - if (g->has_syncpoints) { - nvgpu_warn(g, "host1x reference not found. 
assuming no syncpoints support"); - g->has_syncpoints = false; - } - return 0; - } - - g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); - if (!g->nvhost_dev) - return -ENOMEM; - - g->nvhost_dev->host1x_pdev = host1x_pdev; - - return 0; -} - -void nvgpu_free_nvhost_dev(struct gk20a *g) -{ - nvgpu_kfree(g, g->nvhost_dev); -} - -int nvgpu_nvhost_module_busy_ext( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); -} - -void nvgpu_nvhost_module_idle_ext( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - nvhost_module_idle_ext(nvhost_dev->host1x_pdev); -} - -void nvgpu_nvhost_debug_dump_device( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - nvhost_debug_dump_device(nvhost_dev->host1x_pdev); -} - -const char *nvgpu_nvhost_syncpt_get_name( - struct nvgpu_nvhost_dev *nvhost_dev, int id) -{ - return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); -} - -bool nvgpu_nvhost_syncpt_is_valid_pt_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); -} - -int nvgpu_nvhost_syncpt_is_expired_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) -{ - return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, - id, thresh); -} - -u32 nvgpu_nvhost_syncpt_incr_max_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) -{ - return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); -} - -int nvgpu_nvhost_intr_register_notifier( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, - void (*callback)(void *, int), void *private_data) -{ - return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, - id, thresh, - callback, private_data); -} - -void nvgpu_nvhost_syncpt_set_min_eq_max_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); -} - -void nvgpu_nvhost_syncpt_put_ref_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); -} - -u32 nvgpu_nvhost_get_syncpt_host_managed( - struct nvgpu_nvhost_dev *nvhost_dev, - u32 param, const char *syncpt_name) -{ - return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, - param, syncpt_name); -} - -u32 nvgpu_nvhost_get_syncpt_client_managed( - struct nvgpu_nvhost_dev *nvhost_dev, - const char *syncpt_name) -{ - return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, - syncpt_name); -} - -int nvgpu_nvhost_syncpt_wait_timeout_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, - u32 thresh, u32 timeout, u32 *value, struct timespec *ts) -{ - return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, - id, thresh, timeout, value, ts); -} - -int nvgpu_nvhost_syncpt_read_ext_check( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) -{ - return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); -} - -u32 nvgpu_nvhost_syncpt_read_maxval( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); -} - -void nvgpu_nvhost_syncpt_set_safe_state( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - u32 val; - - /* - * Add large number of increments to current value - * so that all waiters on this syncpoint are released - * - * We don't expect any case where more than 0x10000 increments - * are pending - */ - val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); - val += 0x10000; - - nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); - nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, 
val); -} - -int nvgpu_nvhost_create_symlink(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - int err = 0; - - if (g->nvhost_dev && - (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { - err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, - &dev->kobj, - dev_name(dev)); - } - - return err; -} - -void nvgpu_nvhost_remove_symlink(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (g->nvhost_dev && - (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { - sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, - dev_name(dev)); - } -} - -#ifdef CONFIG_SYNC -u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) -{ - return nvhost_sync_pt_id(pt); -} - -u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) -{ - return nvhost_sync_pt_thresh(pt); -} - -struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) -{ - return nvhost_sync_fdget(fd); -} - -int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) -{ - return nvhost_sync_num_pts(fence); -} - -struct sync_fence *nvgpu_nvhost_sync_create_fence( - struct nvgpu_nvhost_dev *nvhost_dev, - u32 id, u32 thresh, const char *name) -{ - struct nvhost_ctrl_sync_fence_info pt = { - .id = id, - .thresh = thresh, - }; - - return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); -} -#endif /* CONFIG_SYNC */ - -#ifdef CONFIG_TEGRA_T19X_GRHOST -int nvgpu_nvhost_syncpt_unit_interface_get_aperture( - struct nvgpu_nvhost_dev *nvhost_dev, - u64 *base, size_t *size) -{ - return nvhost_syncpt_unit_interface_get_aperture( - nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); -} - -u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) -{ - return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); -} - -int nvgpu_nvhost_syncpt_init(struct gk20a *g) -{ - int err = 0; - - if (!g->has_syncpoints) - return -ENOSYS; - - err = nvgpu_get_nvhost_dev(g); - if (err) { - nvgpu_err(g, "host1x device not available"); - g->has_syncpoints = false; - return -ENOSYS; - } - - err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( - g->nvhost_dev, - &g->syncpt_unit_base, - &g->syncpt_unit_size); - if (err) { - nvgpu_err(g, "Failed to get syncpt interface"); - g->has_syncpoints = false; - return -ENOSYS; - } - - g->syncpt_size = - nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); - nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", - g->syncpt_unit_base, g->syncpt_unit_size, - g->syncpt_size); - - return 0; -} -#endif diff --git a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h b/drivers/gpu/nvgpu/common/linux/nvhost_priv.h deleted file mode 100644 index c03390a7..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef __NVGPU_NVHOST_PRIV_H__ -#define __NVGPU_NVHOST_PRIV_H__ - -struct nvgpu_nvhost_dev { - struct platform_device *host1x_pdev; -}; - -#endif /* __NVGPU_NVHOST_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/nvlink.c b/drivers/gpu/nvgpu/common/linux/nvlink.c deleted file mode 100644 index c93514c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvlink.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include "module.h" - -#ifdef CONFIG_TEGRA_NVLINK -int nvgpu_nvlink_read_dt_props(struct gk20a *g) -{ - struct device_node *np; - struct nvlink_device *ndev = g->nvlink.priv; - u32 local_dev_id; - u32 local_link_id; - u32 remote_dev_id; - u32 remote_link_id; - bool is_master; - - /* Parse DT */ - np = nvgpu_get_node(g); - if (!np) - goto fail; - - np = of_get_child_by_name(np, "nvidia,nvlink"); - if (!np) - goto fail; - - np = of_get_child_by_name(np, "endpoint"); - if (!np) - goto fail; - - /* Parse DT structure to detect endpoint topology */ - of_property_read_u32(np, "local_dev_id", &local_dev_id); - of_property_read_u32(np, "local_link_id", &local_link_id); - of_property_read_u32(np, "remote_dev_id", &remote_dev_id); - of_property_read_u32(np, "remote_link_id", &remote_link_id); - is_master = of_property_read_bool(np, "is_master"); - - /* Check that we are in dGPU mode */ - if (local_dev_id != NVLINK_ENDPT_GV100) { - nvgpu_err(g, "Local nvlink device is not dGPU"); - return -EINVAL; - } - - ndev->is_master = is_master; - ndev->device_id = local_dev_id; - ndev->link.link_id = local_link_id; - ndev->link.remote_dev_info.device_id = remote_dev_id; - ndev->link.remote_dev_info.link_id = remote_link_id; - - return 0; - -fail: - nvgpu_info(g, "nvlink endpoint not found or invalid in DT"); - return -ENODEV; -} -#endif /* CONFIG_TEGRA_NVLINK */ - -void nvgpu_mss_nvlink_init_credits(struct gk20a *g) -{ - /* MSS_NVLINK_1_BASE */ - void __iomem *soc1 = ioremap(0x01f20010, 4096); - /* MSS_NVLINK_2_BASE */ - void __iomem *soc2 = ioremap(0x01f40010, 4096); - /* MSS_NVLINK_3_BASE */ - void __iomem *soc3 = ioremap(0x01f60010, 4096); - /* MSS_NVLINK_4_BASE */ - void __iomem *soc4 = ioremap(0x01f80010, 4096); - u32 val; - - nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits"); - - val = readl_relaxed(soc1); - writel_relaxed(val, soc1); - val = readl_relaxed(soc1 + 4); - writel_relaxed(val, soc1 + 4); - - val = readl_relaxed(soc2); - writel_relaxed(val, soc2); - val = readl_relaxed(soc2 + 4); - writel_relaxed(val, soc2 + 4); - - val = readl_relaxed(soc3); - writel_relaxed(val, soc3); - val = readl_relaxed(soc3 + 4); - writel_relaxed(val, soc3 + 4); - - val = readl_relaxed(soc4); - writel_relaxed(val, soc4); - val = readl_relaxed(soc4 + 4); - writel_relaxed(val, soc4 + 4); -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c deleted file mode 100644 index
9be8c6c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#include -#include -#include - -#include "gk20a/gk20a.h" - -#include "../drivers/staging/android/sync.h" - -inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) -{ - struct sync_fence *fence = (struct sync_fence *)s->priv; - return fence; -} - -static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) -{ - fence_out->priv = NULL; - fence_out->g = NULL; - fence_out->ops = NULL; -} - -void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, - struct gk20a *g, const struct nvgpu_os_fence_ops *fops, - struct sync_fence *fence) -{ - fence_out->g = g; - fence_out->ops = fops; - fence_out->priv = (void *)fence; -} - -void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) -{ - struct sync_fence *fence = nvgpu_get_sync_fence(s); - - sync_fence_put(fence); - - nvgpu_os_fence_clear(s); -} - -void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) -{ - struct sync_fence *fence = nvgpu_get_sync_fence(s); - - sync_fence_get(fence); - sync_fence_install(fence, fd); -} - -int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - int err = -ENOSYS; - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); -#endif - - if (err) - err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); - - if (err) - nvgpu_err(c->g, "error obtaining fence from fd %d", fd); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c deleted file mode 100644 index 25832417..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include -#include -#include -#include - -#include "gk20a/channel_sync_gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "sync_sema_android.h" - -#include "../drivers/staging/android/sync.h" - -int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, - struct priv_cmd_entry *wait_cmd, - struct channel_gk20a *c, - int max_wait_cmds) -{ - int err; - int wait_cmd_size; - int num_wait_cmds; - int i; - struct nvgpu_semaphore *sema; - struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); - - wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); - - num_wait_cmds = sync_fence->num_fences; - if (num_wait_cmds == 0) - return 0; - - if (max_wait_cmds && num_wait_cmds > max_wait_cmds) - return -EINVAL; - - err = gk20a_channel_alloc_priv_cmdbuf(c, - wait_cmd_size * num_wait_cmds, - wait_cmd); - if (err) { - nvgpu_err(c->g, "not enough priv cmd buffer space"); - return err; - } - - for (i = 0; i < num_wait_cmds; i++) { - struct fence *f = sync_fence->cbs[i].sync_pt; - struct sync_pt *pt = sync_pt_from_fence(f); - - sema = gk20a_sync_pt_sema(pt); - gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd, - wait_cmd_size, i); - } - - return 0; -} - -static const struct nvgpu_os_fence_ops sema_ops = { - .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, - .drop_ref = nvgpu_os_fence_android_drop_ref, - .install_fence = nvgpu_os_fence_android_install_fd, -}; - -int nvgpu_os_fence_sema_create( - struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, - struct nvgpu_semaphore *sema) -{ - struct sync_fence *fence; - - fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", - nvgpu_semaphore_gpu_ro_va(sema)); - - if (!fence) { - nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", - (u32)nvgpu_semaphore_gpu_ro_va(sema)); - - return -ENOMEM; - } - - nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); - - return 0; -} - -int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - struct sync_fence *fence = gk20a_sync_fence_fdget(fd); - - if (!fence) - return -EINVAL; - - nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c deleted file mode 100644 index d7a72fcd..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/channel_gk20a.h" -#include "gk20a/channel_sync_gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "../drivers/staging/android/sync.h" - -int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, - struct priv_cmd_entry *wait_cmd, - struct channel_gk20a *c, - int max_wait_cmds) -{ - int err; - int wait_cmd_size; - int num_wait_cmds; - int i; - u32 wait_id; - struct sync_pt *pt; - - struct sync_fence *sync_fence = (struct sync_fence *)s->priv; - - if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) - return -EINVAL; - - /* validate syncpt ids */ - for (i = 0; i < sync_fence->num_fences; i++) { - pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); - wait_id = nvgpu_nvhost_sync_pt_id(pt); - if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( - c->g->nvhost_dev, wait_id)) { - return -EINVAL; - } - } - - num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); - if (num_wait_cmds == 0) - return 0; - - wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); - err = gk20a_channel_alloc_priv_cmdbuf(c, - wait_cmd_size * num_wait_cmds, wait_cmd); - if (err) { - nvgpu_err(c->g, - "not enough priv cmd buffer space"); - return err; - } - - for (i = 0; i < sync_fence->num_fences; i++) { - struct fence *f = sync_fence->cbs[i].sync_pt; - struct sync_pt *pt = sync_pt_from_fence(f); - u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); - u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); - - err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value, - wait_cmd, wait_cmd_size, i, true); - } - - WARN_ON(i != num_wait_cmds); - - return 0; -} - -static const struct nvgpu_os_fence_ops syncpt_ops = { - .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, - .drop_ref = nvgpu_os_fence_android_drop_ref, - .install_fence = nvgpu_os_fence_android_install_fd, -}; - -int nvgpu_os_fence_syncpt_create( - struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) -{ - struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( - nvhost_dev, id, thresh, "fence"); - - if (!fence) { - nvgpu_err(c->g, "error constructing fence %s", "fence"); - return -ENOMEM; - } - - nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); - - return 0; -} - -int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); - - if (!fence) - return -ENOMEM; - - nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); - - return 0; -} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h deleted file mode 100644 index 4dcce322..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_linux.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef NVGPU_OS_LINUX_H -#define NVGPU_OS_LINUX_H - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "cde.h" -#include "sched.h" - -struct nvgpu_os_linux_ops { - struct { - void (*get_program_numbers)(struct gk20a *g, - u32 block_height_log2, - u32 shader_parameter, - int *hprog, int *vprog); - bool (*need_scatter_buffer)(struct gk20a *g); - int (*populate_scatter_buffer)(struct gk20a *g, - struct sg_table *sgt, - size_t surface_size, - void *scatter_buffer_ptr, - size_t scatter_buffer_size); - } cde; -}; - -struct nvgpu_os_linux { - struct gk20a g; - struct device *dev; - - struct { - struct cdev cdev; - struct device *node; - } channel; - - struct { - struct cdev cdev; - struct device *node; - } ctrl; - - struct { - struct cdev cdev; - struct device *node; - } as_dev; - - struct { - struct cdev cdev; - struct device *node; - } dbg; - - struct { - struct cdev cdev; - struct device *node; - } prof; - - struct { - struct cdev cdev; - struct device *node; - } tsg; - - struct { - struct cdev cdev; - struct device *node; - } ctxsw; - - struct { - struct cdev cdev; - struct device *node; - } sched; - - dev_t cdev_region; - - struct devfreq *devfreq; - - struct device_dma_parameters dma_parms; - - atomic_t hw_irq_stall_count; - atomic_t hw_irq_nonstall_count; - - struct nvgpu_cond sw_irq_stall_last_handled_wq; - atomic_t sw_irq_stall_last_handled; - - atomic_t nonstall_ops; - - struct nvgpu_cond sw_irq_nonstall_last_handled_wq; - atomic_t sw_irq_nonstall_last_handled; - - struct work_struct nonstall_fn_work; - struct workqueue_struct *nonstall_work_queue; - - struct resource *reg_mem; - void __iomem *regs; - void __iomem *regs_saved; - - struct resource *bar1_mem; - void __iomem *bar1; - void __iomem *bar1_saved; - - void __iomem *usermode_regs; - void __iomem *usermode_regs_saved; - - struct nvgpu_os_linux_ops ops; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; - struct dentry *debugfs_alias; - - struct dentry *debugfs_ltc_enabled; - struct dentry *debugfs_timeouts_enabled; - struct dentry *debugfs_gr_idle_timeout_default; - struct dentry *debugfs_disable_bigpage; - struct dentry *debugfs_gr_default_attrib_cb_size; - - struct dentry *debugfs_timeslice_low_priority_us; - struct dentry *debugfs_timeslice_medium_priority_us; - struct dentry *debugfs_timeslice_high_priority_us; - struct dentry *debugfs_runlist_interleave; - struct dentry *debugfs_allocators; - struct dentry *debugfs_xve; - struct dentry *debugfs_kmem; - struct dentry *debugfs_hal; - - struct dentry *debugfs_force_preemption_cilp; - struct dentry *debugfs_force_preemption_gfxp; - struct dentry *debugfs_dump_ctxsw_stats; -#endif - DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); - - struct gk20a_cde_app cde_app; - - struct rw_semaphore busy_lock; - - struct gk20a_sched_ctrl sched_ctrl; - - bool init_done; -}; - -static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) -{ - return container_of(g, struct nvgpu_os_linux, g); -} - -static inline struct device *dev_from_gk20a(struct gk20a *g) -{ - return nvgpu_os_linux_from_gk20a(g)->dev; -} - -#define INTERFACE_NAME "nvhost%s-gpu" - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/os_sched.c b/drivers/gpu/nvgpu/common/linux/os_sched.c deleted file mode 100644 index 586b35eb..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_sched.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -int nvgpu_current_tid(struct gk20a *g) -{ - return current->pid; -} - -int nvgpu_current_pid(struct gk20a *g) -{ - return current->tgid; -} diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c deleted file mode 100644 index 1011b441..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ /dev/null @@ -1,861 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "clk/clk.h" -#include "clk/clk_mclk.h" -#include "module.h" -#include "intr.h" -#include "sysfs.h" -#include "os_linux.h" -#include "platform_gk20a.h" -#include - -#include "pci.h" -#include "pci_usermode.h" - -#include "os_linux.h" -#include "driver_common.h" - -#define PCI_INTERFACE_NAME "card-%s%%s" - -static int nvgpu_pci_tegra_probe(struct device *dev) -{ - return 0; -} - -static int nvgpu_pci_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - return 0; -} - -static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) -{ - return false; -} - -static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) -{ - long ret = (long)rate; - - if (rate == UINT_MAX) - ret = BOOT_GPC2CLK_MHZ * 1000000UL; - - return ret; -} - -static struct gk20a_platform nvgpu_pci_device[] = { - { /* DEVICE=0x1c35 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86063000, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c36 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = 
nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86062d00, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c37 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86063000, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c75 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86065300, - .hardcode_sw_threshold = false, - .ina3221_dcb_index = 1, - .ina3221_i2c_address = 0x80, - .ina3221_i2c_port = 0x1, - }, - { /* DEVICE=PG503 SKU 201 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88001e00, - .hardcode_sw_threshold = false, - .run_preos = true, - }, - { /* DEVICE=PG503 SKU 200 ES */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - 
.can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88001e00, - .hardcode_sw_threshold = false, - .run_preos = true, - }, - { - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88000126, - .hardcode_sw_threshold = false, - .run_preos = true, - .has_syncpoints = true, - }, - { /* SKU250 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = false, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x1, - .hardcode_sw_threshold = false, - .run_preos = true, - .has_syncpoints = true, - }, - { /* SKU 0x1e3f */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - /* - * WAR: PCIE X1 is very slow, set to very high value till nvlink is up - */ - .ch_wdt_timeout_ms = 30000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x1, - .hardcode_sw_threshold = false, - .unified_memory = false, - }, - { /* 0x1eba */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg 
= false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x90040109, - .hardcode_sw_threshold = false, - .has_syncpoints = true, - }, -}; - -static struct pci_device_id nvgpu_pci_table[] = { - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 0, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 1, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 2, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 3, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 4, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 5, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 6, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 7, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 8, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 9, - }, - {} -}; - -static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - irqreturn_t ret_stall; - irqreturn_t ret_nonstall; - - ret_stall = nvgpu_intr_stall(g); - ret_nonstall = nvgpu_intr_nonstall(g); - -#if defined(CONFIG_PCI_MSI) - /* Send MSI EOI */ - if (g->ops.xve.rearm_msi && g->msi_enabled) - g->ops.xve.rearm_msi(g); -#endif - - return (ret_stall == IRQ_NONE) ? 
ret_nonstall : IRQ_WAKE_THREAD; -} - -static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_thread_stall(g); -} - -static int nvgpu_pci_init_support(struct pci_dev *pdev) -{ - int err = 0; - struct gk20a *g = get_gk20a(&pdev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (IS_ERR(l->regs)) { - nvgpu_err(g, "failed to remap gk20a registers"); - err = PTR_ERR(l->regs); - goto fail; - } - - l->bar1 = ioremap(pci_resource_start(pdev, 1), - pci_resource_len(pdev, 1)); - if (IS_ERR(l->bar1)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(l->bar1); - goto fail; - } - - err = nvgpu_init_sim_support_linux_pci(g); - if (err) - goto fail; - err = nvgpu_init_sim_support_pci(g); - if (err) - goto fail_sim; - - nvgpu_pci_init_usermode_support(l); - - return 0; - - fail_sim: - nvgpu_remove_sim_support_linux_pci(g); - fail: - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - return err; -} - -static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) -{ - if (mode) - *mode = S_IRUGO | S_IWUGO; - return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); -} - -static struct class nvgpu_pci_class = { - .owner = THIS_MODULE, - .name = "nvidia-pci-gpu", - .devnode = nvgpu_pci_devnode, -}; - -#ifdef CONFIG_PM -static int nvgpu_pci_pm_runtime_resume(struct device *dev) -{ - return gk20a_pm_finalize_poweron(dev); -} - -static int nvgpu_pci_pm_runtime_suspend(struct device *dev) -{ - return 0; -} - -static const struct dev_pm_ops nvgpu_pci_pm_ops = { - .runtime_resume = nvgpu_pci_pm_runtime_resume, - .runtime_suspend = nvgpu_pci_pm_runtime_suspend, - .resume = nvgpu_pci_pm_runtime_resume, - .suspend = nvgpu_pci_pm_runtime_suspend, -}; -#endif - -static int nvgpu_pci_pm_init(struct device *dev) -{ -#ifdef CONFIG_PM - struct gk20a *g = get_gk20a(dev); - - if (!g->can_railgate) { - pm_runtime_disable(dev); - } else { - if (g->railgate_delay) - pm_runtime_set_autosuspend_delay(dev, - g->railgate_delay); - - /* - * Runtime PM for PCI devices is disabled by default, - * so we need to enable it first - */ - pm_runtime_use_autosuspend(dev); - pm_runtime_put_noidle(dev); - pm_runtime_allow(dev); - } -#endif - return 0; -} - -static int nvgpu_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *pent) -{ - struct gk20a_platform *platform = NULL; - struct nvgpu_os_linux *l; - struct gk20a *g; - int err; - char nodefmt[64]; - struct device_node *np; - - /* make sure driver_data is a sane index */ - if (pent->driver_data >= sizeof(nvgpu_pci_device) / - sizeof(nvgpu_pci_device[0])) { - return -EINVAL; - } - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(&pdev->dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - - hash_init(l->ecc_sysfs_stats_htable); - - g = &l->g; - nvgpu_init_gk20a(g); - - nvgpu_kmem_init(g); - - /* Allocate memory to hold platform data*/ - platform = (struct gk20a_platform *)nvgpu_kzalloc( g, - sizeof(struct gk20a_platform)); - if (!platform) { - dev_err(&pdev->dev, "couldn't allocate platform data"); - err = -ENOMEM; - goto err_free_l; - } - - /* copy detected device data to allocated platform space*/ - memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data], - sizeof(struct gk20a_platform)); - - pci_set_drvdata(pdev, platform); - - err = nvgpu_init_enabled_flags(g); - if (err) - goto 
err_free_platform; - - platform->g = g; - l->dev = &pdev->dev; - - np = nvgpu_get_node(g); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); - } - - err = pci_enable_device(pdev); - if (err) - goto err_free_platform; - pci_set_master(pdev); - - g->pci_vendor_id = pdev->vendor; - g->pci_device_id = pdev->device; - g->pci_subsystem_vendor_id = pdev->subsystem_vendor; - g->pci_subsystem_device_id = pdev->subsystem_device; - g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub - g->pci_revision = pdev->revision; - - g->ina3221_dcb_index = platform->ina3221_dcb_index; - g->ina3221_i2c_address = platform->ina3221_i2c_address; - g->ina3221_i2c_port = platform->ina3221_i2c_port; - g->hardcode_sw_threshold = platform->hardcode_sw_threshold; - -#if defined(CONFIG_PCI_MSI) - err = pci_enable_msi(pdev); - if (err) { - nvgpu_err(g, - "MSI could not be enabled, falling back to legacy"); - g->msi_enabled = false; - } else - g->msi_enabled = true; -#endif - - g->irq_stall = pdev->irq; - g->irq_nonstall = pdev->irq; - if (g->irq_stall < 0) { - err = -ENXIO; - goto err_disable_msi; - } - - err = devm_request_threaded_irq(&pdev->dev, - g->irq_stall, - nvgpu_pci_isr, - nvgpu_pci_intr_thread, -#if defined(CONFIG_PCI_MSI) - g->msi_enabled ? 0 : -#endif - IRQF_SHARED, "nvgpu", g); - if (err) { - nvgpu_err(g, - "failed to request irq @ %d", g->irq_stall); - goto err_disable_msi; - } - disable_irq(g->irq_stall); - - err = nvgpu_pci_init_support(pdev); - if (err) - goto err_free_irq; - - if (strchr(dev_name(&pdev->dev), '%')) { - nvgpu_err(g, "illegal character in device name"); - err = -EINVAL; - goto err_free_irq; - } - - snprintf(nodefmt, sizeof(nodefmt), - PCI_INTERFACE_NAME, dev_name(&pdev->dev)); - - err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); - if (err) - goto err_free_irq; - - err = nvgpu_pci_pm_init(&pdev->dev); - if (err) { - nvgpu_err(g, "pm init failed"); - goto err_free_irq; - } - - err = nvgpu_nvlink_probe(g); - /* - * ENODEV is a legal error which means there is no NVLINK - * any other error is fatal - */ - if (err) { - if (err != -ENODEV) { - nvgpu_err(g, "fatal error probing nvlink, bailing out"); - goto err_free_irq; - } - /* Enable Semaphore SHIM on nvlink only for now. 
*/ - __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); - g->has_syncpoints = false; - } else { - err = nvgpu_nvhost_syncpt_init(g); - if (err) { - if (err != -ENOSYS) { - nvgpu_err(g, "syncpt init failed"); - goto err_free_irq; - } - } - } - - g->mm.has_physical_mode = false; - - return 0; - -err_free_irq: - nvgpu_free_irq(g); -err_disable_msi: -#if defined(CONFIG_PCI_MSI) - if (g->msi_enabled) - pci_disable_msi(pdev); -#endif -err_free_platform: - nvgpu_kfree(g, platform); -err_free_l: - kfree(l); - return err; -} - -static void nvgpu_pci_remove(struct pci_dev *pdev) -{ - struct gk20a *g = get_gk20a(&pdev->dev); - struct device *dev = dev_from_gk20a(g); - int err; - - /* no support yet for unbind if DGPU is in VGPU mode */ - if (gk20a_gpu_is_virtual(dev)) - return; - - nvgpu_nvlink_remove(g); - - gk20a_driver_start_unload(g); - err = nvgpu_quiesce(g); - /* TODO: handle failure to idle */ - WARN(err, "gpu failed to idle during driver removal"); - - nvgpu_free_irq(g); - - nvgpu_remove(dev, &nvgpu_pci_class); - -#if defined(CONFIG_PCI_MSI) - if (g->msi_enabled) - pci_disable_msi(pdev); - else { - /* IRQ does not need to be enabled in MSI as the line is not - * shared - */ - enable_irq(g->irq_stall); - } -#endif - - /* free allocated platform data space */ - nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); - - gk20a_get_platform(&pdev->dev)->g = NULL; - gk20a_put(g); -} - -static struct pci_driver nvgpu_pci_driver = { - .name = "nvgpu", - .id_table = nvgpu_pci_table, - .probe = nvgpu_pci_probe, - .remove = nvgpu_pci_remove, -#ifdef CONFIG_PM - .driver.pm = &nvgpu_pci_pm_ops, -#endif -}; - -int __init nvgpu_pci_init(void) -{ - int ret; - - ret = class_register(&nvgpu_pci_class); - if (ret) - return ret; - - return pci_register_driver(&nvgpu_pci_driver); -} - -void __exit nvgpu_pci_exit(void) -{ - pci_unregister_driver(&nvgpu_pci_driver); - class_unregister(&nvgpu_pci_class); -} diff --git a/drivers/gpu/nvgpu/common/linux/pci.h b/drivers/gpu/nvgpu/common/linux/pci.h deleted file mode 100644 index cc6b77b1..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef NVGPU_PCI_H -#define NVGPU_PCI_H - -#ifdef CONFIG_GK20A_PCI -int nvgpu_pci_init(void); -void nvgpu_pci_exit(void); -#else -static inline int nvgpu_pci_init(void) { return 0; } -static inline void nvgpu_pci_exit(void) {} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.c b/drivers/gpu/nvgpu/common/linux/pci_usermode.c deleted file mode 100644 index f474bd10..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci_usermode.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
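
For reference, the pci.h removed just above is a textbook Kconfig stub: with CONFIG_GK20A_PCI enabled the header declares the real entry points, otherwise it supplies empty static inlines so callers (here the module init path) never need an #ifdef of their own. A generic sketch of the pattern, using a hypothetical CONFIG_FOO feature:

#ifdef CONFIG_FOO
int foo_init(void);
void foo_exit(void);
#else
/* Feature compiled out: the stubs vanish entirely after inlining. */
static inline int foo_init(void) { return 0; }
static inline void foo_exit(void) {}
#endif
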
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) -{ - l->usermode_regs = l->regs + usermode_cfg0_r(); - l->usermode_regs_saved = l->usermode_regs; -} diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.h b/drivers/gpu/nvgpu/common/linux/pci_usermode.h deleted file mode 100644 index 25a08d28..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci_usermode.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __NVGPU_PCI_USERMODE_H__ -#define __NVGPU_PCI_USERMODE_H__ - -struct nvgpu_os_linux; - -void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c deleted file mode 100644 index 0fe1c8d2..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
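
The whole of the pci_usermode.c removed above is one assignment: the usermode register window is not a separate PCI BAR, it is a fixed offset (given by the generated usermode_cfg0_r() accessor) inside the already-ioremapped BAR0. A minimal sketch of the same idea; the offset value and context struct here are illustrative only:

struct mmio_ctx {
	void __iomem *regs;		/* BAR0 mapping */
	void __iomem *usermode_regs;	/* alias into BAR0 */
};

#define USERMODE_OFFSET 0x810000	/* illustrative; real offset comes from usermode_cfg0_r() */

static void init_usermode(struct mmio_ctx *c)
{
	c->usermode_regs = c->regs + USERMODE_OFFSET;
}
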
- */
-
-#include
-
-#include
-#include
-#include
-
-#include "os_linux.h"
-
-#include "gk20a/gk20a.h"
-
-#include "platform_gk20a.h"
-#include "platform_gk20a_tegra.h"
-#include "gp10b/platform_gp10b.h"
-#include "platform_gp10b_tegra.h"
-#include "platform_ecc_sysfs.h"
-
-static u32 gen_ecc_hash_key(char *str)
-{
-	int i = 0;
-	u32 hash_key = 0x811c9dc5;
-
-	while (str[i]) {
-		hash_key *= 0x1000193;
-		hash_key ^= (u32)(str[i]);
-		i++;
-	}
-
-	return hash_key;
-}
-
-static ssize_t ecc_stat_show(struct device *dev,
-			     struct device_attribute *attr,
-			     char *buf)
-{
-	const char *ecc_stat_full_name = attr->attr.name;
-	const char *ecc_stat_base_name;
-	unsigned int hw_unit;
-	unsigned int subunit;
-	struct gk20a_ecc_stat *ecc_stat;
-	u32 hash_key;
-	struct gk20a *g = get_gk20a(dev);
-	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
-
-	if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
-		   &subunit) == 2) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
-		hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
-	} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
-	} else {
-		return snprintf(buf,
-				PAGE_SIZE,
-				"Error: Invalid ECC stat name!\n");
-	}
-
-	hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
-
-	hash_for_each_possible(l->ecc_sysfs_stats_htable,
-			       ecc_stat,
-			       hash_node,
-			       hash_key) {
-		if (hw_unit >= ecc_stat->count)
-			continue;
-		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
-			return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
-	}
-
-	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
-}
-
-int nvgpu_gr_ecc_stat_create(struct device *dev,
-			     int is_l2, char *ecc_stat_name,
-			     struct gk20a_ecc_stat *ecc_stat)
-{
-	struct gk20a *g = get_gk20a(dev);
-	char *ltc_unit_name = "ltc";
-	char *gr_unit_name = "gpc0_tpc";
-	char *lts_unit_name = "lts";
-	int num_hw_units = 0;
-	int num_subunits = 0;
-
-	if (is_l2 == 1)
-		num_hw_units = g->ltc_count;
-	else if (is_l2 == 2) {
-		num_hw_units = g->ltc_count;
-		num_subunits = g->gr.slices_per_ltc;
-	} else
-		num_hw_units = g->gr.tpc_count;
-
-	return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
-				     is_l2 ? ltc_unit_name : gr_unit_name,
-				     num_subunits ? lts_unit_name : NULL,
-				     ecc_stat_name,
-				     ecc_stat);
-}
-
-int nvgpu_ecc_stat_create(struct device *dev,
-			  int num_hw_units, int num_subunits,
-			  char *ecc_unit_name, char *ecc_subunit_name,
-			  char *ecc_stat_name,
-			  struct gk20a_ecc_stat *ecc_stat)
-{
-	int error = 0;
-	struct gk20a *g = get_gk20a(dev);
-	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
-	int hw_unit = 0;
-	int subunit = 0;
-	int element = 0;
-	u32 hash_key = 0;
-	struct device_attribute *dev_attr_array;
-
-	int num_elements = num_subunits ?
num_subunits * num_hw_units : - num_hw_units; - - /* Allocate arrays */ - dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) * - num_elements); - ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements); - ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) { - ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) * - ECC_STAT_NAME_MAX_SIZE); - } - ecc_stat->count = num_elements; - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit*num_subunits + subunit; - - snprintf(ecc_stat->names[element], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_subunit_name, - subunit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[element].attr); - dev_attr_array[element].attr.name = - ecc_stat->names[element]; - dev_attr_array[element].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[element].show = ecc_stat_show; - dev_attr_array[element].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[element]); - - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - - /* Fill in struct device_attribute members */ - snprintf(ecc_stat->names[hw_unit], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[hw_unit].attr); - dev_attr_array[hw_unit].attr.name = - ecc_stat->names[hw_unit]; - dev_attr_array[hw_unit].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[hw_unit].show = ecc_stat_show; - dev_attr_array[hw_unit].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[hw_unit]); - } - } - - /* Add hash table entry */ - hash_key = gen_ecc_hash_key(ecc_stat_name); - hash_add(l->ecc_sysfs_stats_htable, - &ecc_stat->hash_node, - hash_key); - - ecc_stat->attr_array = dev_attr_array; - - return error; -} - -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - int num_hw_units = 0; - int num_subunits = 0; - - if (is_l2 == 1) - num_hw_units = g->ltc_count; - else if (is_l2 == 2) { - num_hw_units = g->ltc_count; - num_subunits = g->gr.slices_per_ltc; - } else - num_hw_units = g->gr.tpc_count; - - nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat); -} - -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - struct device_attribute *dev_attr_array = ecc_stat->attr_array; - int hw_unit = 0; - int subunit = 0; - int element = 0; - int num_elements = num_subunits ? 
num_subunits * num_hw_units : - num_hw_units; - - /* Remove sysfs files */ - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit * num_subunits + subunit; - - device_remove_file(dev, - &dev_attr_array[element]); - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) - device_remove_file(dev, &dev_attr_array[hw_unit]); - } - - /* Remove hash table entry */ - hash_del(&ecc_stat->hash_node); - - /* Free arrays */ - nvgpu_kfree(g, ecc_stat->counters); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) - nvgpu_kfree(g, ecc_stat->names[hw_unit]); - - nvgpu_kfree(g, ecc_stat->names); - nvgpu_kfree(g, dev_attr_array); -} diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h deleted file mode 100644 index d29f7bd3..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _NVGPU_PLATFORM_SYSFS_H_ -#define _NVGPU_PLATFORM_SYSFS_H_ - -#include "gp10b/gr_gp10b.h" - -#define ECC_STAT_NAME_MAX_SIZE 100 - -int nvgpu_gr_ecc_stat_create(struct device *dev, - int is_l2, char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -int nvgpu_ecc_stat_create(struct device *dev, - int num_hw_units, int num_subunits, - char *ecc_unit_name, char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat); -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a.h deleted file mode 100644 index 9a99b7fe..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * GK20A Platform (SoC) Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
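
A note on the hashing used by the platform_ecc_sysfs.c removed above: gen_ecc_hash_key() is 32-bit FNV-1 (multiply by the FNV prime, then XOR in each byte), with offset basis 0x811c9dc5 and prime 0x1000193. Creation (nvgpu_ecc_stat_create) and lookup (ecc_stat_show) must hash the same stat base name so both land in the same hashtable bucket. A standalone restatement:

#include <stdint.h>

static uint32_t ecc_hash_key(const char *str)
{
	uint32_t key = 0x811c9dc5;	/* FNV-1 32-bit offset basis */

	while (*str) {
		key *= 0x1000193;	/* FNV-1 32-bit prime */
		key ^= (uint32_t)*str++;
	}
	return key;
}
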
- */
-
-#ifndef _GK20A_PLATFORM_H_
-#define _GK20A_PLATFORM_H_
-
-#include
-
-#include
-
-#include "gk20a/gk20a.h"
-
-#define GK20A_CLKS_MAX 4
-
-struct gk20a;
-struct channel_gk20a;
-struct gr_ctx_buffer_desc;
-struct gk20a_scale_profile;
-
-struct secure_page_buffer {
-	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
-	size_t size;
-	dma_addr_t phys;
-	size_t used;
-};
-
-struct gk20a_platform {
-	/* Populated by the gk20a driver before probing the platform. */
-	struct gk20a *g;
-
-	/* Should be populated at probe. */
-	bool can_railgate_init;
-
-	/* Should be populated at probe. */
-	bool can_elpg_init;
-
-	/* Should be populated at probe. */
-	bool has_syncpoints;
-
-	/* channel limit after which to start aggressive sync destroy */
-	unsigned int aggressive_sync_destroy_thresh;
-
-	/* flag to set sync destroy aggressiveness */
-	bool aggressive_sync_destroy;
-
-	/* set if ASPM should be disabled on boot; only makes sense for PCI */
-	bool disable_aspm;
-
-	/* Set if the platform can unify the small/large address spaces. */
-	bool unify_address_spaces;
-
-	/* Clock configuration is stored here. Platform probe is responsible
-	 * for filling this data. */
-	struct clk *clk[GK20A_CLKS_MAX];
-	int num_clks;
-	int maxmin_clk_id;
-
-#ifdef CONFIG_RESET_CONTROLLER
-	/* Reset control for device */
-	struct reset_control *reset_control;
-#endif
-
-	/* Delay before rail gated */
-	int railgate_delay_init;
-
-	/* init value for slowdown factor */
-	u8 ldiv_slowdown_factor_init;
-
-	/* Second Level Clock Gating: true = enable false = disable */
-	bool enable_slcg;
-
-	/* Block Level Clock Gating: true = enable false = disable */
-	bool enable_blcg;
-
-	/* Engine Level Clock Gating: true = enable false = disable */
-	bool enable_elcg;
-
-	/* Should be populated at probe. */
-	bool can_slcg;
-
-	/* Should be populated at probe. */
-	bool can_blcg;
-
-	/* Should be populated at probe. */
-	bool can_elcg;
-
-	/* Engine Level Power Gating: true = enable false = disable */
-	bool enable_elpg;
-
-	/* Adaptive ELPG: true = enable false = disable */
-	bool enable_aelpg;
-
-	/* PMU Perfmon: true = enable false = disable */
-	bool enable_perfmon;
-
-	/* Memory System Clock Gating: true = enable false = disable */
-	bool enable_mscg;
-
-	/* Timeout for per-channel watchdog (in ms) */
-	u32 ch_wdt_timeout_ms;
-
-	/* Disable big page support */
-	bool disable_bigpage;
-
-	/*
-	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
-	 * This flag can be used to force CAR reset case instead of rail gate
-	 */
-	bool force_reset_in_do_idle;
-
-	/* guest/vm id, needed for IPA to PA translation */
-	int vmid;
-
-	/* Initialize the platform interface of the gk20a driver.
-	 *
-	 * The platform implementation of this function must
-	 * - set the power and clocks of the gk20a device to a known
-	 *   state, and
-	 * - populate the gk20a_platform structure (a pointer to the
-	 *   structure can be obtained by calling gk20a_get_platform).
-	 *
-	 * After this function is finished, the driver will initialise
-	 * pm runtime and genpd based on the platform configuration.
-	 */
-	int (*probe)(struct device *dev);
-
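
The probe contract documented above is worth restating as code: the callback receives the struct device, reaches its gk20a_platform through drvdata, and fills in the "populated at probe" fields. The callback signature and field names below are real; the body is purely illustrative.

static int my_board_probe(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	/* 1. bring power and clocks to a known state (board specific) */

	/* 2. populate the platform structure */
	platform->can_railgate_init = false;
	platform->can_elpg_init = false;
	platform->has_syncpoints = true;

	return 0;	/* the driver then sets up runtime pm / genpd */
}
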
-	/* Second stage initialisation - called once all power management
-	 * initialisations are done.
-	 */
-	int (*late_probe)(struct device *dev);
-
-	/* Remove device after power management has been done
-	 */
-	int (*remove)(struct device *dev);
-
-	/* Poweron platform dependencies */
-	int (*busy)(struct device *dev);
-
-	/* Powerdown platform dependencies */
-	void (*idle)(struct device *dev);
-
-	/* Preallocated VPR buffer for kernel */
-	size_t secure_buffer_size;
-	struct secure_page_buffer secure_buffer;
-
-	/* Device is going to be suspended */
-	int (*suspend)(struct device *);
-
-	/* Called to turn off the device */
-	int (*railgate)(struct device *dev);
-
-	/* Called to turn on the device */
-	int (*unrailgate)(struct device *dev);
-	struct nvgpu_mutex railgate_lock;
-
-	/* Called to check state of device */
-	bool (*is_railgated)(struct device *dev);
-
-	/* get supported frequency list */
-	int (*get_clk_freqs)(struct device *pdev,
-			     unsigned long **freqs, int *num_freqs);
-
-	/* clk related supported functions */
-	long (*clk_round_rate)(struct device *dev,
-			       unsigned long rate);
-
-	/* Called to register GPCPLL with common clk framework */
-	int (*clk_register)(struct gk20a *g);
-
-	/* platform specific scale init quirks */
-	void (*initscale)(struct device *dev);
-
-	/* Postscale callback is called after frequency change */
-	void (*postscale)(struct device *dev,
-			  unsigned long freq);
-
-	/* Pre callback is called before frequency change */
-	void (*prescale)(struct device *dev);
-
-	/* Devfreq governor name. If scaling is enabled, we request
-	 * this governor to be used in scaling */
-	const char *devfreq_governor;
-
-	/* Quality of service notifier callback. If this is set, the scaling
-	 * routines will register a callback to QoS. Each time we receive
-	 * a new value, this callback gets called. */
-	int (*qos_notify)(struct notifier_block *nb,
-			  unsigned long n, void *p);
-
-	/* Called as part of debug dump. If the gpu gets hung, this function
-	 * is responsible for delivering all necessary debug data of other
-	 * hw units which may interact with the gpu without direct supervision
-	 * of the CPU.
-	 */
-	void (*dump_platform_dependencies)(struct device *dev);
-
-	/* Defined when SMMU stage-2 is enabled, and we need to use physical
-	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
-	 * configuration, when dGPU is in pass-through mode.
-	 */
-	u64 (*phys_addr)(struct gk20a *g, u64 ipa);
-
-	/* Callbacks to assert/deassert GPU reset */
-	int (*reset_assert)(struct device *dev);
-	int (*reset_deassert)(struct device *dev);
-	struct clk *clk_reset;
-	struct dvfs_rail *gpu_rail;
-
-	bool virtual_dev;
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-	void *vgpu_priv;
-#endif
-	/* source frequency for ptimer in hz */
-	u32 ptimer_src_freq;
-
-#ifdef CONFIG_NVGPU_SUPPORT_CDE
-	bool has_cde;
-#endif
-
-	/* soc name for finding firmware files */
-	const char *soc_name;
-
-	/* false if vidmem aperture actually points to sysmem */
-	bool honors_aperture;
-	/* unified or split memory with separate vidmem? */
-	bool unified_memory;
-
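
For reference, DMA_BIT_MASK(n) expands to ((1ULL << (n)) - 1) for n < 64, so the 34-bit default mentioned in the comment below is (1ULL << 34) - 1 = 0x3ffffffff, and the DMA_BIT_MASK(40) used by the PCI platform entries earlier in this patch is 0xffffffffff.
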
-	/*
-	 * DMA mask for Linux (both coh and non-coh). If not set defaults to
-	 * 0x3ffffffff (i.e. a 34-bit mask).
-	 */
-	u64 dma_mask;
-
-	/* minimum supported VBIOS version */
-	u32 vbios_min_version;
-
-	/* true if we run preos microcode on this board */
-	bool run_preos;
-
-	/* true if we need to program sw threshold for
-	 * power limits
-	 */
-	bool hardcode_sw_threshold;
-
-	/* i2c device index, port and address for INA3221 */
-	u32 ina3221_dcb_index;
-	u32 ina3221_i2c_address;
-	u32 ina3221_i2c_port;
-
-	/* stream id to use */
-	u32 ltc_streamid;
-
-	/* scaling rate */
-	unsigned long cached_rate;
-};
-
-static inline struct gk20a_platform *gk20a_get_platform(
-	struct device *dev)
-{
-	return (struct gk20a_platform *)dev_get_drvdata(dev);
-}
-
-#ifdef CONFIG_TEGRA_GK20A
-extern struct gk20a_platform gm20b_tegra_platform;
-extern struct gk20a_platform gp10b_tegra_platform;
-extern struct gk20a_platform gv11b_tegra_platform;
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-extern struct gk20a_platform vgpu_tegra_platform;
-extern struct gk20a_platform gv11b_vgpu_tegra_platform;
-#endif
-#endif
-
-int gk20a_tegra_busy(struct device *dev);
-void gk20a_tegra_idle(struct device *dev);
-void gk20a_tegra_debug_dump(struct device *pdev);
-
-static inline struct gk20a *get_gk20a(struct device *dev)
-{
-	return gk20a_get_platform(dev)->g;
-}
-static inline struct gk20a *gk20a_from_dev(struct device *dev)
-{
-	if (!dev)
-		return NULL;
-
-	return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
-}
-static inline bool gk20a_gpu_is_virtual(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	return platform->virtual_dev;
-}
-
-static inline int support_gk20a_pmu(struct device *dev)
-{
-	if (IS_ENABLED(CONFIG_GK20A_PMU)) {
-		/* gPMU is not supported for vgpu */
-		return !gk20a_gpu_is_virtual(dev);
-	}
-
-	return 0;
-}
-
-#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
deleted file mode 100644
index af55e5b6..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * GK20A Tegra Platform Interface
- *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
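
The inline helpers at the end of the platform_gk20a.h removed above all reduce to one chain: device -> drvdata -> gk20a_platform -> gk20a. A condensed restatement (the NULL check on the platform pointer is extra caution here, not present in the originals):

static inline struct gk20a *dev_to_gk20a_sketch(struct device *dev)
{
	struct gk20a_platform *p = dev ? dev_get_drvdata(dev) : NULL;

	return p ? p->g : NULL;
}
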
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CONFIG_TEGRA_DVFS) -#include -#endif -#include -#include -#include -#if defined(CONFIG_COMMON_CLK) -#include -#endif -#ifdef CONFIG_TEGRA_BWMGR -#include -#endif - -#include -#include - -#include -#include -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "gm20b/clk_gm20b.h" - -#include "scale.h" -#include "platform_gk20a.h" -#include "clk.h" -#include "os_linux.h" - -#include "../../../arch/arm/mach-tegra/iomap.h" -#include - -#define TEGRA_GK20A_BW_PER_FREQ 32 -#define TEGRA_GM20B_BW_PER_FREQ 64 -#define TEGRA_DDR3_BW_PER_FREQ 16 -#define TEGRA_DDR4_BW_PER_FREQ 16 -#define MC_CLIENT_GPU 34 -#define PMC_GPU_RG_CNTRL_0 0x2d4 - -#ifdef CONFIG_COMMON_CLK -#define GPU_RAIL_NAME "vdd-gpu" -#else -#define GPU_RAIL_NAME "vdd_gpu" -#endif - -extern struct device tegra_vpr_dev; - -#ifdef CONFIG_TEGRA_BWMGR -struct gk20a_emc_params { - unsigned long bw_ratio; - unsigned long freq_last_set; - struct tegra_bwmgr_client *bwmgr_cl; -}; -#else -struct gk20a_emc_params { - unsigned long bw_ratio; - unsigned long freq_last_set; -}; -#endif - -#define MHZ_TO_HZ(x) ((x) * 1000000) -#define HZ_TO_MHZ(x) ((x) / 1000000) - -static void gk20a_tegra_secure_page_destroy(struct gk20a *g, - struct secure_page_buffer *secure_buffer) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); - dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, - (void *)(uintptr_t)secure_buffer->phys, - secure_buffer->phys, __DMA_ATTR(attrs)); - - secure_buffer->destroy = NULL; -} - -static int gk20a_tegra_secure_alloc(struct gk20a *g, - struct gr_ctx_buffer_desc *desc, - size_t size) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct secure_page_buffer *secure_buffer = &platform->secure_buffer; - dma_addr_t phys; - struct sg_table *sgt; - struct page *page; - int err = 0; - size_t aligned_size = PAGE_ALIGN(size); - - if (nvgpu_mem_is_valid(&desc->mem)) - return 0; - - /* We ran out of preallocated memory */ - if (secure_buffer->used + aligned_size > secure_buffer->size) { - nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", - size, secure_buffer->used, secure_buffer->size); - return -ENOMEM; - } - - phys = secure_buffer->phys + secure_buffer->used; - - sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); - if (!sgt) { - nvgpu_err(platform->g, "failed to allocate memory"); - return -ENOMEM; - } - err = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (err) { - nvgpu_err(platform->g, "failed to allocate sg_table"); - goto fail_sgt; - } - page = phys_to_page(phys); - sg_set_page(sgt->sgl, page, size, 0); - /* This bypasses SMMU for VPR during gmmu_map. 
*/ - sg_dma_address(sgt->sgl) = 0; - - desc->destroy = NULL; - - desc->mem.priv.sgt = sgt; - desc->mem.size = size; - desc->mem.aperture = APERTURE_SYSMEM; - - secure_buffer->used += aligned_size; - - return err; - -fail_sgt: - nvgpu_kfree(platform->g, sgt); - return err; -} - -/* - * gk20a_tegra_get_emc_rate() - * - * This function returns the minimum emc clock based on gpu frequency - */ - -static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, - struct gk20a_emc_params *emc_params) -{ - unsigned long gpu_freq, gpu_fmax_at_vmin; - unsigned long emc_rate, emc_scale; - - gpu_freq = clk_get_rate(g->clk.tegra_clk); - gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( - clk_get_parent(g->clk.tegra_clk)); - - /* When scaling emc, account for the gpu load when the - * gpu frequency is less than or equal to fmax@vmin. */ - if (gpu_freq <= gpu_fmax_at_vmin) - emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); - else - emc_scale = g->emc3d_ratio; - - emc_rate = - (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; - - return MHZ_TO_HZ(emc_rate); -} - -/* - * gk20a_tegra_prescale(profile, freq) - * - * This function informs EDP about changed constraints. - */ - -static void gk20a_tegra_prescale(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - u32 avg = 0; - - nvgpu_pmu_load_norm(g, &avg); - tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); -} - -/* - * gk20a_tegra_calibrate_emc() - * - */ - -static void gk20a_tegra_calibrate_emc(struct device *dev, - struct gk20a_emc_params *emc_params) -{ - enum tegra_chipid cid = tegra_get_chip_id(); - long gpu_bw, emc_bw; - - /* store gpu bw based on soc */ - switch (cid) { - case TEGRA210: - gpu_bw = TEGRA_GM20B_BW_PER_FREQ; - break; - case TEGRA124: - case TEGRA132: - gpu_bw = TEGRA_GK20A_BW_PER_FREQ; - break; - default: - gpu_bw = 0; - break; - } - - /* TODO detect DDR type. - * Okay for now since DDR3 and DDR4 have the same BW ratio */ - emc_bw = TEGRA_DDR3_BW_PER_FREQ; - - /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq - * NOTE the ratio must come out as an integer */ - emc_params->bw_ratio = (gpu_bw / emc_bw); -} - -#ifdef CONFIG_TEGRA_BWMGR -#ifdef CONFIG_TEGRA_DVFS -static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) -{ - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *params; - unsigned long rate; - - if (!profile || !profile->private_data) - return; - - params = (struct gk20a_emc_params *)profile->private_data; - rate = (enb) ? 
params->freq_last_set : 0; - tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); -} -#endif - -static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - unsigned long emc_rate; - - if (!profile || !profile->private_data) - return; - - emc_params = profile->private_data; - emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); - - if (emc_rate > tegra_bwmgr_get_max_emc_rate()) - emc_rate = tegra_bwmgr_get_max_emc_rate(); - - emc_params->freq_last_set = emc_rate; - if (platform->is_railgated && platform->is_railgated(dev)) - return; - - tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, - TEGRA_BWMGR_SET_EMC_FLOOR); - -} - -#endif - -#if defined(CONFIG_TEGRA_DVFS) -/* - * gk20a_tegra_is_railgated() - * - * Check status of gk20a power rail - */ - -static bool gk20a_tegra_is_railgated(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - bool ret = false; - - if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); - - return ret; -} - -/* - * gm20b_tegra_railgate() - * - * Gate (disable) gm20b power rail - */ - -static int gm20b_tegra_railgate(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || - !tegra_dvfs_is_rail_up(platform->gpu_rail)) - return 0; - - tegra_mc_flush(MC_CLIENT_GPU); - - udelay(10); - - /* enable clamp */ - tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); - tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); - - udelay(10); - - platform->reset_assert(dev); - - udelay(10); - - /* - * GPCPLL is already disabled before entering this function; reference - * clocks are enabled until now - disable them just before rail gating - */ - clk_disable_unprepare(platform->clk_reset); - clk_disable_unprepare(platform->clk[0]); - clk_disable_unprepare(platform->clk[1]); - if (platform->clk[3]) - clk_disable_unprepare(platform->clk[3]); - - udelay(10); - - tegra_soctherm_gpu_tsens_invalidate(1); - - if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { - ret = tegra_dvfs_rail_power_down(platform->gpu_rail); - if (ret) - goto err_power_off; - } else - pr_info("No GPU regulator?\n"); - -#ifdef CONFIG_TEGRA_BWMGR - gm20b_bwmgr_set_rate(platform, false); -#endif - - return 0; - -err_power_off: - nvgpu_err(platform->g, "Could not railgate GPU"); - return ret; -} - - -/* - * gm20b_tegra_unrailgate() - * - * Ungate (enable) gm20b power rail - */ - -static int gm20b_tegra_unrailgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - int ret = 0; - bool first = false; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - return 0; - - ret = tegra_dvfs_rail_power_up(platform->gpu_rail); - if (ret) - return ret; - -#ifdef CONFIG_TEGRA_BWMGR - gm20b_bwmgr_set_rate(platform, true); -#endif - - tegra_soctherm_gpu_tsens_invalidate(0); - - if (!platform->clk_reset) { - platform->clk_reset = clk_get(dev, "gpu_gate"); - if (IS_ERR(platform->clk_reset)) { - nvgpu_err(g, "fail to get gpu reset clk"); - goto err_clk_on; - } - } - - if (!first) { - ret = clk_prepare_enable(platform->clk_reset); - if (ret) { - nvgpu_err(g, "could not turn on gpu_gate"); - goto err_clk_on; - } - - ret = clk_prepare_enable(platform->clk[0]); - if (ret) 
{ - nvgpu_err(g, "could not turn on gpu pll"); - goto err_clk_on; - } - ret = clk_prepare_enable(platform->clk[1]); - if (ret) { - nvgpu_err(g, "could not turn on pwr clock"); - goto err_clk_on; - } - - if (platform->clk[3]) { - ret = clk_prepare_enable(platform->clk[3]); - if (ret) { - nvgpu_err(g, "could not turn on fuse clock"); - goto err_clk_on; - } - } - } - - udelay(10); - - platform->reset_assert(dev); - - udelay(10); - - tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); - tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); - - udelay(10); - - clk_disable(platform->clk_reset); - platform->reset_deassert(dev); - clk_enable(platform->clk_reset); - - /* Flush MC after boot/railgate/SC7 */ - tegra_mc_flush(MC_CLIENT_GPU); - - udelay(10); - - tegra_mc_flush_done(MC_CLIENT_GPU); - - udelay(10); - - return 0; - -err_clk_on: - tegra_dvfs_rail_power_down(platform->gpu_rail); - - return ret; -} -#endif - - -static struct { - char *name; - unsigned long default_rate; -} tegra_gk20a_clocks[] = { - {"gpu_ref", UINT_MAX}, - {"pll_p_out5", 204000000}, - {"emc", UINT_MAX}, - {"fuse", UINT_MAX}, -}; - - - -/* - * gk20a_tegra_get_clocks() - * - * This function finds clocks in tegra platform and populates - * the clock information to gk20a platform data. - */ - -static int gk20a_tegra_get_clocks(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - char devname[16]; - unsigned int i; - int ret = 0; - - BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); - - snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); - - platform->num_clks = 0; - for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { - long rate = tegra_gk20a_clocks[i].default_rate; - struct clk *c; - - c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); - if (IS_ERR(c)) { - ret = PTR_ERR(c); - goto err_get_clock; - } - rate = clk_round_rate(c, rate); - clk_set_rate(c, rate); - platform->clk[i] = c; - if (i == 0) - platform->cached_rate = rate; - } - platform->num_clks = i; - - return 0; - -err_get_clock: - - while (i--) - clk_put(platform->clk[i]); - return ret; -} - -#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) -static int gm20b_tegra_reset_assert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - - if (!platform->reset_control) { - WARN(1, "Reset control not initialized\n"); - return -ENOSYS; - } - - return reset_control_assert(platform->reset_control); -} - -static int gm20b_tegra_reset_deassert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - - if (!platform->reset_control) { - WARN(1, "Reset control not initialized\n"); - return -ENOSYS; - } - - return reset_control_deassert(platform->reset_control); -} -#endif - -static void gk20a_tegra_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - struct gk20a *g = platform->g; - - if (!profile) - return; - - if (profile->private_data) - return; - - emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); - if (!emc_params) - return; - - emc_params->freq_last_set = -1; - gk20a_tegra_calibrate_emc(dev, emc_params); - -#ifdef CONFIG_TEGRA_BWMGR - emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); - if (!emc_params->bwmgr_cl) { - nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); - return; - } -#endif - - profile->private_data = emc_params; -} - -static void gk20a_tegra_scale_exit(struct 
device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - - if (!profile) - return; - - emc_params = profile->private_data; -#ifdef CONFIG_TEGRA_BWMGR - tegra_bwmgr_unregister(emc_params->bwmgr_cl); -#endif - - nvgpu_kfree(platform->g, profile->private_data); -} - -void gk20a_tegra_debug_dump(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - nvgpu_nvhost_debug_dump_device(g->nvhost_dev); -#endif -} - -int gk20a_tegra_busy(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); -#endif - return 0; -} - -void gk20a_tegra_idle(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - nvgpu_nvhost_module_idle_ext(g->nvhost_dev); -#endif -} - -int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) -{ - struct gk20a *g = platform->g; - struct secure_page_buffer *secure_buffer = &platform->secure_buffer; - DEFINE_DMA_ATTRS(attrs); - dma_addr_t iova; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - return 0; - - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); - (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, - GFP_KERNEL, __DMA_ATTR(attrs)); - /* Some platforms disable VPR. In that case VPR allocations always - * fail. Just disable VPR usage in nvgpu in that case. */ - if (dma_mapping_error(&tegra_vpr_dev, iova)) - return 0; - - secure_buffer->size = platform->secure_buffer_size; - secure_buffer->phys = iova; - secure_buffer->destroy = gk20a_tegra_secure_page_destroy; - - g->ops.secure_alloc = gk20a_tegra_secure_alloc; - __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); - - return 0; -} - -#ifdef CONFIG_COMMON_CLK -static struct clk *gk20a_clk_get(struct gk20a *g) -{ - if (!g->clk.tegra_clk) { - struct clk *clk; - char clk_dev_id[32]; - struct device *dev = dev_from_gk20a(g); - - snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); - - clk = clk_get_sys(clk_dev_id, "gpu"); - if (IS_ERR(clk)) { - nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", - clk_dev_id); - return NULL; - } - g->clk.tegra_clk = clk; - } - - return g->clk.tegra_clk; -} - -static int gm20b_clk_prepare_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_clk_prepare(clk); -} - -static void gm20b_clk_unprepare_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - gm20b_clk_unprepare(clk); -} - -static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_clk_is_prepared(clk); -} - -static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_recalc_rate(clk, parent_rate); -} - -static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_gpcclk_set_rate(clk, rate, parent_rate); -} - -static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return 
gm20b_round_rate(clk, rate, parent_rate); -} - -static const struct clk_ops gm20b_clk_ops = { - .prepare = gm20b_clk_prepare_ops, - .unprepare = gm20b_clk_unprepare_ops, - .is_prepared = gm20b_clk_is_prepared_ops, - .recalc_rate = gm20b_recalc_rate_ops, - .set_rate = gm20b_gpcclk_set_rate_ops, - .round_rate = gm20b_round_rate_ops, -}; - -static int gm20b_register_gpcclk(struct gk20a *g) -{ - const char *parent_name = "pllg_ref"; - struct clk_gk20a *clk = &g->clk; - struct clk_init_data init; - struct clk *c; - int err = 0; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return -ENOSYS; - - err = gm20b_init_clk_setup_sw(g); - if (err) - return err; - - init.name = "gpcclk"; - init.ops = &gm20b_clk_ops; - init.parent_names = &parent_name; - init.num_parents = 1; - init.flags = 0; - - /* Data in .init is copied by clk_register(), so stack variable OK */ - clk->hw.init = &init; - c = clk_register(dev_from_gk20a(g), &clk->hw); - if (IS_ERR(c)) { - nvgpu_err(g, "Failed to register GPCPLL clock"); - return -EINVAL; - } - - clk->g = g; - clk_register_clkdev(c, "gpcclk", "gpcclk"); - - return err; -} -#endif /* CONFIG_COMMON_CLK */ - -static int gk20a_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct device_node *np = dev->of_node; - bool joint_xpu_rail = false; - int ret; - struct gk20a *g = platform->g; - -#ifdef CONFIG_COMMON_CLK - /* DVFS is not guaranteed to be initialized at the time of probe on - * kernels with Common Clock Framework enabled. - */ - if (!platform->gpu_rail) { - platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); - if (!platform->gpu_rail) { - nvgpu_log_info(g, "deferring probe no gpu_rail"); - return -EPROBE_DEFER; - } - } - - if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { - nvgpu_log_info(g, "deferring probe gpu_rail not ready"); - return -EPROBE_DEFER; - } -#endif - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; -#endif - -#ifdef CONFIG_OF - joint_xpu_rail = of_property_read_bool(of_chosen, - "nvidia,tegra-joint_xpu_rail"); -#endif - - if (joint_xpu_rail) { - nvgpu_log_info(g, "XPU rails are joint\n"); - platform->g->can_railgate = false; - } - - platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; - if (tegra_get_chip_id() == TEGRA210) { - /* WAR for bug 1547668: Disable railgating and scaling - irrespective of platform data if the rework was not made. 
*/ - np = of_find_node_by_path("/gpu-dvfs-rework"); - if (!(np && of_device_is_available(np))) { - platform->devfreq_governor = ""; - dev_warn(dev, "board does not support scaling"); - } - platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; - if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) - platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; - } - - if (tegra_get_chip_id() == TEGRA132) - platform->soc_name = "tegra13x"; - - gk20a_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - ret = gk20a_tegra_init_secure_alloc(platform); - if (ret) - return ret; - - if (platform->clk_register) { - ret = platform->clk_register(platform->g); - if (ret) - return ret; - } - - return 0; -} - -static int gk20a_tegra_late_probe(struct device *dev) -{ - return 0; -} - -static int gk20a_tegra_remove(struct device *dev) -{ - /* deinitialise tegra specific scaling quirks */ - gk20a_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static int gk20a_tegra_suspend(struct device *dev) -{ - tegra_edp_notify_gpu_load(0, 0); - return 0; -} - -#if defined(CONFIG_COMMON_CLK) -static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return rate; - - return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); -} - -static int gk20a_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return -ENOSYS; - - return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), - freqs, num_freqs); -} -#endif - -struct gk20a_platform gm20b_tegra_platform = { - .has_syncpoints = true, - .aggressive_sync_destroy_thresh = 64, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = true, - .can_elpg_init = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_elcg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_elpg = true, - .enable_aelpg = true, - .enable_perfmon = true, - .ptimer_src_freq = 19200000, - - .force_reset_in_do_idle = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gk20a_tegra_probe, - .late_probe = gk20a_tegra_late_probe, - .remove = gk20a_tegra_remove, - /* power management callbacks */ - .suspend = gk20a_tegra_suspend, - -#if defined(CONFIG_TEGRA_DVFS) - .railgate = gm20b_tegra_railgate, - .unrailgate = gm20b_tegra_unrailgate, - .is_railgated = gk20a_tegra_is_railgated, -#endif - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - -#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) - .reset_assert = gm20b_tegra_reset_assert, - .reset_deassert = gm20b_tegra_reset_deassert, -#else - .reset_assert = gk20a_tegra_reset_assert, - .reset_deassert = gk20a_tegra_reset_deassert, -#endif - -#if defined(CONFIG_COMMON_CLK) - .clk_round_rate = gk20a_round_clk_rate, - .get_clk_freqs = gk20a_clk_get_freqs, -#endif - -#ifdef CONFIG_COMMON_CLK - .clk_register = gm20b_register_gpcclk, -#endif - - /* frequency scaling configuration */ - .initscale = gk20a_tegra_scale_init, - .prescale = gk20a_tegra_prescale, -#ifdef CONFIG_TEGRA_BWMGR - .postscale = gm20b_tegra_postscale, -#endif - .devfreq_governor = "nvhost_podgov", - .qos_notify = gk20a_scale_qos_notify, - - 
.dump_platform_dependencies = gk20a_tegra_debug_dump, - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - .has_cde = true, -#endif - - .soc_name = "tegra21x", - - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(34), - - .secure_buffer_size = 335872, -}; diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h deleted file mode 100644 index f7d50406..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * GK20A Platform (SoC) Interface - * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ -#define _NVGPU_PLATFORM_GK20A_TEGRA_H_ - -struct gk20a_platform; - -int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c deleted file mode 100644 index fce16653..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c +++ /dev/null @@ -1,607 +0,0 @@ -/* - * GP10B Tegra Platform Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include "os_linux.h" - -#include "clk.h" - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "platform_ecc_sysfs.h" -#include "platform_gk20a_tegra.h" -#include "gp10b/platform_gp10b.h" -#include "platform_gp10b_tegra.h" -#include "scale.h" - -/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ -#define GP10B_FREQ_SELECT_STEP 8 -/* Max number of freq supported in h/w */ -#define GP10B_MAX_SUPPORTED_FREQS 120 -static unsigned long -gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; - -#define TEGRA_GP10B_BW_PER_FREQ 64 -#define TEGRA_DDR4_BW_PER_FREQ 16 - -#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) - -#define GPCCLK_INIT_RATE 1000000000 - -static struct { - char *name; - unsigned long default_rate; -} tegra_gp10b_clocks[] = { - {"gpu", GPCCLK_INIT_RATE}, - {"gpu_sys", 204000000} }; - -/* - * gp10b_tegra_get_clocks() - * - * This function finds clocks in tegra platform and populates - * the clock information to gp10b platform data. 
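/*
 * Worked example of the EMC scaling arithmetic these ratios feed (see
 * gp10b_tegra_postscale() below): EMC_BW_RATIO = 64 / 16 = 4, and the
 * division by 1000 in the postscale path suggests g->emc3d_ratio is
 * expressed in per-mille (an assumption; the unit is not defined in
 * this file). For gpcclk = 1 GHz and emc3d_ratio = 750:
 *
 *   emc_rate = (1000000000 * 4 * 750) / 1000 = 3 GHz
 *
 * so the requested EMC floor tracks the GPU clock linearly until it
 * is capped at tegra_bwmgr_get_max_emc_rate().
 */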
- */ - -int gp10b_tegra_get_clocks(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - unsigned int i; - - platform->num_clks = 0; - for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { - long rate = tegra_gp10b_clocks[i].default_rate; - struct clk *c; - - c = clk_get(dev, tegra_gp10b_clocks[i].name); - if (IS_ERR(c)) { - nvgpu_err(platform->g, "cannot get clock %s", - tegra_gp10b_clocks[i].name); - } else { - clk_set_rate(c, rate); - platform->clk[i] = c; - if (i == 0) - platform->cached_rate = rate; - } - } - platform->num_clks = i; - - if (platform->clk[0]) { - i = tegra_bpmp_dvfs_get_clk_id(dev->of_node, - tegra_gp10b_clocks[0].name); - if (i > 0) - platform->maxmin_clk_id = i; - } - - return 0; -} - -void gp10b_tegra_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct tegra_bwmgr_client *bwmgr_handle; - - if (!profile) - return; - - if ((struct tegra_bwmgr_client *)profile->private_data) - return; - - bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); - if (!bwmgr_handle) - return; - - profile->private_data = (void *)bwmgr_handle; -} - -static void gp10b_tegra_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (profile) - tegra_bwmgr_unregister( - (struct tegra_bwmgr_client *)profile->private_data); -} - -static int gp10b_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); -#ifdef CONFIG_TEGRA_GK20A_NVHOST - int ret; - - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; -#endif - - ret = gk20a_tegra_init_secure_alloc(platform); - if (ret) - return ret; - - platform->disable_bigpage = !device_is_iommuable(dev); - - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; - - gp10b_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - - return 0; -} - -static int gp10b_tegra_late_probe(struct device *dev) -{ - return 0; -} - -static int gp10b_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - /* deinitialise tegra specific scaling quirks */ - gp10b_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static bool gp10b_tegra_is_railgated(struct device *dev) -{ - bool ret = false; - - if (tegra_bpmp_running()) - ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); - - return ret; -} - -static int gp10b_tegra_railgate(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - /* remove emc frequency floor */ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - 0, TEGRA_BWMGR_SET_EMC_FLOOR); - - if (tegra_bpmp_running() && - tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { - int i; - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_disable_unprepare(platform->clk[i]); - } - tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); - } - return 0; -} - -static int gp10b_tegra_unrailgate(struct device *dev) -{ 
- int ret = 0; - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (tegra_bpmp_running()) { - int i; - ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_prepare_enable(platform->clk[i]); - } - } - - /* to start with set emc frequency floor to max rate*/ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - tegra_bwmgr_get_max_emc_rate(), - TEGRA_BWMGR_SET_EMC_FLOOR); - return ret; -} - -static int gp10b_tegra_suspend(struct device *dev) -{ - return 0; -} - -int gp10b_tegra_reset_assert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - int ret = 0; - - if (!platform->reset_control) - return -EINVAL; - - ret = reset_control_assert(platform->reset_control); - - return ret; -} - -int gp10b_tegra_reset_deassert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - int ret = 0; - - if (!platform->reset_control) - return -EINVAL; - - ret = reset_control_deassert(platform->reset_control); - - return ret; -} - -void gp10b_tegra_prescale(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - u32 avg = 0; - - nvgpu_log_fn(g, " "); - - nvgpu_pmu_load_norm(g, &avg); - - nvgpu_log_fn(g, "done"); -} - -void gp10b_tegra_postscale(struct device *pdev, - unsigned long freq) -{ - struct gk20a_platform *platform = gk20a_get_platform(pdev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a *g = get_gk20a(pdev); - unsigned long emc_rate; - - nvgpu_log_fn(g, " "); - if (profile && !platform->is_railgated(pdev)) { - unsigned long emc_scale; - - if (freq <= gp10b_freq_table[0]) - emc_scale = 0; - else - emc_scale = g->emc3d_ratio; - - emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; - - if (emc_rate > tegra_bwmgr_get_max_emc_rate()) - emc_rate = tegra_bwmgr_get_max_emc_rate(); - - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); - } - nvgpu_log_fn(g, "done"); -} - -long gp10b_round_clk_rate(struct device *dev, unsigned long rate) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - unsigned long *freq_table = profile->devfreq_profile.freq_table; - int max_states = profile->devfreq_profile.max_state; - int i; - - for (i = 0; i < max_states; ++i) - if (freq_table[i] >= rate) - return freq_table[i]; - - return freq_table[max_states - 1]; -} - -int gp10b_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - unsigned long max_rate; - unsigned long new_rate = 0, prev_rate = 0; - int i = 0, freq_counter = 0; - - max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); - - /* - * Walk the h/w frequency table and only select - * GP10B_FREQ_SELECT_STEP'th frequencies and - * add MAX freq to last - */ - for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { - prev_rate = new_rate; - new_rate = clk_round_rate(platform->clk[0], prev_rate + 1); - - if (i % GP10B_FREQ_SELECT_STEP == 0 || - new_rate == max_rate) { - gp10b_freq_table[freq_counter++] = new_rate; - - if (new_rate == max_rate) - break; - } - } - - WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS); - - /* Fill freq table */ - *freqs = gp10b_freq_table; - *num_freqs = freq_counter; - - nvgpu_log_info(g, "min rate: %ld max rate: %ld 
num_of_freq %d\n", - gp10b_freq_table[0], max_rate, *num_freqs); - - return 0; -} - -struct gk20a_platform gp10b_tegra_platform = { - .has_syncpoints = true, - - /* power management configuration */ - .railgate_delay_init = 500, - - /* ldiv slowdown factor */ - .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, - - /* power management configuration */ - .can_railgate_init = true, - .enable_elpg = true, - .can_elpg_init = true, - .enable_blcg = true, - .enable_slcg = true, - .enable_elcg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_aelpg = true, - .enable_perfmon = true, - - /* ptimer src frequency in hz*/ - .ptimer_src_freq = 31250000, - - .ch_wdt_timeout_ms = 5000, - - .probe = gp10b_tegra_probe, - .late_probe = gp10b_tegra_late_probe, - .remove = gp10b_tegra_remove, - - /* power management callbacks */ - .suspend = gp10b_tegra_suspend, - .railgate = gp10b_tegra_railgate, - .unrailgate = gp10b_tegra_unrailgate, - .is_railgated = gp10b_tegra_is_railgated, - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - - .dump_platform_dependencies = gk20a_tegra_debug_dump, - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - .has_cde = true, -#endif - - .clk_round_rate = gp10b_round_clk_rate, - .get_clk_freqs = gp10b_clk_get_freqs, - - /* frequency scaling configuration */ - .initscale = gp10b_tegra_scale_init, - .prescale = gp10b_tegra_prescale, - .postscale = gp10b_tegra_postscale, - .devfreq_governor = "nvhost_podgov", - - .qos_notify = gk20a_scale_qos_notify, - - .reset_assert = gp10b_tegra_reset_assert, - .reset_deassert = gp10b_tegra_reset_deassert, - - .force_reset_in_do_idle = false, - - .soc_name = "tegra18x", - - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(36), - - .ltc_streamid = TEGRA_SID_GPUB, - - .secure_buffer_size = 401408, -}; - -void gr_gp10b_create_sysfs(struct gk20a *g) -{ - int error = 0; - struct device *dev = dev_from_gk20a(g); - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. 
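/*
 * A generic, self-contained sketch of the create-once guard described
 * here: creation is skipped when a previous GR init already published
 * the attributes. The real code below keys off an allocated counters
 * array; this sketch uses an explicit flag instead, and the "example"
 * attribute name, show routine, and flag are hypothetical.
 */
#include <linux/device.h>

static bool example_attr_created;

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 0);	/* placeholder counter value */
}
static DEVICE_ATTR_RO(example);

static int example_create_sysfs_once(struct device *dev)
{
	int err;

	if (example_attr_created)	/* GR re-init: nothing to do */
		return 0;

	err = device_create_file(dev, &dev_attr_example);
	if (!err)
		example_attr_created = true;
	return err;
}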
*/ - if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) - return; - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_single_err_count", - &g->ecc.gr.sm_lrf_single_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_double_err_count", - &g->ecc.gr.sm_lrf_double_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sec_count", - &g->ecc.gr.sm_shm_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sed_count", - &g->ecc.gr.sm_shm_sed_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_ded_count", - &g->ecc.gr.sm_shm_ded_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe0_count", - &g->ecc.gr.tex_total_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe0_count", - &g->ecc.gr.tex_total_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe0_count", - &g->ecc.gr.tex_unique_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe0_count", - &g->ecc.gr.tex_unique_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe1_count", - &g->ecc.gr.tex_total_sec_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe1_count", - &g->ecc.gr.tex_total_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe1_count", - &g->ecc.gr.tex_unique_sec_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe1_count", - &g->ecc.gr.tex_unique_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_sec_count", - &g->ecc.ltc.l2_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_ded_count", - &g->ecc.ltc.l2_ded_count); - - if (error) - dev_err(dev, "Failed to create sysfs attributes!\n"); -} - -void gr_gp10b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_lrf_single_err_count.counters) - return; - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_single_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_double_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sed_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_ded_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_ded_count); -} diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h deleted file mode 100644 index 6de90275..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _PLATFORM_GP10B_TEGRA_H_ -#define _PLATFORM_GP10B_TEGRA_H_ - -#include "gp10b/gr_gp10b.h" -#include "platform_ecc_sysfs.h" - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c deleted file mode 100644 index bf66762b..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c +++ /dev/null @@ -1,588 +0,0 @@ -/* - * GV11B Tegra Platform Interface - * - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "clk.h" -#include "scale.h" - -#include "gp10b/platform_gp10b.h" -#include "platform_gp10b_tegra.h" -#include "platform_ecc_sysfs.h" - -#include "os_linux.h" -#include "platform_gk20a_tegra.h" -#include "gv11b/gr_gv11b.h" - -static void gv11b_tegra_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (profile) - tegra_bwmgr_unregister( - (struct tegra_bwmgr_client *)profile->private_data); -} - -static int gv11b_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err; - - err = nvgpu_nvhost_syncpt_init(platform->g); - if (err) { - if (err != -ENOSYS) - return err; - } - - err = gk20a_tegra_init_secure_alloc(platform); - if (err) - return err; - - platform->disable_bigpage = !device_is_iommuable(dev); - - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; - - gp10b_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - - return 0; -} - -static int gv11b_tegra_late_probe(struct device *dev) -{ - return 0; -} - - -static int gv11b_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - gv11b_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static bool gv11b_tegra_is_railgated(struct device *dev) -{ - bool 
ret = false; -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a *g = get_gk20a(dev); - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); - - nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } -#endif - return ret; -} - -static int gv11b_tegra_railgate(struct device *dev) -{ -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a *g = get_gk20a(dev); - int i; - - /* remove emc frequency floor */ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - 0, TEGRA_BWMGR_SET_EMC_FLOOR); - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { - nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); - return 0; - } - nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_disable_unprepare(platform->clk[i]); - } - nvgpu_log(g, gpu_dbg_info, "powergate_partition"); - tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } -#endif - return 0; -} - -static int gv11b_tegra_unrailgate(struct device *dev) -{ - int ret = 0; -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - int i; - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); - if (ret) { - nvgpu_log(g, gpu_dbg_info, - "unpowergate partition failed"); - return ret; - } - nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_prepare_enable(platform->clk[i]); - } - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } - - /* to start with set emc frequency floor to max rate*/ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - tegra_bwmgr_get_max_emc_rate(), - TEGRA_BWMGR_SET_EMC_FLOOR); -#endif - return ret; -} - -static int gv11b_tegra_suspend(struct device *dev) -{ - return 0; -} - -struct gk20a_platform gv11b_tegra_platform = { - .has_syncpoints = true, - - /* ptimer src frequency in hz*/ - .ptimer_src_freq = 31250000, - - .ch_wdt_timeout_ms = 5000, - - .probe = gv11b_tegra_probe, - .late_probe = gv11b_tegra_late_probe, - .remove = gv11b_tegra_remove, - .railgate_delay_init = 500, - .can_railgate_init = true, - - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_elcg = true, - .enable_perfmon = true, - - /* power management configuration */ - .enable_elpg = true, - .can_elpg_init = true, - .enable_aelpg = true, - - /* power management callbacks */ - .suspend = gv11b_tegra_suspend, - .railgate = gv11b_tegra_railgate, - .unrailgate = gv11b_tegra_unrailgate, - .is_railgated = gv11b_tegra_is_railgated, - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - - .clk_round_rate = gp10b_round_clk_rate, - .get_clk_freqs = gp10b_clk_get_freqs, - - /* frequency scaling configuration */ - .initscale = gp10b_tegra_scale_init, - .prescale = gp10b_tegra_prescale, - .postscale = gp10b_tegra_postscale, - 
.devfreq_governor = "nvhost_podgov", - - .qos_notify = gk20a_scale_qos_notify, - - .dump_platform_dependencies = gk20a_tegra_debug_dump, - - .soc_name = "tegra19x", - - .honors_aperture = true, - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(36), - - .reset_assert = gp10b_tegra_reset_assert, - .reset_deassert = gp10b_tegra_reset_deassert, - - .secure_buffer_size = 667648, -}; - -void gr_gv11b_create_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - int error = 0; - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. */ - if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL) - return; - - gr_gp10b_create_sysfs(g); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_corrected_err_count", - &g->ecc.gr.sm_cbu_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_uncorrected_err_count", - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_data_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_corrected_err_count", - &g->ecc.gr.sm_icache_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_uncorrected_err_count", - &g->ecc.gr.sm_icache_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_corrected_err_count", - &g->ecc.gr.gcc_l15_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_uncorrected_err_count", - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_uncorrected_err_count", - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_corrected_err_count", - &g->ecc.ltc.l2_cache_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_uncorrected_err_count", - &g->ecc.gr.fecs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_corrected_err_count", - &g->ecc.gr.fecs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_uncorrected_err_count", - &g->ecc.gr.gpccs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_corrected_err_count", - &g->ecc.gr.gpccs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_uncorrected_err_count", - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_corrected_err_count", - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_uncorrected_err_count", - 
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_corrected_err_count", - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_uncorrected_err_count", - &g->ecc.pmu.pmu_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_corrected_err_count", - &g->ecc.pmu.pmu_corrected_err_count); - - if (error) - dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); -} - -void gr_gv11b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters) - return; - gr_gp10b_remove_sysfs(g); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - 
&g->ecc.pmu.pmu_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.pmu.pmu_corrected_err_count); -} diff --git a/drivers/gpu/nvgpu/common/linux/rwsem.c b/drivers/gpu/nvgpu/common/linux/rwsem.c deleted file mode 100644 index 297ddf11..00000000 --- a/drivers/gpu/nvgpu/common/linux/rwsem.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) -{ - init_rwsem(&rwsem->rwsem); -} - -void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) -{ - up_read(&rwsem->rwsem); -} - -void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) -{ - down_read(&rwsem->rwsem); -} - -void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) -{ - up_write(&rwsem->rwsem); -} - -void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) -{ - down_write(&rwsem->rwsem); -} diff --git a/drivers/gpu/nvgpu/common/linux/scale.c b/drivers/gpu/nvgpu/common/linux/scale.c deleted file mode 100644 index 84ac1cfd..00000000 --- a/drivers/gpu/nvgpu/common/linux/scale.c +++ /dev/null @@ -1,428 +0,0 @@ -/* - * gk20a clock scaling profile - * - * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "scale.h" -#include "os_linux.h" - -/* - * gk20a_scale_qos_notify() - * - * This function is called when the minimum QoS requirement for the device - * has changed. The function calls postscaling callback if it is defined. 
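/*
 * Usage sketch for the nvgpu_rwsem wrappers in rwsem.c above: they
 * exist so OS-independent nvgpu code can take reader/writer locks
 * without touching Linux headers directly. The header path and the
 * example_db structure are assumptions for illustration.
 */
#include <nvgpu/rwsem.h>

struct example_db {
	struct nvgpu_rwsem lock;
	unsigned long entries;
};

static void example_db_init(struct example_db *db)
{
	nvgpu_rwsem_init(&db->lock);
	db->entries = 0;
}

static unsigned long example_db_count(struct example_db *db)
{
	unsigned long n;

	nvgpu_rwsem_down_read(&db->lock);	/* concurrent readers OK */
	n = db->entries;
	nvgpu_rwsem_up_read(&db->lock);
	return n;
}

static void example_db_add(struct example_db *db)
{
	nvgpu_rwsem_down_write(&db->lock);	/* exclusive writer */
	db->entries++;
	nvgpu_rwsem_up_write(&db->lock);
}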
- */ - -#if defined(CONFIG_COMMON_CLK) -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a *g = get_gk20a(profile->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return NOTIFY_OK; - - mutex_lock(&devfreq->lock); - /* check for pm_qos min and max frequency requirement */ - profile->qos_min_freq = - (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; - profile->qos_max_freq = - (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; - - if (profile->qos_min_freq > profile->qos_max_freq) { - nvgpu_err(g, - "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", - profile->qos_min_freq, profile->qos_max_freq); - profile->qos_min_freq = profile->qos_max_freq; - } - - update_devfreq(devfreq); - mutex_unlock(&devfreq->lock); - - return NOTIFY_OK; -} -#else -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a_platform *platform = dev_get_drvdata(profile->dev); - struct gk20a *g = get_gk20a(profile->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - unsigned long freq; - - if (!platform->postscale) - return NOTIFY_OK; - - /* get the frequency requirement. if devfreq is enabled, check if it - * has higher demand than qos */ - freq = platform->clk_round_rate(profile->dev, - (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); - if (l->devfreq) - freq = max(l->devfreq->previous_freq, freq); - - /* Update gpu load because we may scale the emc target - * if the gpu load changed. */ - nvgpu_pmu_load_update(g); - platform->postscale(profile->dev, freq); - - return NOTIFY_OK; -} -#endif - -/* - * gk20a_scale_make_freq_table(profile) - * - * This function initialises the frequency table for the given device profile - */ - -static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) -{ - struct gk20a_platform *platform = dev_get_drvdata(profile->dev); - int num_freqs, err; - unsigned long *freqs; - - if (platform->get_clk_freqs) { - /* get gpu frequency table */ - err = platform->get_clk_freqs(profile->dev, &freqs, - &num_freqs); - if (err) - return -ENOSYS; - } else - return -ENOSYS; - - profile->devfreq_profile.freq_table = (unsigned long *)freqs; - profile->devfreq_profile.max_state = num_freqs; - - return 0; -} - -/* - * gk20a_scale_target(dev, *freq, flags) - * - * This function scales the clock - */ - -static int gk20a_scale_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_scale_profile *profile = g->scale_profile; - struct devfreq *devfreq = l->devfreq; - unsigned long local_freq = *freq; - unsigned long rounded_rate; - unsigned long min_freq = 0, max_freq = 0; - - /* - * Calculate floor and cap frequency values - * - * Policy : - * We have two APIs to clip the frequency - * 1. devfreq - * 2. 
pm_qos - * - * To calculate floor (min) freq, we select MAX of floor frequencies - * requested from both APIs - * To get cap (max) freq, we select MIN of max frequencies - * - * In case we have conflict (min_freq > max_freq) after above - * steps, we ensure that max_freq wins over min_freq - */ - min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq); - max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq); - - if (min_freq > max_freq) - min_freq = max_freq; - - /* Clip requested frequency */ - if (local_freq < min_freq) - local_freq = min_freq; - - if (local_freq > max_freq) - local_freq = max_freq; - - /* set the final frequency */ - rounded_rate = platform->clk_round_rate(dev, local_freq); - - /* Check for duplicate request */ - if (rounded_rate == g->last_freq) - return 0; - - if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) - *freq = rounded_rate; - else { - g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); - *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - } - - g->last_freq = *freq; - - /* postscale will only scale emc (dram clock) if evaluating - * gk20a_tegra_get_emc_rate() produces a new or different emc - * target because the load or_and gpufreq has changed */ - if (platform->postscale) - platform->postscale(dev, rounded_rate); - - return 0; -} - -/* - * update_load_estimate_gpmu(profile) - * - * Update load estimate using gpmu. The gpmu value is normalised - * based on the time it was asked last time. - */ - -static void update_load_estimate_gpmu(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - unsigned long dt; - u32 busy_time; - ktime_t t; - - t = ktime_get(); - dt = ktime_us_delta(t, profile->last_event_time); - - profile->dev_stat.total_time = dt; - profile->last_event_time = t; - nvgpu_pmu_load_norm(g, &busy_time); - profile->dev_stat.busy_time = (busy_time * dt) / 1000; -} - -/* - * gk20a_scale_suspend(dev) - * - * This function informs devfreq of suspend - */ - -void gk20a_scale_suspend(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return; - - devfreq_suspend_device(devfreq); -} - -/* - * gk20a_scale_resume(dev) - * - * This functions informs devfreq of resume - */ - -void gk20a_scale_resume(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return; - - g->last_freq = 0; - devfreq_resume_device(devfreq); -} - -/* - * gk20a_scale_get_dev_status(dev, *stat) - * - * This function queries the current device status. 
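/*
 * The frequency-clipping policy implemented by gk20a_scale_target()
 * above, restated as a self-contained helper: the effective floor is
 * the MAX of the devfreq and pm_qos floors, the effective cap is the
 * MIN of the two caps, and on conflict the cap wins. Uses the
 * kernel's min/max/clamp helpers; "example_clip_freq" is a
 * hypothetical name.
 */
#include <linux/kernel.h>

static unsigned long example_clip_freq(unsigned long requested,
				       unsigned long devfreq_min,
				       unsigned long devfreq_max,
				       unsigned long qos_min,
				       unsigned long qos_max)
{
	unsigned long min_freq = max(devfreq_min, qos_min);
	unsigned long max_freq = min(devfreq_max, qos_max);

	if (min_freq > max_freq)	/* conflicting limits: cap wins */
		min_freq = max_freq;

	return clamp(requested, min_freq, max_freq);
}

/*
 * e.g. requested = 900 MHz with devfreq allowing [100, 1000] MHz but
 * QoS capping at 600 MHz: floor = 100, cap = 600, result = 600 MHz.
 */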
- */ - -static int gk20a_scale_get_dev_status(struct device *dev, - struct devfreq_dev_status *stat) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - struct gk20a_platform *platform = dev_get_drvdata(dev); - - /* update the software shadow */ - nvgpu_pmu_load_update(g); - - /* inform edp about new constraint */ - if (platform->prescale) - platform->prescale(dev); - - /* Make sure there are correct values for the current frequency */ - profile->dev_stat.current_frequency = - g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - - /* Update load estimate */ - update_load_estimate_gpmu(dev); - - /* Copy the contents of the current device status */ - *stat = profile->dev_stat; - - /* Finally, clear out the local values */ - profile->dev_stat.total_time = 0; - profile->dev_stat.busy_time = 0; - - return 0; -} - -/* - * get_cur_freq(struct device *dev, unsigned long *freq) - * - * This function gets the current GPU clock rate. - */ - -static int get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct gk20a *g = get_gk20a(dev); - *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - return 0; -} - - -/* - * gk20a_scale_init(dev) - */ - -void gk20a_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_scale_profile *profile; - int err; - - if (g->scale_profile) - return; - - if (!platform->devfreq_governor && !platform->qos_notify) - return; - - profile = nvgpu_kzalloc(g, sizeof(*profile)); - - profile->dev = dev; - profile->dev_stat.busy = false; - - /* Create frequency table */ - err = gk20a_scale_make_freq_table(profile); - if (err || !profile->devfreq_profile.max_state) - goto err_get_freqs; - - profile->qos_min_freq = 0; - profile->qos_max_freq = UINT_MAX; - - /* Store device profile so we can access it if devfreq governor - * init needs that */ - g->scale_profile = profile; - - if (platform->devfreq_governor) { - struct devfreq *devfreq; - - profile->devfreq_profile.initial_freq = - profile->devfreq_profile.freq_table[0]; - profile->devfreq_profile.target = gk20a_scale_target; - profile->devfreq_profile.get_dev_status = - gk20a_scale_get_dev_status; - profile->devfreq_profile.get_cur_freq = get_cur_freq; - profile->devfreq_profile.polling_ms = 25; - - devfreq = devfreq_add_device(dev, - &profile->devfreq_profile, - platform->devfreq_governor, NULL); - - if (IS_ERR(devfreq)) - devfreq = NULL; - - l->devfreq = devfreq; - } - - /* Should we register QoS callback for this device? 
*/ - if (platform->qos_notify) { - profile->qos_notify_block.notifier_call = - platform->qos_notify; - - pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - } - - return; - -err_get_freqs: - nvgpu_kfree(g, profile); -} - -void gk20a_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - - if (platform->qos_notify) { - pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - } - - if (platform->devfreq_governor) { - err = devfreq_remove_device(l->devfreq); - l->devfreq = NULL; - } - - nvgpu_kfree(g, g->scale_profile); - g->scale_profile = NULL; -} - -/* - * gk20a_scale_hw_init(dev) - * - * Initialize hardware portion of the device - */ - -void gk20a_scale_hw_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - /* make sure that scaling has bee initialised */ - if (!profile) - return; - - profile->dev_stat.total_time = 0; - profile->last_event_time = ktime_get(); -} diff --git a/drivers/gpu/nvgpu/common/linux/scale.h b/drivers/gpu/nvgpu/common/linux/scale.h deleted file mode 100644 index c1e6fe86..00000000 --- a/drivers/gpu/nvgpu/common/linux/scale.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * gk20a clock scaling profile - * - * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef GK20A_SCALE_H -#define GK20A_SCALE_H - -#include - -struct clk; - -struct gk20a_scale_profile { - struct device *dev; - ktime_t last_event_time; - struct devfreq_dev_profile devfreq_profile; - struct devfreq_dev_status dev_stat; - struct notifier_block qos_notify_block; - unsigned long qos_min_freq; - unsigned long qos_max_freq; - void *private_data; -}; - -/* Initialization and de-initialization for module */ -void gk20a_scale_init(struct device *); -void gk20a_scale_exit(struct device *); -void gk20a_scale_hw_init(struct device *dev); - -#if defined(CONFIG_GK20A_DEVFREQ) -/* - * call when performing submit to notify scaling mechanism that the module is - * in use - */ -void gk20a_scale_notify_busy(struct device *); -void gk20a_scale_notify_idle(struct device *); - -void gk20a_scale_suspend(struct device *); -void gk20a_scale_resume(struct device *); -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p); -#else -static inline void gk20a_scale_notify_busy(struct device *dev) {} -static inline void gk20a_scale_notify_idle(struct device *dev) {} -static inline void gk20a_scale_suspend(struct device *dev) {} -static inline void gk20a_scale_resume(struct device *dev) {} -static inline int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - return -ENOSYS; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c deleted file mode 100644 index 2ad5aabf..00000000 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "sched.h" -#include "os_linux.h" -#include "ioctl_tsg.h" - -#include -#include - -ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, - size_t size, loff_t *off) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - struct nvgpu_sched_event_arg event = { 0 }; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, - "filp=%p buf=%p size=%zu", filp, buf, size); - - if (size < sizeof(event)) - return -EINVAL; - size = sizeof(event); - - nvgpu_mutex_acquire(&sched->status_lock); - while (!sched->status) { - nvgpu_mutex_release(&sched->status_lock); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, - sched->status, 0); - if (err) - return err; - nvgpu_mutex_acquire(&sched->status_lock); - } - - event.reserved = 0; - event.status = sched->status; - - if (copy_to_user(buf, &event, size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - - sched->status = 0; - - nvgpu_mutex_release(&sched->status_lock); - - return size; -} - -unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - unsigned int mask = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->status_lock); - poll_wait(filp, &sched->readout_wq.wq, wait); - if (sched->status) - mask |= POLLIN | POLLRDNORM; - nvgpu_mutex_release(&sched->status_lock); - - return mask; -} - -static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_args *arg) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", - arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - nvgpu_mutex_acquire(&sched->status_lock); - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - sched->active_tsg_bitmap, sched->bitmap_size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_args *arg) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", - arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - nvgpu_mutex_acquire(&sched->status_lock); - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - sched->recent_tsg_bitmap, sched->bitmap_size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - - memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_by_pid_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u64 *bitmap; - unsigned int tsgid; - /* pid at user level corresponds to kernel tgid */ - pid_t tgid = (pid_t)arg->pid; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", - (pid_t)arg->pid, arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) 
|| (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size); - if (!bitmap) - return -ENOMEM; - - nvgpu_mutex_acquire(&sched->status_lock); - for (tsgid = 0; tsgid < f->num_channels; tsgid++) { - if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { - tsg = &f->tsg[tsgid]; - if (tsg->tgid == tgid) - NVGPU_SCHED_SET(tsgid, bitmap); - } - } - nvgpu_mutex_release(&sched->status_lock); - - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - bitmap, sched->bitmap_size)) - err = -EFAULT; - - nvgpu_kfree(sched->g, bitmap); - - return err; -} - -static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_get_params_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ - arg->runlist_interleave = tsg->interleave_level; - arg->timeslice = tsg->timeslice_us; - - arg->graphics_preempt_mode = - tsg->gr_ctx.graphics_preempt_mode; - arg->compute_preempt_mode = - tsg->gr_ctx.compute_preempt_mode; - - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return 0; -} - -static int gk20a_sched_dev_ioctl_tsg_set_timeslice( - struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_timeslice_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - err = gk20a_busy(g); - if (err) - goto done; - - err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); - - gk20a_idle(g); - -done: - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return err; -} - -static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( - struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_runlist_interleave_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - err = gk20a_busy(g); - if (err) - goto done; - - err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); - - gk20a_idle(g); - -done: - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return err; -} - -static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = true; - nvgpu_mutex_release(&sched->control_lock); - return 0; -} - -static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = false; - nvgpu_mutex_release(&sched->control_lock); - 
return 0; -} - -static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_api_version_args *args) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - args->version = NVGPU_SCHED_API_VERSION; - return 0; -} - -static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_refcount_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - nvgpu_mutex_acquire(&sched->status_lock); - if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - nvgpu_warn(g, "tsgid=%d already referenced", tsgid); - /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ - nvgpu_mutex_release(&sched->status_lock); - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - return -ENXIO; - } - - /* keep reference on TSG, will be released on - * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close - */ - NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_refcount_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - nvgpu_mutex_acquire(&sched->status_lock); - if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - nvgpu_mutex_release(&sched->status_lock); - nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); - return -ENXIO; - } - NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); - nvgpu_mutex_release(&sched->status_lock); - - tsg = &f->tsg[tsgid]; - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return 0; -} - -int gk20a_sched_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, sched.cdev); - struct gk20a *g; - struct gk20a_sched_ctrl *sched; - int err = 0; - - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - sched = &l->sched_ctrl; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); - - if (!sched->sw_ready) { - err = gk20a_busy(g); - if (err) - goto free_ref; - - gk20a_idle(g); - } - - if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { - err = -EBUSY; - goto free_ref; - } - - memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, - sched->bitmap_size); - memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); - - filp->private_data = sched; - nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); - -free_ref: - if (err) - gk20a_put(g); - return err; -} - -long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & 
_IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_SCHED_IOCTL_GET_TSGS: - err = gk20a_sched_dev_ioctl_get_tsgs(sched, - (struct nvgpu_sched_get_tsgs_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: - err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched, - (struct nvgpu_sched_get_tsgs_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: - err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched, - (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: - err = gk20a_sched_dev_ioctl_get_params(sched, - (struct nvgpu_sched_tsg_get_params_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: - err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched, - (struct nvgpu_sched_tsg_timeslice_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: - err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched, - (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); - break; - case NVGPU_SCHED_IOCTL_LOCK_CONTROL: - err = gk20a_sched_dev_ioctl_lock_control(sched); - break; - case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: - err = gk20a_sched_dev_ioctl_unlock_control(sched); - break; - case NVGPU_SCHED_IOCTL_GET_API_VERSION: - err = gk20a_sched_dev_ioctl_get_api_version(sched, - (struct nvgpu_sched_api_version_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_TSG: - err = gk20a_sched_dev_ioctl_get_tsg(sched, - (struct nvgpu_sched_tsg_refcount_args *)buf); - break; - case NVGPU_SCHED_IOCTL_PUT_TSG: - err = gk20a_sched_dev_ioctl_put_tsg(sched, - (struct nvgpu_sched_tsg_refcount_args *)buf); - break; - default: - nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - } - - /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on - * purpose with NULL buffer and/or zero size to discover TSG bitmap - * size. We need to update user arguments in this case too, even - * if we return an error. 
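Concretely, the two-call size discovery described here looks like the following from user space. This is a hedged sketch: the uapi header location and device-node handling are assumptions, while the buffer/size fields of struct nvgpu_sched_get_tsgs_args and the -ENOSPC round trip come straight from the handlers above.

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed location of the sched ioctl uapi */

    static uint64_t *query_tsg_bitmap(int fd, uint32_t *size)
    {
        struct nvgpu_sched_get_tsgs_args args = { .buffer = 0, .size = 0 };
        uint64_t *bitmap;

        /* First call: NULL buffer. The kernel writes the required size
         * back into args.size and fails the ioctl with ENOSPC. */
        if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) == 0 ||
            errno != ENOSPC)
            return NULL;

        bitmap = calloc(1, args.size);
        if (!bitmap)
            return NULL;

        /* Second call: retry with a buffer of the advertised size. */
        args.buffer = (uintptr_t)bitmap;
        if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) != 0) {
            free(bitmap);
            return NULL;
        }

        *size = args.size;
        return bitmap;
    }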
- */ - if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { - if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) - err = -EFAULT; - } - - return err; -} - -int gk20a_sched_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - unsigned int tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); - - /* release any reference to TSGs */ - for (tsgid = 0; tsgid < f->num_channels; tsgid++) { - if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - tsg = &f->tsg[tsgid]; - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - } - } - - /* unlock control */ - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = false; - nvgpu_mutex_release(&sched->control_lock); - - nvgpu_mutex_release(&sched->busy_lock); - gk20a_put(g); - return 0; -} - -void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - if (!sched->sw_ready) { - err = gk20a_busy(g); - if (err) { - WARN_ON(err); - return; - } - - gk20a_idle(g); - } - - nvgpu_mutex_acquire(&sched->status_lock); - NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); - NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); - sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; - nvgpu_mutex_release(&sched->status_lock); - nvgpu_cond_signal_interruptible(&sched->readout_wq); -} - -void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->status_lock); - NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); - - /* clear recent_tsg_bitmap as well: if app manager did not - * notice that TSG was previously added, no need to notify it - * if the TSG has been released in the meantime. If the - * TSG gets reallocated, app manager will be notified as usual. 
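On the consuming side, gk20a_sched_ctrl_tsg_added() above wakes readout_wq so that a user-space manager blocked on the sched node can react to newly opened TSGs. A sketch of such a consumer, built on the gk20a_sched_dev_poll() hook declared in sched.h; the poll-then-ioctl sequence is an assumption for illustration, not code from this driver.

    #include <poll.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed location of the sched ioctl uapi */

    static void watch_tsg_events(int fd)
    {
        struct nvgpu_sched_get_tsgs_args args = { .buffer = 0, .size = 0 };
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        for (;;) {
            if (poll(&pfd, 1, -1) < 0)
                break;                    /* interrupted or fd went away */
            if (pfd.revents & POLLIN)
                /* ENOSPC is expected here with a NULL buffer: it only
                 * reports the bitmap size, as described above. */
                ioctl(fd, NVGPU_SCHED_IOCTL_GET_RECENT_TSGS, &args);
        }
    }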
- */ - NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); - - /* do not set event_pending, we only want to notify app manager - * when TSGs are added, so that it can apply sched params - */ - nvgpu_mutex_release(&sched->status_lock); -} - -int gk20a_sched_ctrl_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - struct fifo_gk20a *f = &g->fifo; - int err; - - if (sched->sw_ready) - return 0; - - sched->g = g; - sched->bitmap_size = roundup(f->num_channels, 64) / 8; - sched->status = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", - g, sched, sched->bitmap_size); - - sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->active_tsg_bitmap) - return -ENOMEM; - - sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->recent_tsg_bitmap) { - err = -ENOMEM; - goto free_active; - } - - sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->ref_tsg_bitmap) { - err = -ENOMEM; - goto free_recent; - } - - nvgpu_cond_init(&sched->readout_wq); - - err = nvgpu_mutex_init(&sched->status_lock); - if (err) - goto free_ref; - - err = nvgpu_mutex_init(&sched->control_lock); - if (err) - goto free_status_lock; - - err = nvgpu_mutex_init(&sched->busy_lock); - if (err) - goto free_control_lock; - - sched->sw_ready = true; - - return 0; - -free_control_lock: - nvgpu_mutex_destroy(&sched->control_lock); -free_status_lock: - nvgpu_mutex_destroy(&sched->status_lock); -free_ref: - nvgpu_kfree(g, sched->ref_tsg_bitmap); -free_recent: - nvgpu_kfree(g, sched->recent_tsg_bitmap); -free_active: - nvgpu_kfree(g, sched->active_tsg_bitmap); - - return err; -} - -void gk20a_sched_ctrl_cleanup(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - - nvgpu_kfree(g, sched->active_tsg_bitmap); - nvgpu_kfree(g, sched->recent_tsg_bitmap); - nvgpu_kfree(g, sched->ref_tsg_bitmap); - sched->active_tsg_bitmap = NULL; - sched->recent_tsg_bitmap = NULL; - sched->ref_tsg_bitmap = NULL; - - nvgpu_mutex_destroy(&sched->status_lock); - nvgpu_mutex_destroy(&sched->control_lock); - nvgpu_mutex_destroy(&sched->busy_lock); - - sched->sw_ready = false; -} diff --git a/drivers/gpu/nvgpu/common/linux/sched.h b/drivers/gpu/nvgpu/common/linux/sched.h deleted file mode 100644 index a699bbea..00000000 --- a/drivers/gpu/nvgpu/common/linux/sched.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
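gk20a_sched_ctrl_init() above sizes the three TSG bitmaps as roundup(f->num_channels, 64) / 8: the channel count is rounded up to a whole number of 64-bit words, then converted from bits to bytes. A stand-alone check of that arithmetic; roundup() is re-declared with the kernel's definition so the snippet builds outside the kernel.

    #include <assert.h>

    #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

    int main(void)
    {
        assert(roundup(511, 64) / 8 == 64);  /* 511 channels -> 8 u64 words */
        assert(roundup(512, 64) / 8 == 64);  /* exact multiple: same size   */
        assert(roundup(513, 64) / 8 == 72);  /* one more channel -> 9 words */
        return 0;
    }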
- */ -#ifndef __NVGPU_SCHED_H -#define __NVGPU_SCHED_H - -struct gk20a; -struct gpu_ops; -struct tsg_gk20a; -struct poll_table_struct; - -struct gk20a_sched_ctrl { - struct gk20a *g; - - struct nvgpu_mutex control_lock; - bool control_locked; - bool sw_ready; - struct nvgpu_mutex status_lock; - struct nvgpu_mutex busy_lock; - - u64 status; - - size_t bitmap_size; - u64 *active_tsg_bitmap; - u64 *recent_tsg_bitmap; - u64 *ref_tsg_bitmap; - - struct nvgpu_cond readout_wq; -}; - -int gk20a_sched_dev_release(struct inode *inode, struct file *filp); -int gk20a_sched_dev_open(struct inode *inode, struct file *filp); -long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); -ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); -unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); - -void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); -void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); -int gk20a_sched_ctrl_init(struct gk20a *); - -void gk20a_sched_ctrl_cleanup(struct gk20a *g); - -#endif /* __NVGPU_SCHED_H */ diff --git a/drivers/gpu/nvgpu/common/linux/sim.c b/drivers/gpu/nvgpu/common/linux/sim.c deleted file mode 100644 index 8e964f39..00000000 --- a/drivers/gpu/nvgpu/common/linux/sim.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
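The open/release/ioctl/read/poll prototypes above are the sched node's file hooks. For orientation, a sketch of how such hooks are typically wired into a file_operations table; the actual table lives in the driver's ioctl plumbing elsewhere, so the instance below is illustrative only.

    static const struct file_operations gk20a_sched_fops = {
        .owner = THIS_MODULE,
        .open = gk20a_sched_dev_open,
        .release = gk20a_sched_dev_release,
        .unlocked_ioctl = gk20a_sched_dev_ioctl,
        .read = gk20a_sched_dev_read,
        .poll = gk20a_sched_dev_poll,
    };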
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" -#include "module.h" - -void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) -{ - struct sim_nvgpu_linux *sim_linux = - container_of(sim, struct sim_nvgpu_linux, sim); - - writel(v, sim_linux->regs + r); -} - -u32 sim_readl(struct sim_nvgpu *sim, u32 r) -{ - struct sim_nvgpu_linux *sim_linux = - container_of(sim, struct sim_nvgpu_linux, sim); - - return readl(sim_linux->regs + r); -} - -void nvgpu_remove_sim_support_linux(struct gk20a *g) -{ - struct sim_nvgpu_linux *sim_linux; - - if (!g->sim) - return; - - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - if (sim_linux->regs) { - sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); - iounmap(sim_linux->regs); - sim_linux->regs = NULL; - } - nvgpu_kfree(g, sim_linux); - g->sim = NULL; -} - -int nvgpu_init_sim_support_linux(struct gk20a *g, - struct platform_device *dev) -{ - struct sim_nvgpu_linux *sim_linux; - int err = -ENOMEM; - - if (!nvgpu_platform_is_simulation(g)) - return 0; - - sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); - if (!sim_linux) - return err; - g->sim = &sim_linux->sim; - g->sim->g = g; - sim_linux->regs = nvgpu_ioremap_resource(dev, - GK20A_SIM_IORESOURCE_MEM, - &sim_linux->reg_mem); - if (IS_ERR(sim_linux->regs)) { - nvgpu_err(g, "failed to remap gk20a sim regs"); - err = PTR_ERR(sim_linux->regs); - goto fail; - } - sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; - return 0; - -fail: - nvgpu_remove_sim_support_linux(g); - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/sim_pci.c b/drivers/gpu/nvgpu/common/linux/sim_pci.c deleted file mode 100644 index d37767b7..00000000 --- a/drivers/gpu/nvgpu/common/linux/sim_pci.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
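sim_writel() and sim_readl() above recover the Linux-specific wrapper from the embedded common struct with container_of(). A stand-alone illustration of that embedding pattern; the struct names here are invented for the demo.

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct sim_common { int id; };            /* OS-independent part */

    struct sim_linux_wrap {                   /* OS-specific wrapper */
        void *regs;
        struct sim_common sim;                /* embedded common struct */
    };

    int main(void)
    {
        struct sim_linux_wrap wrap = { .regs = NULL, .sim = { .id = 42 } };
        struct sim_common *common = &wrap.sim;

        /* Given only the embedded member, recover the enclosing struct. */
        struct sim_linux_wrap *back =
            container_of(common, struct sim_linux_wrap, sim);

        printf("id=%d same=%d\n", back->sim.id, back == &wrap);
        return 0;
    }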
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "gk20a/gk20a.h" -#include "os_linux.h" -#include "module.h" - -static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) -{ - u32 cfg; - bool is_simulation = false; - - cfg = nvgpu_readl(g, sim_base + sim_config_r()); - if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) - is_simulation = true; - - return is_simulation; -} - -void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) -{ - struct sim_nvgpu_linux *sim_linux; - bool is_simulation; - - is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); - - if (!is_simulation) { - return; - } - - if (!g->sim) { - nvgpu_warn(g, "sim_gk20a not allocated"); - return; - } - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - - if (sim_linux->regs) { - sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); - sim_linux->regs = NULL; - } - nvgpu_kfree(g, sim_linux); - g->sim = NULL; -} - -int nvgpu_init_sim_support_linux_pci(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct sim_nvgpu_linux *sim_linux; - int err = -ENOMEM; - bool is_simulation; - - is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); - __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); - - if (!is_simulation) - return 0; - - sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); - if (!sim_linux) - return err; - g->sim = &sim_linux->sim; - g->sim->g = g; - sim_linux->regs = l->regs + sim_r(); - sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/soc.c b/drivers/gpu/nvgpu/common/linux/soc.c deleted file mode 100644 index 1b27d6f1..00000000 --- a/drivers/gpu/nvgpu/common/linux/soc.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#ifdef CONFIG_TEGRA_HV_MANAGER -#include -#endif - -#include -#include "os_linux.h" -#include "platform_gk20a.h" - -bool nvgpu_platform_is_silicon(struct gk20a *g) -{ - return tegra_platform_is_silicon(); -} - -bool nvgpu_platform_is_simulation(struct gk20a *g) -{ - return tegra_platform_is_vdk(); -} - -bool nvgpu_platform_is_fpga(struct gk20a *g) -{ - return tegra_platform_is_fpga(); -} - -bool nvgpu_is_hypervisor_mode(struct gk20a *g) -{ - return is_tegra_hypervisor_mode(); -} - -bool nvgpu_is_bpmp_running(struct gk20a *g) -{ - return tegra_bpmp_running(); -} - -bool nvgpu_is_soc_t194_a01(struct gk20a *g) -{ - return ((tegra_get_chip_id() == TEGRA194 && - tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? - true : false); -} - -#ifdef CONFIG_TEGRA_HV_MANAGER -/* When nvlink is enabled on dGPU, we need to use physical memory addresses. - * There is no SMMU translation. However, the device initially enumerates as a - * PCIe device. As such, when allocation memory for this PCIe device, the DMA - * framework ends up allocating memory using SMMU (if enabled in device tree). 
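(This comment continues below; the hook it motivates is installed at the bottom of the file by nvgpu_init_soc_vars().) As a hedged sketch, a consumer of platform->phys_addr might look like the following; the wrapper function is hypothetical, only the hook itself comes from this file.

    static u64 demo_ipa_to_pa(struct gk20a *g, u64 ipa)
    {
        struct gk20a_platform *platform =
            gk20a_get_platform(dev_from_gk20a(g));

        /* No hypervisor: no stage-2 translation, so IPA == PA. */
        if (!platform->phys_addr)
            return ipa;

        return platform->phys_addr(g, ipa);
    }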
- * As a result, when we switch to nvlink, we need to use underlying physical - * addresses, even if memory mappings exist in SMMU. - * In addition, when stage-2 SMMU translation is enabled (for instance when HV - * is enabled), the addresses we get from dma_alloc are IPAs. We need to - * convert them to PA. - */ -static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct hyp_ipa_pa_info info; - int err; - u64 pa = 0ULL; - - err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); - if (err < 0) { - /* WAR for bug 2096877 - * hyp_read_ipa_pa_info only looks up RAM mappings. - * assume one to one IPA:PA mapping for syncpt aperture - */ - u64 start = g->syncpt_unit_base; - u64 end = g->syncpt_unit_base + g->syncpt_unit_size; - if ((ipa >= start) && (ipa < end)) { - pa = ipa; - nvgpu_log(g, gpu_dbg_map_v, - "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", - ipa, platform->vmid, pa); - } else { - nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", - ipa, platform->vmid, err); - } - } else { - pa = info.base + info.offset; - nvgpu_log(g, gpu_dbg_map_v, - "ipa=%llx vmid=%d -> pa=%llx " - "base=%llx offset=%llx size=%llx\n", - ipa, platform->vmid, pa, info.base, - info.offset, info.size); - } - return pa; -} -#endif - -int nvgpu_init_soc_vars(struct gk20a *g) -{ -#ifdef CONFIG_TEGRA_HV_MANAGER - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - if (nvgpu_is_hypervisor_mode(g)) { - err = hyp_read_gid(&platform->vmid); - if (err) { - nvgpu_err(g, "failed to read vmid"); - return err; - } - platform->phys_addr = nvgpu_tegra_hv_ipa_pa; - } -#endif - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c deleted file mode 100644 index fad21351..00000000 --- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Semaphore Sync Framework Integration - * - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "../common/linux/channel.h" - -#include "../drivers/staging/android/sync.h" - -#include "sync_sema_android.h" - -static const struct sync_timeline_ops gk20a_sync_timeline_ops; - -struct gk20a_sync_timeline { - struct sync_timeline obj; - u32 max; - u32 min; -}; - -/** - * The sync framework dups pts when merging fences. We share a single - * refcounted gk20a_sync_pt for each duped pt. - */ -struct gk20a_sync_pt { - struct gk20a *g; - struct nvgpu_ref refcount; - u32 thresh; - struct nvgpu_semaphore *sema; - struct gk20a_sync_timeline *obj; - - /* - * Use a spin lock here since it will have better performance - * than a mutex - there should be very little contention on this - * lock. 
- */ - struct nvgpu_spinlock lock; -}; - -struct gk20a_sync_pt_inst { - struct sync_pt pt; - struct gk20a_sync_pt *shared; -}; - -/** - * Compares sync pt values a and b, both of which will trigger either before - * or after ref (i.e. a and b trigger before ref, or a and b trigger after - * ref). Supplying ref allows us to handle wrapping correctly. - * - * Returns -1 if a < b (a triggers before b) - * 0 if a = b (a and b trigger at the same time) - * 1 if a > b (b triggers before a) - */ -static int __gk20a_sync_pt_compare_ref( - u32 ref, - u32 a, - u32 b) -{ - /* - * We normalize both a and b by subtracting ref from them. - * Denote the normalized values by a_n and b_n. Note that because - * of wrapping, a_n and/or b_n may be negative. - * - * The normalized values a_n and b_n satisfy: - * - a positive value triggers before a negative value - * - a smaller positive value triggers before a greater positive value - * - a smaller negative value (greater in absolute value) triggers - * before a greater negative value (smaller in absolute value). - * - * Thus we can just stick to unsigned arithmetic and compare - * (u32)a_n to (u32)b_n. - * - * Just to reiterate the possible cases: - * - * 1A) ...ref..a....b.... - * 1B) ...ref..b....a.... - * 2A) ...b....ref..a.... b_n < 0 - * 2B) ...a....ref..b.... a_n > 0 - * 3A) ...a....b....ref.. a_n < 0, b_n < 0 - * 3B) ...b....a....ref.. a_n < 0, b_n < 0 - */ - u32 a_n = a - ref; - u32 b_n = b - ref; - if (a_n < b_n) - return -1; - else if (a_n > b_n) - return 1; - else - return 0; -} - -static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - return pti->shared; -} -static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) -{ - if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) - return NULL; - return (struct gk20a_sync_timeline *)obj; -} - -static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) -{ - struct gk20a_sync_pt *pt = - container_of(ref, struct gk20a_sync_pt, refcount); - struct gk20a *g = pt->g; - - if (pt->sema) - nvgpu_semaphore_put(pt->sema); - nvgpu_kfree(g, pt); -} - -static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( - struct gk20a *g, - struct gk20a_sync_timeline *obj, - struct nvgpu_semaphore *sema) -{ - struct gk20a_sync_pt *shared; - - shared = nvgpu_kzalloc(g, sizeof(*shared)); - if (!shared) - return NULL; - - nvgpu_ref_init(&shared->refcount); - shared->g = g; - shared->obj = obj; - shared->sema = sema; - shared->thresh = ++obj->max; /* sync framework has a lock */ - - nvgpu_spinlock_init(&shared->lock); - - nvgpu_semaphore_get(sema); - - return shared; -} - -static struct sync_pt *gk20a_sync_pt_create_inst( - struct gk20a *g, - struct gk20a_sync_timeline *obj, - struct nvgpu_semaphore *sema) -{ - struct gk20a_sync_pt_inst *pti; - - pti = (struct gk20a_sync_pt_inst *) - sync_pt_create(&obj->obj, sizeof(*pti)); - if (!pti) - return NULL; - - pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); - if (!pti->shared) { - sync_pt_free(&pti->pt); - return NULL; - } - return &pti->pt; -} - -static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - if (pt) - nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); -} - -static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt_inst *pti; - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - - pti = (struct gk20a_sync_pt_inst *) -
sync_pt_create(&pt->obj->obj, sizeof(*pti)); - if (!pti) - return NULL; - pti->shared = pt; - nvgpu_ref_get(&pt->refcount); - return &pti->pt; -} - -/* - * This function must be able to run on the same sync_pt concurrently. This - * requires a lock to protect access to the sync_pt's internal data structures - * which are modified as a side effect of calling this function. - */ -static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - struct gk20a_sync_timeline *obj = pt->obj; - bool signaled = true; - - nvgpu_spinlock_acquire(&pt->lock); - if (!pt->sema) - goto done; - - /* Acquired == not released yet == active == not signaled. */ - signaled = !nvgpu_semaphore_is_acquired(pt->sema); - - if (signaled) { - /* Update min if necessary. */ - if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, - obj->min) == 1) - obj->min = pt->thresh; - - /* Release the semaphore to the pool. */ - nvgpu_semaphore_put(pt->sema); - pt->sema = NULL; - } -done: - nvgpu_spinlock_release(&pt->lock); - - return signaled; -} - -static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) -{ - bool a_expired; - bool b_expired; - struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); - struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); - - if (WARN_ON(pt_a->obj != pt_b->obj)) - return 0; - - /* Early out */ - if (a == b) - return 0; - - a_expired = gk20a_sync_pt_has_signaled(a); - b_expired = gk20a_sync_pt_has_signaled(b); - if (a_expired && !b_expired) { - /* Easy, a was earlier */ - return -1; - } else if (!a_expired && b_expired) { - /* Easy, b was earlier */ - return 1; - } - - /* Both a and b are expired (trigger before min) or not - * expired (trigger after min), so we can use min - * as a reference value for __gk20a_sync_pt_compare_ref.
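The same normalization can be checked in isolation. A stand-alone test of the wrap-safe ordering rule implemented by __gk20a_sync_pt_compare_ref(): subtracting the reference point first lets plain unsigned comparison give the right answer even after the 32-bit counters wrap.

    #include <assert.h>
    #include <stdint.h>

    static int compare_ref(uint32_t ref, uint32_t a, uint32_t b)
    {
        uint32_t a_n = a - ref;
        uint32_t b_n = b - ref;

        return (a_n < b_n) ? -1 : (a_n > b_n) ? 1 : 0;
    }

    int main(void)
    {
        /* No wrap: both triggered after ref (case 1A above). */
        assert(compare_ref(100, 101, 102) == -1);

        /* a sits just below ref, so in normalized unsigned terms it is
         * far in the future; b, just above ref, triggers first. */
        assert(compare_ref(100, 99, 101) == 1);

        /* Wrap across 2^32: 0xFFFFFFFF triggers before 0x00000001
         * when ref is 0xFFFFFFF0. */
        assert(compare_ref(0xFFFFFFF0u, 0xFFFFFFFFu, 0x1u) == -1);
        return 0;
    }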
- */ - return __gk20a_sync_pt_compare_ref(pt_a->obj->min, - pt_a->thresh, pt_b->thresh); -} - -static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) -{ - return obj->min; -} - -static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, - char *str, int size) -{ - struct gk20a_sync_timeline *obj = - (struct gk20a_sync_timeline *)timeline; - snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); -} - -static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, - char *str, int size) -{ - struct nvgpu_semaphore *s = pt->sema; - - snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", - s->location.pool->page_idx, - nvgpu_semaphore_get_value(s), - nvgpu_semaphore_read(s)); -} - -static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, - int size) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - - if (pt->sema) { - gk20a_sync_pt_value_str_for_sema(pt, str, size); - return; - } - - snprintf(str, size, "%d", pt->thresh); -} - -static const struct sync_timeline_ops gk20a_sync_timeline_ops = { - .driver_name = "nvgpu_semaphore", - .dup = gk20a_sync_pt_dup_inst, - .has_signaled = gk20a_sync_pt_has_signaled, - .compare = gk20a_sync_pt_compare, - .free_pt = gk20a_sync_pt_free_inst, - .timeline_value_str = gk20a_sync_timeline_value_str, - .pt_value_str = gk20a_sync_pt_value_str, -}; - -/* Public API */ - -struct sync_fence *gk20a_sync_fence_fdget(int fd) -{ - struct sync_fence *fence = sync_fence_fdget(fd); - int i; - - if (!fence) - return NULL; - - for (i = 0; i < fence->num_fences; i++) { - struct fence *pt = fence->cbs[i].sync_pt; - struct sync_pt *spt = sync_pt_from_fence(pt); - struct sync_timeline *t; - - if (spt == NULL) { - sync_fence_put(fence); - return NULL; - } - - t = sync_pt_parent(spt); - if (t->ops != &gk20a_sync_timeline_ops) { - sync_fence_put(fence); - return NULL; - } - } - - return fence; -} - -struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); - struct nvgpu_semaphore *sema; - - nvgpu_spinlock_acquire(&pt->lock); - sema = pt->sema; - if (sema) - nvgpu_semaphore_get(sema); - nvgpu_spinlock_release(&pt->lock); - - return sema; -} - -void gk20a_sync_timeline_signal(struct sync_timeline *timeline) -{ - sync_timeline_signal(timeline, 0); -} - -void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) -{ - sync_timeline_destroy(timeline); -} - -struct sync_timeline *gk20a_sync_timeline_create( - const char *name) -{ - struct gk20a_sync_timeline *obj; - - obj = (struct gk20a_sync_timeline *) - sync_timeline_create(&gk20a_sync_timeline_ops, - sizeof(struct gk20a_sync_timeline), - name); - if (!obj) - return NULL; - obj->max = 0; - obj->min = 0; - return &obj->obj; -} - -struct sync_fence *gk20a_sync_fence_create( - struct channel_gk20a *c, - struct nvgpu_semaphore *sema, - const char *fmt, ...) 
-{ - char name[30]; - va_list args; - struct sync_pt *pt; - struct sync_fence *fence; - struct gk20a *g = c->g; - - struct nvgpu_channel_linux *os_channel_priv = c->os_priv; - struct nvgpu_os_fence_framework *fence_framework = NULL; - struct gk20a_sync_timeline *timeline = NULL; - - fence_framework = &os_channel_priv->fence_framework; - - timeline = to_gk20a_timeline(fence_framework->timeline); - - pt = gk20a_sync_pt_create_inst(g, timeline, sema); - if (pt == NULL) - return NULL; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); - - fence = sync_fence_create(name, pt); - if (fence == NULL) { - sync_pt_free(pt); - return NULL; - } - return fence; -} diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h deleted file mode 100644 index 4fca7bed..00000000 --- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Semaphore Sync Framework Integration - * - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _GK20A_SYNC_H_ -#define _GK20A_SYNC_H_ - -struct sync_timeline; -struct sync_fence; -struct sync_pt; -struct nvgpu_semaphore; -struct fence; - -#ifdef CONFIG_SYNC -struct sync_timeline *gk20a_sync_timeline_create(const char *name); -void gk20a_sync_timeline_destroy(struct sync_timeline *); -void gk20a_sync_timeline_signal(struct sync_timeline *); -struct sync_fence *gk20a_sync_fence_create( - struct channel_gk20a *c, - struct nvgpu_semaphore *, - const char *fmt, ...); -struct sync_fence *gk20a_sync_fence_fdget(int fd); -struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); -#else -static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} -static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} -static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) -{ - return NULL; -} -static inline struct sync_timeline *gk20a_sync_timeline_create( - const char *name) { - return NULL; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.c b/drivers/gpu/nvgpu/common/linux/sysfs.c deleted file mode 100644 index e5995bb8..00000000 --- a/drivers/gpu/nvgpu/common/linux/sysfs.c +++ /dev/null @@ -1,1205 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
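When CONFIG_SYNC is disabled, sync_sema_android.h above degrades to static-inline no-ops so callers need no #ifdefs of their own. A cut-down illustration of that compile-out pattern; the feature and function names are invented for the demo.

    struct demo_obj;

    #ifdef CONFIG_DEMO_FEATURE
    struct demo_obj *demo_create(const char *name);
    void demo_destroy(struct demo_obj *obj);
    #else
    /* Feature compiled out: callers transparently get cheap no-ops. */
    static inline struct demo_obj *demo_create(const char *name)
    {
        return NULL;
    }
    static inline void demo_destroy(struct demo_obj *obj)
    {
    }
    #endif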
- */ - -#include -#include -#include - -#include -#include - -#include "sysfs.h" -#include "platform_gk20a.h" -#include "gk20a/pmu_gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "gv11b/gr_gv11b.h" - -#define PTIMER_FP_FACTOR 1000000 - -#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) - -static ssize_t elcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - if (val) { - g->elcg_enabled = true; - gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); - } else { - g->elcg_enabled = false; - gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); - } - - gk20a_idle(g); - - nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t elcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); - -static ssize_t blcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) - g->blcg_enabled = true; - else - g->blcg_enabled = false; - - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.clock_gating.blcg_bus_load_gating_prod) - g->ops.clock_gating.blcg_bus_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ce_load_gating_prod) - g->ops.clock_gating.blcg_ce_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) - g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_fb_load_gating_prod) - g->ops.clock_gating.blcg_fb_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_fifo_load_gating_prod) - g->ops.clock_gating.blcg_fifo_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_gr_load_gating_prod) - g->ops.clock_gating.blcg_gr_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ltc_load_gating_prod) - g->ops.clock_gating.blcg_ltc_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_pmu_load_gating_prod) - g->ops.clock_gating.blcg_pmu_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_xbar_load_gating_prod) - g->ops.clock_gating.blcg_xbar_load_gating_prod(g, - g->blcg_enabled); - gk20a_idle(g); - - nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t blcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); -} - - -static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); - -static ssize_t slcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) - g->slcg_enabled = true; - else - g->slcg_enabled = false; - - /* - * TODO: slcg_therm_load_gating is not enabled anywhere during - * init. Therefore, it would be incongruous to add it here. 
Once - * it is added to init, we should add it here too. - */ - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.clock_gating.slcg_bus_load_gating_prod) - g->ops.clock_gating.slcg_bus_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ce2_load_gating_prod) - g->ops.clock_gating.slcg_ce2_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) - g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) - g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_fb_load_gating_prod) - g->ops.clock_gating.slcg_fb_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_fifo_load_gating_prod) - g->ops.clock_gating.slcg_fifo_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_gr_load_gating_prod) - g->ops.clock_gating.slcg_gr_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ltc_load_gating_prod) - g->ops.clock_gating.slcg_ltc_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_perf_load_gating_prod) - g->ops.clock_gating.slcg_perf_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_priring_load_gating_prod) - g->ops.clock_gating.slcg_priring_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_pmu_load_gating_prod) - g->ops.clock_gating.slcg_pmu_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_xbar_load_gating_prod) - g->ops.clock_gating.slcg_xbar_load_gating_prod(g, - g->slcg_enabled); - gk20a_idle(g); - - nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t slcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 
1 : 0); -} - -static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); - -static ssize_t ptimer_scale_factor_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - u32 scaling_factor_fp; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / - ((u32)(src_freq_hz) / - (u32)(PTIMER_FP_FACTOR)); - res = snprintf(buf, - PAGE_SIZE, - "%u.%u\n", - scaling_factor_fp / PTIMER_FP_FACTOR, - scaling_factor_fp % PTIMER_FP_FACTOR); - - return res; - -} - -static DEVICE_ATTR(ptimer_scale_factor, - S_IRUGO, - ptimer_scale_factor_show, - NULL); - -static ssize_t ptimer_ref_freq_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); - - return res; - -} - -static DEVICE_ATTR(ptimer_ref_freq, - S_IRUGO, - ptimer_ref_freq_show, - NULL); - -static ssize_t ptimer_src_freq_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); - - return res; - -} - -static DEVICE_ATTR(ptimer_src_freq, - S_IRUGO, - ptimer_src_freq_show, - NULL); - - -#if defined(CONFIG_PM) -static ssize_t railgate_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long railgate_enable = 0; - /* dev is guaranteed to be valid here. Ok to de-reference */ - struct gk20a *g = get_gk20a(dev); - int err; - - if (kstrtoul(buf, 10, &railgate_enable) < 0) - return -EINVAL; - - if (railgate_enable && !g->can_railgate) { - g->can_railgate = true; - pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); - } else if (railgate_enable == 0 && g->can_railgate) { - g->can_railgate = false; - pm_runtime_set_autosuspend_delay(dev, -1); - } - /* wake-up system to make rail-gating setting effective */ - err = gk20a_busy(g); - if (err) - return err; - gk20a_idle(g); - - nvgpu_info(g, "railgate is %s.", g->can_railgate ? - "enabled" : "disabled"); - - return count; -} - -static ssize_t railgate_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 
1 : 0); -} - -static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, - railgate_enable_store); -#endif - -static ssize_t railgate_delay_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - int railgate_delay = 0, ret = 0; - struct gk20a *g = get_gk20a(dev); - int err; - - if (!g->can_railgate) { - nvgpu_info(g, "does not support power-gating"); - return count; - } - - ret = sscanf(buf, "%d", &railgate_delay); - if (ret == 1 && railgate_delay >= 0) { - g->railgate_delay = railgate_delay; - pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); - } else - nvgpu_err(g, "Invalid powergate delay"); - - /* wake-up system to make rail-gating delay effective immediately */ - err = gk20a_busy(g); - if (err) - return err; - gk20a_idle(g); - - return count; -} -static ssize_t railgate_delay_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); -} -static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, - railgate_delay_store); - -static ssize_t is_railgated_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - bool is_railgated = 0; - - if (platform->is_railgated) - is_railgated = platform->is_railgated(dev); - - return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); -} -static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); - -static ssize_t counters_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - u32 busy_cycles, total_cycles; - ssize_t res; - - nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); - - res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); - - return res; -} -static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); - -static ssize_t counters_show_reset(struct device *dev, - struct device_attribute *attr, char *buf) -{ - ssize_t res = counters_show(dev, attr, buf); - struct gk20a *g = get_gk20a(dev); - - nvgpu_pmu_reset_load_counters(g); - - return res; -} -static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); - -static ssize_t gk20a_load_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - u32 busy_time; - ssize_t res; - int err; - - if (!g->power_on) { - busy_time = 0; - } else { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_load_update(g); - nvgpu_pmu_load_norm(g, &busy_time); - gk20a_idle(g); - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); - - return res; -} -static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); - -static ssize_t elpg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->power_on) { - g->elpg_enabled = val ? true : false; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - /* - * Since elpg is refcounted, we should not unnecessarily call - * enable/disable if it is already so. 
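That caveat aside, nearly every attribute in this file repeats one store()/show() shape: parse with kstrtoul, take a busy reference so the GPU is powered, apply the setting, drop the reference, and return the full count on success. A condensed template of the pattern; the attribute name is invented and g->allow_all merely stands in for whichever flag is being toggled.

    static ssize_t demo_enable_store(struct device *dev,
            struct device_attribute *attr, const char *buf, size_t count)
    {
        struct gk20a *g = get_gk20a(dev);
        unsigned long val;
        int err;

        if (kstrtoul(buf, 10, &val) < 0)
            return -EINVAL;              /* reject non-numeric input */

        err = gk20a_busy(g);             /* power up / hold off railgating */
        if (err)
            return err;

        g->allow_all = val != 0;         /* stand-in for the real knob */
        gk20a_idle(g);

        return count;
    }

    static ssize_t demo_enable_show(struct device *dev,
            struct device_attribute *attr, char *buf)
    {
        return snprintf(buf, PAGE_SIZE, "%d\n",
                        get_gk20a(dev)->allow_all ? 1 : 0);
    }

    static DEVICE_ATTR(demo_enable, ROOTRW, demo_enable_show, demo_enable_store);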
- */ - if (val && !g->elpg_enabled) { - g->elpg_enabled = true; - nvgpu_pmu_pg_global_enable(g, true); - - } else if (!val && g->elpg_enabled) { - if (g->ops.pmu.pmu_pg_engines_feature_list && - g->ops.pmu.pmu_pg_engines_feature_list(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != - NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { - nvgpu_pmu_pg_global_enable(g, false); - g->elpg_enabled = false; - } else { - g->elpg_enabled = false; - nvgpu_pmu_pg_global_enable(g, false); - } - } - gk20a_idle(g); - } - nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t elpg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); - -static ssize_t ldiv_slowdown_factor_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) { - nvgpu_err(g, "parse error for input SLOWDOWN factor\n"); - return -EINVAL; - } - - if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { - nvgpu_err(g, "Invalid SLOWDOWN factor\n"); - return -EINVAL; - } - - if (val == g->ldiv_slowdown_factor) - return count; - - if (!g->power_on) { - g->ldiv_slowdown_factor = val; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - - g->ldiv_slowdown_factor = val; - - if (g->ops.pmu.pmu_pg_init_param) - g->ops.pmu.pmu_pg_init_param(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS); - - gk20a_idle(g); - } - - nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor); - - return count; -} - -static ssize_t ldiv_slowdown_factor_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); -} - -static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, - ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); - -static ssize_t mscg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_pmu *pmu = &g->pmu; - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->power_on) { - g->mscg_enabled = val ? true : false; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - /* - * Since elpg is refcounted, we should not unnecessarily call - * enable/disable if it is already so. - */ - if (val && !g->mscg_enabled) { - g->mscg_enabled = true; - if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, - PMU_PG_LPWR_FEATURE_MSCG)) { - if (!ACCESS_ONCE(pmu->mscg_stat)) { - WRITE_ONCE(pmu->mscg_stat, - PMU_MSCG_ENABLED); - /* make status visible */ - smp_mb(); - } - } - - } else if (!val && g->mscg_enabled) { - if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, - PMU_PG_LPWR_FEATURE_MSCG)) { - nvgpu_pmu_pg_global_enable(g, false); - WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); - /* make status visible */ - smp_mb(); - g->mscg_enabled = false; - if (g->elpg_enabled) - nvgpu_pmu_pg_global_enable(g, true); - } - g->mscg_enabled = false; - } - gk20a_idle(g); - } - nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? 
"enabled" : - "disabled"); - - return count; -} - -static ssize_t mscg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); - -static ssize_t aelpg_param_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - int status = 0; - union pmu_ap_cmd ap_cmd; - int *paramlist = (int *)g->pmu.aelpg_param; - u32 defaultparam[5] = { - APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, - APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, - APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, - APCTRL_POWER_BREAKEVEN_DEFAULT_US, - APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT - }; - - /* Get each parameter value from input string*/ - sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], - ¶mlist[2], ¶mlist[3], ¶mlist[4]); - - /* If parameter value is 0 then reset to SW default values*/ - if ((paramlist[0] | paramlist[1] | paramlist[2] - | paramlist[3] | paramlist[4]) == 0x00) { - memcpy(paramlist, defaultparam, sizeof(defaultparam)); - } - - /* If aelpg is enabled & pmu is ready then post values to - * PMU else store then post later - */ - if (g->aelpg_enabled && g->pmu.pmu_ready) { - /* Disable AELPG */ - ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; - ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - - /* Enable AELPG */ - nvgpu_aelpg_init(g); - nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); - } - - return count; -} - -static ssize_t aelpg_param_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, - "%d %d %d %d %d\n", g->pmu.aelpg_param[0], - g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], - g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); -} - -static DEVICE_ATTR(aelpg_param, ROOTRW, - aelpg_param_read, aelpg_param_store); - -static ssize_t aelpg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int status = 0; - union pmu_ap_cmd ap_cmd; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - if (g->pmu.pmu_ready) { - if (val && !g->aelpg_enabled) { - g->aelpg_enabled = true; - /* Enable AELPG */ - ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; - ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - } else if (!val && g->aelpg_enabled) { - g->aelpg_enabled = false; - /* Disable AELPG */ - ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; - ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - } - } else { - nvgpu_info(g, "PMU is not ready, AELPG request failed"); - } - gk20a_idle(g); - - nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t aelpg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 
1 : 0); -} - -static DEVICE_ATTR(aelpg_enable, ROOTRW, - aelpg_enable_read, aelpg_enable_store); - - -static ssize_t allow_all_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); -} - -static ssize_t allow_all_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - g->allow_all = (val ? true : false); - gk20a_idle(g); - - return count; -} - -static DEVICE_ATTR(allow_all, ROOTRW, - allow_all_enable_read, allow_all_enable_store); - -static ssize_t emc3d_ratio_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - g->emc3d_ratio = val; - - return count; -} - -static ssize_t emc3d_ratio_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); -} - -static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); - -static ssize_t fmax_at_vmin_safe_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long gpu_fmax_at_vmin_hz = 0; - - if (g->ops.clk.get_fmax_at_vmin_safe) - gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); - - return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); -} - -static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); - -#ifdef CONFIG_PM -static ssize_t force_idle_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) { - if (g->forced_idle) - return count; /* do nothing */ - else { - err = __gk20a_do_idle(g, false); - if (!err) { - g->forced_idle = 1; - nvgpu_info(g, "gpu is idle : %d", - g->forced_idle); - } - } - } else { - if (!g->forced_idle) - return count; /* do nothing */ - else { - err = __gk20a_do_unidle(g); - if (!err) { - g->forced_idle = 0; - nvgpu_info(g, "gpu is idle : %d", - g->forced_idle); - } - } - } - - return count; -} - -static ssize_t force_idle_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 
1 : 0); -} - -static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); -#endif - -static ssize_t tpc_fs_mask_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->gr.gpc_tpc_mask) - return -ENODEV; - - if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { - g->gr.gpc_tpc_mask[0] = val; - g->tpc_fs_mask_user = val; - - g->ops.gr.set_gpc_tpc_mask(g, 0); - - nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); - g->gr.ctx_vars.local_golden_image = NULL; - g->gr.ctx_vars.golden_image_initialized = false; - g->gr.ctx_vars.golden_image_size = 0; - /* Cause next poweron to reinit just gr */ - g->gr.sw_ready = false; - } - - return count; -} - -static ssize_t tpc_fs_mask_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - u32 gpc_index; - u32 tpc_fs_mask = 0; - int err = 0; - - err = gk20a_busy(g); - if (err) - return err; - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - if (g->ops.gr.get_gpc_tpc_mask) - tpc_fs_mask |= - g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << - (gr->max_tpc_per_gpc_count * gpc_index); - } - - gk20a_idle(g); - - return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); -} - -static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); - -static ssize_t min_timeslice_us_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); -} - -static ssize_t min_timeslice_us_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val > g->max_timeslice_us) - return -EINVAL; - - g->min_timeslice_us = val; - - return count; -} - -static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, - min_timeslice_us_store); - -static ssize_t max_timeslice_us_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); -} - -static ssize_t max_timeslice_us_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val < g->min_timeslice_us) - return -EINVAL; - - g->max_timeslice_us = val; - - return count; -} - -static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, - max_timeslice_us_store); - -static ssize_t czf_bypass_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val >= 4) - return -EINVAL; - - g->gr.czf_bypass = val; - - return count; -} - -static ssize_t czf_bypass_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return sprintf(buf, "%d\n", g->gr.czf_bypass); -} - -static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); - -static ssize_t pd_max_batches_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - 
unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val > 64) - return -EINVAL; - - g->gr.pd_max_batches = val; - - return count; -} - -static ssize_t pd_max_batches_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return sprintf(buf, "%d\n", g->gr.pd_max_batches); -} - -static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); - -static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - unsigned long val = 0; - int err = -1; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { - if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) - return -EINVAL; - } - - gr->gfxp_wfi_timeout_count = val; - - if (g->ops.gr.init_preemption_state && g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.init_preemption_state(g)); - - gk20a_idle(g); - - if (err) - return err; - } - return count; -} - -static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - int err = -1; - - if (count > 0 && buf[0] == 's') - /* sysclk */ - gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; - else - /* usec */ - gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; - - if (g->ops.gr.init_preemption_state && g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.init_preemption_state(g)); - - gk20a_idle(g); - - if (err) - return err; - } - - return count; -} - -static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - u32 val = gr->gfxp_wfi_timeout_count; - - return snprintf(buf, PAGE_SIZE, "%d\n", val); -} - -static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - - if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) - return snprintf(buf, PAGE_SIZE, "usec\n"); - else - return snprintf(buf, PAGE_SIZE, "sysclk\n"); -} - -static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), - gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); - -static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), - gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); - -void nvgpu_remove_sysfs(struct device *dev) -{ - device_remove_file(dev, &dev_attr_elcg_enable); - device_remove_file(dev, &dev_attr_blcg_enable); - device_remove_file(dev, &dev_attr_slcg_enable); - device_remove_file(dev, &dev_attr_ptimer_scale_factor); - device_remove_file(dev, &dev_attr_ptimer_ref_freq); - device_remove_file(dev, &dev_attr_ptimer_src_freq); - device_remove_file(dev, &dev_attr_elpg_enable); - device_remove_file(dev, &dev_attr_mscg_enable); - device_remove_file(dev, &dev_attr_emc3d_ratio); - device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); - - device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); - - device_remove_file(dev, &dev_attr_counters); - device_remove_file(dev, &dev_attr_counters_reset); - device_remove_file(dev, &dev_attr_load); - device_remove_file(dev, 
&dev_attr_railgate_delay); - device_remove_file(dev, &dev_attr_is_railgated); -#ifdef CONFIG_PM - device_remove_file(dev, &dev_attr_force_idle); - device_remove_file(dev, &dev_attr_railgate_enable); -#endif - device_remove_file(dev, &dev_attr_aelpg_param); - device_remove_file(dev, &dev_attr_aelpg_enable); - device_remove_file(dev, &dev_attr_allow_all); - device_remove_file(dev, &dev_attr_tpc_fs_mask); - device_remove_file(dev, &dev_attr_min_timeslice_us); - device_remove_file(dev, &dev_attr_max_timeslice_us); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_nvhost_remove_symlink(get_gk20a(dev)); -#endif - - device_remove_file(dev, &dev_attr_czf_bypass); - device_remove_file(dev, &dev_attr_pd_max_batches); - device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); - device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); - - if (strcmp(dev_name(dev), "gpu.0")) { - struct kobject *kobj = &dev->kobj; - struct device *parent = container_of((kobj->parent), - struct device, kobj); - sysfs_remove_link(&parent->kobj, "gpu.0"); - } -} - -int nvgpu_create_sysfs(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int error = 0; - - error |= device_create_file(dev, &dev_attr_elcg_enable); - error |= device_create_file(dev, &dev_attr_blcg_enable); - error |= device_create_file(dev, &dev_attr_slcg_enable); - error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); - error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); - error |= device_create_file(dev, &dev_attr_ptimer_src_freq); - error |= device_create_file(dev, &dev_attr_elpg_enable); - error |= device_create_file(dev, &dev_attr_mscg_enable); - error |= device_create_file(dev, &dev_attr_emc3d_ratio); - error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); - - error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); - - error |= device_create_file(dev, &dev_attr_counters); - error |= device_create_file(dev, &dev_attr_counters_reset); - error |= device_create_file(dev, &dev_attr_load); - error |= device_create_file(dev, &dev_attr_railgate_delay); - error |= device_create_file(dev, &dev_attr_is_railgated); -#ifdef CONFIG_PM - error |= device_create_file(dev, &dev_attr_force_idle); - error |= device_create_file(dev, &dev_attr_railgate_enable); -#endif - error |= device_create_file(dev, &dev_attr_aelpg_param); - error |= device_create_file(dev, &dev_attr_aelpg_enable); - error |= device_create_file(dev, &dev_attr_allow_all); - error |= device_create_file(dev, &dev_attr_tpc_fs_mask); - error |= device_create_file(dev, &dev_attr_min_timeslice_us); - error |= device_create_file(dev, &dev_attr_max_timeslice_us); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - error |= nvgpu_nvhost_create_symlink(g); -#endif - - error |= device_create_file(dev, &dev_attr_czf_bypass); - error |= device_create_file(dev, &dev_attr_pd_max_batches); - error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); - error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); - - if (strcmp(dev_name(dev), "gpu.0")) { - struct kobject *kobj = &dev->kobj; - struct device *parent = container_of((kobj->parent), - struct device, kobj); - error |= sysfs_create_link(&parent->kobj, - &dev->kobj, "gpu.0"); - } - - if (error) - nvgpu_err(g, "Failed to create sysfs attributes!\n"); - - return error; -} diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.h b/drivers/gpu/nvgpu/common/linux/sysfs.h deleted file mode 100644 index 80925844..00000000 --- a/drivers/gpu/nvgpu/common/linux/sysfs.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017, 
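nvgpu_create_sysfs() above ORs every device_create_file() result into a single error and logs once at the end, and nvgpu_remove_sysfs() mirrors the whole list by hand. A hedged sketch of the same attributes expressed as an attribute_group, which collapses both lists into one call each (the array is abbreviated; the names mirror the dev_attr_* objects above):

    static struct attribute *nvgpu_dev_attrs[] = {
            &dev_attr_elcg_enable.attr,
            &dev_attr_blcg_enable.attr,
            &dev_attr_slcg_enable.attr,
            &dev_attr_tpc_fs_mask.attr,
            /* ... remaining attributes ... */
            NULL,                           /* sysfs requires a sentinel */
    };

    static const struct attribute_group nvgpu_dev_attr_group = {
            .attrs = nvgpu_dev_attrs,
    };

    /* creation:  sysfs_create_group(&dev->kobj, &nvgpu_dev_attr_group);
     * removal:   sysfs_remove_group(&dev->kobj, &nvgpu_dev_attr_group); */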
NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef NVGPU_SYSFS_H -#define NVGPU_SYSFS_H - -struct device; - -int nvgpu_create_sysfs(struct device *dev); -void nvgpu_remove_sysfs(struct device *dev); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/thread.c b/drivers/gpu/nvgpu/common/linux/thread.c deleted file mode 100644 index 92c556f2..00000000 --- a/drivers/gpu/nvgpu/common/linux/thread.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -int nvgpu_thread_proxy(void *threaddata) -{ - struct nvgpu_thread *thread = threaddata; - int ret = thread->fn(thread->data); - - thread->running = false; - return ret; -} - -int nvgpu_thread_create(struct nvgpu_thread *thread, - void *data, - int (*threadfn)(void *data), const char *name) -{ - struct task_struct *task = kthread_create(nvgpu_thread_proxy, - thread, name); - if (IS_ERR(task)) - return PTR_ERR(task); - - thread->task = task; - thread->fn = threadfn; - thread->data = data; - thread->running = true; - wake_up_process(task); - return 0; -}; - -void nvgpu_thread_stop(struct nvgpu_thread *thread) -{ - if (thread->task) { - kthread_stop(thread->task); - thread->task = NULL; - } -}; - -bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) -{ - return kthread_should_stop(); -}; - -bool nvgpu_thread_is_running(struct nvgpu_thread *thread) -{ - return ACCESS_ONCE(thread->running); -}; diff --git a/drivers/gpu/nvgpu/common/linux/timers.c b/drivers/gpu/nvgpu/common/linux/timers.c deleted file mode 100644 index d1aa641f..00000000 --- a/drivers/gpu/nvgpu/common/linux/timers.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
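The thread wrapper deleted above rides on kthreads: nvgpu_thread_proxy() traps the worker's return value and clears the running flag, while nvgpu_thread_should_stop() just forwards to kthread_should_stop(), so it is only meaningful when called from inside the worker itself. A hedged caller-side sketch (worker(), struct my_ctx, and process_one_item() are hypothetical):

    static int worker(void *data)
    {
            struct my_ctx *ctx = data;              /* hypothetical context */

            while (!nvgpu_thread_should_stop(&ctx->thread))
                    process_one_item(ctx);          /* hypothetical work */

            return 0;
    }

    /* ... */
    err = nvgpu_thread_create(&ctx->thread, ctx, worker, "nvgpu_worker");
    if (err)
            return err;
    /* ... later: kthread_stop() blocks until worker() returns */
    nvgpu_thread_stop(&ctx->thread);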
- */ - -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" - -/* - * Returns 1 if the platform is pre-Si and should ignore the timeout checking. - * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e do the - * timeout check regardless of platform). - */ -static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) -{ - if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) - return 0; - - return !nvgpu_platform_is_silicon(timeout->g); -} - -/** - * nvgpu_timeout_init - Init timer. - * - * @g - nvgpu device. - * @timeout - The timer. - * @duration - Timeout in milliseconds or number of retries. - * @flags - Flags for timer. - * - * This configures the timeout to start the timeout duration now, i.e: when this - * function is called. Available flags to pass to @flags: - * - * %NVGPU_TIMER_CPU_TIMER - * %NVGPU_TIMER_RETRY_TIMER - * %NVGPU_TIMER_NO_PRE_SI - * %NVGPU_TIMER_SILENT_TIMEOUT - * - * If neither %NVGPU_TIMER_CPU_TIMER or %NVGPU_TIMER_RETRY_TIMER is passed then - * a CPU timer is used by default. - */ -int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, - u32 duration, unsigned long flags) -{ - if (flags & ~NVGPU_TIMER_FLAG_MASK) - return -EINVAL; - - memset(timeout, 0, sizeof(*timeout)); - - timeout->g = g; - timeout->flags = flags; - - if (flags & NVGPU_TIMER_RETRY_TIMER) - timeout->retries.max = duration; - else - timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), - (s64)NSEC_PER_MSEC * duration)); - - return 0; -} - -static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, - void *caller, - const char *fmt, va_list args) -{ - struct gk20a *g = timeout->g; - ktime_t now = ktime_get(); - - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (ktime_after(now, ns_to_ktime(timeout->time))) { - if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { - char buf[128]; - - vsnprintf(buf, sizeof(buf), fmt, args); - - nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); - } - - return -ETIMEDOUT; - } - - return 0; -} - -static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, - void *caller, - const char *fmt, va_list args) -{ - struct gk20a *g = timeout->g; - - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (timeout->retries.attempted >= timeout->retries.max) { - if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { - char buf[128]; - - vsnprintf(buf, sizeof(buf), fmt, args); - - nvgpu_err(g, "No more retries @ %pF %s", caller, buf); - } - - return -ETIMEDOUT; - } - - timeout->retries.attempted++; - - return 0; -} - -/** - * __nvgpu_timeout_expired_msg - Check if a timeout has expired. - * - * @timeout - The timeout to check. - * @caller - Address of the caller of this function. - * @fmt - The fmt string. - * - * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. - * - * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout - * then a message is printed based on %fmt. - */ -int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, - void *caller, const char *fmt, ...) -{ - int ret; - va_list args; - - va_start(args, fmt); - if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) - ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, - args); - else - ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, - args); - va_end(args); - - return ret; -} - -/** - * nvgpu_timeout_peek_expired - Check the status of a timeout. - * - * @timeout - The timeout to check. 
- * - * Returns non-zero if the timeout is expired, zero otherwise. In the case of - * retry timers this will not increment the underlying retry count. Also if the - * timer has expired no messages will be printed. - * - * This function honors the pre-Si check as well. - */ -int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) -{ - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) - return timeout->retries.attempted >= timeout->retries.max; - else - return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); -} - -/** - * nvgpu_udelay - Delay for some number of microseconds. - * - * @usecs - Microseconds to wait for. - * - * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly - * accurate. This is normally backed by a busy-loop so this means waits should - * be kept short, below 100us. If longer delays are necessary then - * nvgpu_msleep() should be preferred. - * - * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This - * function will attempt to not use a busy-loop. - */ -void nvgpu_udelay(unsigned int usecs) -{ - udelay(usecs); -} - -/** - * nvgpu_usleep_range - Sleep for a range of microseconds. - * - * @min_us - Minimum wait time. - * @max_us - Maximum wait time. - * - * Wait for some number of microseconds between @min_us and @max_us. This, - * unlike nvgpu_udelay(), will attempt to sleep for the passed number of - * microseconds instead of busy looping. Not all platforms support this, - * and in that case this reduces to nvgpu_udelay(min_us). - * - * Linux note: this is not safe to use in atomic context. If you are in - * atomic context you must use nvgpu_udelay(). - */ -void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) -{ - usleep_range(min_us, max_us); -} - -/** - * nvgpu_msleep - Sleep for some milliseconds. - * - * @msecs - Sleep for at least this many milliseconds. - * - * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms - * or so) the sleep will be significantly longer due to scheduling overhead and - * mechanics. - */ -void nvgpu_msleep(unsigned int msecs) -{ - msleep(msecs); -} - -/** - * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. - * - * Return a clock in millisecond units. The start time of the clock is - * unspecified; the time returned can be compared with older ones to measure - * durations. The source clock does not jump when the system clock is adjusted. - */ -s64 nvgpu_current_time_ms(void) -{ - return ktime_to_ms(ktime_get()); -} - -/** - * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. - * - * Return a clock in nanosecond units. The start time of the clock is - * unspecified; the time returned can be compared with older ones to measure - * durations. The source clock does not jump when the system clock is adjusted. - */ -s64 nvgpu_current_time_ns(void) -{ - return ktime_to_ns(ktime_get()); -} - -/** - * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. - * - * Return a "high resolution" time stamp. It does not really matter exactly what - * it is, so long as it generally returns unique values and monotonically - * increases - wrap around _is_ possible though in a system running for long - * enough. - * - * Note: what high resolution means is system dependent. 
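Taken together, the timer helpers above support the driver's standard register-poll idiom: arm a timeout, spin with short delays, and bail with -ETIMEDOUT. A hedged sketch (the readiness test and duration are hypothetical); note that on pre-silicon platforms nvgpu_timeout_peek_expired() never fires unless NVGPU_TIMER_NO_PRE_SI was set, so there the loop simply waits as long as it takes:

    struct nvgpu_timeout timeout;

    nvgpu_timeout_init(g, &timeout, 2000 /* ms, hypothetical */,
                       NVGPU_TIMER_CPU_TIMER);
    do {
            if (unit_is_ready(g))           /* hypothetical readiness test */
                    return 0;

            nvgpu_udelay(10);               /* keep busy-waits short */
    } while (!nvgpu_timeout_peek_expired(&timeout));

    return -ETIMEDOUT;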
- */ -u64 nvgpu_hr_timestamp(void) -{ - return get_cycles(); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c deleted file mode 100644 index 0858e6b1..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Virtualized GPU Clock Interface - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include "gk20a/gk20a.h" -#include "clk_vgpu.h" -#include "ctrl/ctrlclk.h" -#include "common/linux/platform_gk20a.h" - -static unsigned long -vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; - -static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) -{ - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err; - unsigned long ret = 0; - - nvgpu_log_fn(g, " "); - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) - nvgpu_err(g, "%s failed - %d", __func__, err); - else - /* return frequency in Hz */ - ret = p->rate * 1000; - break; - case CTRL_CLK_DOMAIN_PWRCLK: - nvgpu_err(g, "unsupported clock: %u", api_domain); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - break; - } - - return ret; -} - -static int vgpu_clk_set_rate(struct gk20a *g, - u32 api_domain, unsigned long rate) -{ - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err = -EINVAL; - - nvgpu_log_fn(g, " "); - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - - /* server dvfs framework requires frequency in kHz */ - p->rate = (u32)(rate / 1000); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? 
err : msg.ret; - if (err) - nvgpu_err(g, "%s failed - %d", __func__, err); - break; - case CTRL_CLK_DOMAIN_PWRCLK: - nvgpu_err(g, "unsupported clock: %u", api_domain); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - break; - } - - return err; -} - -static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) -{ - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - - return priv->constants.max_freq; -} - -void vgpu_init_clk_support(struct gk20a *g) -{ - g->ops.clk.get_rate = vgpu_clk_get_rate; - g->ops.clk.set_rate = vgpu_clk_set_rate; - g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; -} - -long vgpu_clk_round_rate(struct device *dev, unsigned long rate) -{ - /* server will handle frequency rounding */ - return rate; -} - -int vgpu_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_get_gpu_freq_table_params *p = - &msg.params.get_gpu_freq_table; - unsigned int i; - int err; - - nvgpu_log_fn(g, " "); - - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; - msg.handle = vgpu_get_handle(g); - - p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) { - nvgpu_err(g, "%s failed - %d", __func__, err); - return err; - } - - /* return frequency in Hz */ - for (i = 0; i < p->num_freqs; i++) - vgpu_freq_table[i] = p->freqs[i] * 1000; - - *freqs = vgpu_freq_table; - *num_freqs = p->num_freqs; - - return 0; -} - -int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err = 0; - - nvgpu_log_fn(g, " "); - - msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - p->rate = (u32)rate; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) { - nvgpu_err(g, "%s failed - %d", __func__, err); - return err; - } - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h deleted file mode 100644 index 8d477643..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Virtualized GPU Clock Interface - * - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
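Two conventions recur throughout the virtualized clock code above: the server-side DVFS framework speaks kHz while the Linux side reports Hz (hence the *1000 and /1000 conversions), and every RPC folds the transport error together with the server's status via "err = err ? err : msg.ret". A hedged helper expressing that folding once (vgpu_send_cmd() is hypothetical; the driver open-codes it at each call site):

    static int vgpu_send_cmd(struct tegra_vgpu_cmd_msg *msg)
    {
            int err = vgpu_comm_sendrecv(msg, sizeof(*msg), sizeof(*msg));

            /* transport failure wins; otherwise report the server status */
            return err ? err : msg->ret;
    }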
- */ - -#ifndef _CLK_VIRT_H_ -#define _CLK_VIRT_H_ - -void vgpu_init_clk_support(struct gk20a *g); -long vgpu_clk_round_rate(struct device *dev, unsigned long rate); -int vgpu_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs); -int vgpu_clk_cap_rate(struct device *dev, unsigned long rate); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c deleted file mode 100644 index 499a8eb4..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "common/linux/os_linux.h" -#include "vgpu/fecs_trace_vgpu.h" - -struct vgpu_fecs_trace { - struct tegra_hv_ivm_cookie *cookie; - struct nvgpu_ctxsw_ring_header *header; - struct nvgpu_ctxsw_trace_entry *entries; - int num_entries; - bool enabled; - void *buf; -}; - -int vgpu_fecs_trace_init(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - struct device_node *np = dev->of_node; - struct of_phandle_args args; - struct vgpu_fecs_trace *vcst; - u32 mempool; - int err; - - nvgpu_log_fn(g, " "); - - vcst = nvgpu_kzalloc(g, sizeof(*vcst)); - if (!vcst) - return -ENOMEM; - - err = of_parse_phandle_with_fixed_args(np, - "mempool-fecs-trace", 1, 0, &args); - if (err) { - nvgpu_info(g, "does not support fecs trace"); - goto fail; - } - __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); - - mempool = args.args[0]; - vcst->cookie = vgpu_ivm_mempool_reserve(mempool); - if (IS_ERR(vcst->cookie)) { - nvgpu_info(g, - "mempool %u reserve failed", mempool); - vcst->cookie = NULL; - err = -EINVAL; - goto fail; - } - - vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), - vgpu_ivm_get_size(vcst->cookie)); - if (!vcst->buf) { - nvgpu_info(g, "ioremap_cache failed"); - err = -EINVAL; - goto fail; - } - vcst->header = vcst->buf; - vcst->num_entries = vcst->header->num_ents; - if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { - nvgpu_err(g, "entry size mismatch"); - goto fail; - } - vcst->entries = vcst->buf + sizeof(*vcst->header); - g->fecs_trace = (struct gk20a_fecs_trace *)vcst; - - return 0; -fail: - iounmap(vcst->buf); - if (vcst->cookie) - vgpu_ivm_mempool_unreserve(vcst->cookie); - nvgpu_kfree(g, vcst); - return err; -} - -int vgpu_fecs_trace_deinit(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - iounmap(vcst->buf); - vgpu_ivm_mempool_unreserve(vcst->cookie); - nvgpu_kfree(g, vcst); - return 0; -} - -int vgpu_fecs_trace_enable(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, - .handle = vgpu_get_handle(g), - }; - int err; - - err = 
vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - vcst->enabled = !err; - return err; -} - -int vgpu_fecs_trace_disable(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, - .handle = vgpu_get_handle(g), - }; - int err; - - vcst->enabled = false; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -bool vgpu_fecs_trace_is_enabled(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - return (vcst && vcst->enabled); -} - -int vgpu_fecs_trace_poll(struct gk20a *g) -{ - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, - .handle = vgpu_get_handle(g), - }; - int err; - - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - *buf = vcst->buf; - *size = vgpu_ivm_get_size(vcst->cookie); - return 0; -} - -int vgpu_free_user_buffer(struct gk20a *g) -{ - return 0; -} - -int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - unsigned long size = vgpu_ivm_get_size(vcst->cookie); - unsigned long vsize = vma->vm_end - vma->vm_start; - - size = min(size, vsize); - size = round_up(size, PAGE_SIZE); - - return remap_pfn_range(vma, vma->vm_start, - vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, - size, - vma->vm_page_prot); -} - -#ifdef CONFIG_GK20A_CTXSW_TRACE -int vgpu_fecs_trace_max_entries(struct gk20a *g, - struct nvgpu_ctxsw_trace_filter *filter) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - return vcst->header->num_ents; -} - -#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE -#error "FECS trace filter size mismatch!" -#endif - -int vgpu_fecs_trace_set_filter(struct gk20a *g, - struct nvgpu_ctxsw_trace_filter *filter) -{ - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, - .handle = vgpu_get_handle(g), - }; - struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; - int err; - - memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -void vgpu_fecs_trace_data_update(struct gk20a *g) -{ - gk20a_ctxsw_trace_wake_up(g, 0); -} -#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c deleted file mode 100644 index 054b019b..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
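The FECS-trace code above treats the reserved IVM mempool as one contiguous ring: a header immediately followed by header->num_ents fixed-size entries, with vgpu_mmap_user_buffer() exposing the same physical range to userspace via remap_pfn_range(). The layout implied by the init path (illustrative comment only, derived from the code above):

    /*
     *   vcst->buf ----> +---------------------------------------+
     *                   | struct nvgpu_ctxsw_ring_header        |
     *   vcst->entries-> +---------------------------------------+
     *                   | nvgpu_ctxsw_trace_entry[0]            |
     *                   | ...                                   |
     *                   | nvgpu_ctxsw_trace_entry[num_ents - 1] |
     *                   +---------------------------------------+
     *   (backing store: IVM mempool mapped with ioremap_cache())
     */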
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "gk20a/gk20a.h" -#include "common/linux/vgpu/clk_vgpu.h" -#include "common/linux/platform_gk20a.h" -#include "common/linux/os_linux.h" - -#include - -#include - -static int gv11b_vgpu_probe(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct resource *r; - void __iomem *regs; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); - struct gk20a *g = platform->g; - int ret; - - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); - if (!r) { - nvgpu_err(g, "failed to get usermode regs"); - return -ENXIO; - } - regs = devm_ioremap_resource(dev, r); - if (IS_ERR(regs)) { - nvgpu_err(g, "failed to map usermode regs"); - return PTR_ERR(regs); - } - l->usermode_regs = regs; - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - ret = nvgpu_get_nvhost_dev(g); - if (ret) { - l->usermode_regs = NULL; - return ret; - } - - ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, - &g->syncpt_unit_base, - &g->syncpt_unit_size); - if (ret) { - nvgpu_err(g, "Failed to get syncpt interface"); - return -ENOSYS; - } - g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); - nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", - g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); -#endif - vgpu_init_clk_support(platform->g); - - return 0; -} - -struct gk20a_platform gv11b_vgpu_tegra_platform = { - .has_syncpoints = true, - - /* power management configuration */ - .can_railgate_init = false, - .can_elpg_init = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_elcg = false, - .enable_elpg = false, - .enable_aelpg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gv11b_vgpu_probe, - - .clk_round_rate = vgpu_clk_round_rate, - .get_clk_freqs = vgpu_clk_get_freqs, - - /* frequency scaling configuration */ - .devfreq_governor = "userspace", - - .virtual_dev = true, -}; diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c deleted file mode 100644 index 830b04ac..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Tegra Virtualized GPU Platform Interface - * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "gk20a/gk20a.h" -#include "common/linux/platform_gk20a.h" -#include "clk_vgpu.h" - -#include - -static int gk20a_tegra_probe(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret; - - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; - - vgpu_init_clk_support(platform->g); - return 0; -#else - return 0; -#endif -} - -struct gk20a_platform vgpu_tegra_platform = { - .has_syncpoints = true, - .aggressive_sync_destroy_thresh = 64, - - /* power management configuration */ - .can_railgate_init = false, - .can_elpg_init = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_elcg = false, - .enable_elpg = false, - .enable_aelpg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gk20a_tegra_probe, - - .clk_round_rate = vgpu_clk_round_rate, - .get_clk_freqs = vgpu_clk_get_freqs, - - /* frequency scaling configuration */ - .devfreq_governor = "userspace", - - .virtual_dev = true, -}; diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c deleted file mode 100644 index 5a8ed9fd..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "common/linux/platform_gk20a.h" - -static ssize_t vgpu_load_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct tegra_vgpu_cmd_msg msg = {0}; - struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; - int err; - - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; - msg.handle = vgpu_get_handle(g); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - if (err) - return err; - - return snprintf(buf, PAGE_SIZE, "%u\n", p->load); -} -static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); - -void vgpu_create_sysfs(struct device *dev) -{ - if (device_create_file(dev, &dev_attr_load)) - dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); -} - -void vgpu_remove_sysfs(struct device *dev) -{ - device_remove_file(dev, &dev_attr_load); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c deleted file mode 100644 index b28b5013..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
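One detail worth noting in vgpu_load_show() above: it returns the transport error from vgpu_comm_sendrecv() but never examines msg.ret, so a server-side failure would arguably surface as a stale or zero load value. A hedged variant of the tail of that handler which also folds the server status, matching the idiom used elsewhere in these files:

    err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
    err = err ? err : msg.ret;      /* also fail on server-side errors */
    if (err)
            return err;

    return snprintf(buf, PAGE_SIZE, "%u\n", p->load);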
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "common/linux/os_linux.h" - -int vgpu_ivc_init(struct gk20a *g, u32 elems, - const size_t *queue_sizes, u32 queue_start, u32 num_queues) -{ - struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); - - return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, - num_queues); -} - -void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) -{ - tegra_gr_comm_deinit(queue_start, num_queues); -} - -void vgpu_ivc_release(void *handle) -{ - tegra_gr_comm_release(handle); -} - -u32 vgpu_ivc_get_server_vmid(void) -{ - return tegra_gr_comm_get_server_vmid(); -} - -int vgpu_ivc_recv(u32 index, void **handle, void **data, - size_t *size, u32 *sender) -{ - return tegra_gr_comm_recv(index, handle, data, size, sender); -} - -int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) -{ - return tegra_gr_comm_send(peer, index, data, size); -} - -int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, - void **data, size_t *size) -{ - return tegra_gr_comm_sendrecv(peer, index, handle, data, size); -} - -u32 vgpu_ivc_get_peer_self(void) -{ - return TEGRA_GR_COMM_ID_SELF; -} - -void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, - size_t *size) -{ - return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); -} - -void vgpu_ivc_oob_put_ptr(void *handle) -{ - tegra_gr_comm_oob_put_ptr(handle); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c deleted file mode 100644 index 90089de8..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) -{ - return tegra_hv_mempool_reserve(id); -} - -int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) -{ - return tegra_hv_mempool_unreserve(cookie); -} - -u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) -{ - return cookie->ipa; -} - -u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) -{ - return cookie->size; -} - -void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) -{ - return ioremap_cache(vgpu_ivm_get_ipa(cookie), - vgpu_ivm_get_size(cookie)); -} - -void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, - void *addr) -{ - iounmap(addr); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c deleted file mode 100644 index 0d224eb9..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Virtualized GPU for Linux - * - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
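vgpu_ivc.c and vgpu_ivm.c above are deliberately thin: they hide the Tegra-specific tegra_gr_comm_* and tegra_hv_* APIs behind nvgpu-named entry points so OS-independent vgpu code never includes Tegra headers, which is exactly the common/OS split this commit formalizes. A hedged usage sketch of the IVM lifecycle (id is a hypothetical mempool id):

    struct tegra_hv_ivm_cookie *cookie;
    void *va;

    cookie = vgpu_ivm_mempool_reserve(id);
    if (IS_ERR(cookie))
            return PTR_ERR(cookie);

    va = vgpu_ivm_mempool_map(cookie);       /* ioremap_cache() of the IPA */
    if (!va) {
            vgpu_ivm_mempool_unreserve(cookie);
            return -ENOMEM;
    }

    /* ... access vgpu_ivm_get_size(cookie) bytes through va ... */

    vgpu_ivm_mempool_unmap(cookie, va);      /* iounmap() */
    vgpu_ivm_mempool_unreserve(cookie);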
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "vgpu_linux.h" -#include "vgpu/fecs_trace_vgpu.h" -#include "clk_vgpu.h" -#include "gk20a/tsg_gk20a.h" -#include "gk20a/channel_gk20a.h" -#include "gk20a/regops_gk20a.h" -#include "gm20b/hal_gm20b.h" - -#include "common/linux/module.h" -#include "common/linux/os_linux.h" -#include "common/linux/ioctl.h" -#include "common/linux/scale.h" -#include "common/linux/driver_common.h" -#include "common/linux/platform_gk20a.h" - -#include - -struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) -{ - struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); - - return (struct vgpu_priv_data *)plat->vgpu_priv; -} - -static void vgpu_remove_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - vgpu_remove_support_common(g); - - /* free mappings to registers, etc*/ - - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } -} - -static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - nvgpu_mutex_init(&g->poweron_lock); - nvgpu_mutex_init(&g->poweroff_lock); - nvgpu_mutex_init(&g->ctxsw_disable_lock); - l->regs_saved = l->regs; - l->bar1_saved = l->bar1; - - g->aggressive_sync_destroy = platform->aggressive_sync_destroy; - g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; - g->has_syncpoints = platform->has_syncpoints; - g->ptimer_src_freq = platform->ptimer_src_freq; - g->can_railgate = platform->can_railgate_init; - g->railgate_delay = platform->railgate_delay_init; - - __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, - platform->unify_address_spaces); -} - -static int vgpu_init_support(struct platform_device *pdev) -{ - struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - struct gk20a *g = get_gk20a(&pdev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - void __iomem *regs; - int err = 0; - - if (!r) { - nvgpu_err(g, "failed to get gk20a bar1"); - err = -ENXIO; - goto fail; - } - - if (r->name && !strcmp(r->name, "/vgpu")) { - regs = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(regs)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(regs); - goto fail; - } - l->bar1 = regs; - l->bar1_mem = r; - } - - nvgpu_mutex_init(&g->dbg_sessions_lock); - nvgpu_mutex_init(&g->client_lock); - - nvgpu_init_list_node(&g->profiler_objects); - - g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); - if (!g->dbg_regops_tmp_buf) { - nvgpu_err(g, "couldn't allocate regops tmp buf"); - return -ENOMEM; - } - g->dbg_regops_tmp_buf_ops = - SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); - - g->remove_support = vgpu_remove_support; - return 0; - - fail: - vgpu_remove_support(g); - return err; -} - -int vgpu_pm_prepare_poweroff(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - 
int ret = 0; - - nvgpu_log_fn(g, " "); - - if (!g->power_on) - return 0; - - ret = gk20a_channel_suspend(g); - if (ret) - return ret; - - g->power_on = false; - - return ret; -} - -int vgpu_pm_finalize_poweron(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - - nvgpu_log_fn(g, " "); - - if (g->power_on) - return 0; - - g->power_on = true; - - vgpu_detect_chip(g); - err = vgpu_init_hal(g); - if (err) - goto done; - - if (g->ops.ltc.init_fs_state) - g->ops.ltc.init_fs_state(g); - - err = nvgpu_init_ltc_support(g); - if (err) { - nvgpu_err(g, "failed to init ltc"); - goto done; - } - - err = vgpu_init_mm_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a mm"); - goto done; - } - - err = vgpu_init_fifo_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a fifo"); - goto done; - } - - err = vgpu_init_gr_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a gr"); - goto done; - } - - err = g->ops.chip_init_gpu_characteristics(g); - if (err) { - nvgpu_err(g, "failed to init gk20a gpu characteristics"); - goto done; - } - - err = nvgpu_finalize_poweron_linux(l); - if (err) - goto done; - -#ifdef CONFIG_GK20A_CTXSW_TRACE - gk20a_ctxsw_trace_init(g); -#endif - gk20a_sched_ctrl_init(g); - gk20a_channel_resume(g); - - g->sw_ready = true; - -done: - return err; -} - -static int vgpu_qos_notify(struct notifier_block *nb, - unsigned long n, void *data) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a *g = get_gk20a(profile->dev); - u32 max_freq; - int err; - - nvgpu_log_fn(g, " "); - - max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); - err = vgpu_clk_cap_rate(profile->dev, max_freq); - if (err) - nvgpu_err(g, "%s failed, err=%d", __func__, err); - - return NOTIFY_OK; /* need notify call further */ -} - -static int vgpu_pm_qos_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { - if (!profile) - return -EINVAL; - } else { - profile = nvgpu_kzalloc(g, sizeof(*profile)); - if (!profile) - return -ENOMEM; - g->scale_profile = profile; - } - - profile->dev = dev; - profile->qos_notify_block.notifier_call = vgpu_qos_notify; - pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - return 0; -} - -static void vgpu_pm_qos_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - nvgpu_kfree(g, g->scale_profile); - g->scale_profile = NULL; -} - -static int vgpu_pm_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - unsigned long *freqs; - int num_freqs; - int err = 0; - - nvgpu_log_fn(g, " "); - - if (nvgpu_platform_is_simulation(g)) - return 0; - - __pm_runtime_disable(dev, false); - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) - gk20a_scale_init(dev); - - if (l->devfreq) { - /* set min/max frequency based on frequency table */ - err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs); - if (err) - return err; - - if (num_freqs < 1) - return -EINVAL; - - l->devfreq->min_freq = freqs[0]; - l->devfreq->max_freq = freqs[num_freqs - 1]; - } - - err = vgpu_pm_qos_init(dev); - if (err) - return err; - - return err; -} - -int vgpu_probe(struct platform_device *pdev) -{ - struct nvgpu_os_linux *l; - struct gk20a *gk20a; - 
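vgpu_pm_finalize_poweron() above brings units up in strict dependency order: chip detection and HAL first, then LTC, MM, FIFO, GR, GPU characteristics, and finally the Linux-only pieces. A hedged restatement of the core sequence as a table-driven loop (illustration only; the driver open-codes the calls so each failure can log its own message):

    static int vgpu_run_poweron_steps(struct gk20a *g)
    {
            static int (*const steps[])(struct gk20a *g) = {
                    nvgpu_init_ltc_support,   /* cache setup used by MM */
                    vgpu_init_mm_support,     /* address spaces come first */
                    vgpu_init_fifo_support,   /* channels depend on MM */
                    vgpu_init_gr_support,     /* graphics depends on FIFO */
            };
            unsigned int i;
            int err;

            for (i = 0; i < ARRAY_SIZE(steps); i++) {
                    err = steps[i](g);
                    if (err)
                            return err;       /* stop at the first failure */
            }
            return 0;
    }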
int err; - struct device *dev = &pdev->dev; - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct vgpu_priv_data *priv; - - if (!platform) { - dev_err(dev, "no platform data\n"); - return -ENODATA; - } - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - gk20a = &l->g; - - nvgpu_log_fn(gk20a, " "); - - nvgpu_init_gk20a(gk20a); - - nvgpu_kmem_init(gk20a); - - err = nvgpu_init_enabled_flags(gk20a); - if (err) { - kfree(gk20a); - return err; - } - - l->dev = dev; - if (tegra_platform_is_vdk()) - __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); - - gk20a->is_virtual = true; - - priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); - if (!priv) { - kfree(gk20a); - return -ENOMEM; - } - - platform->g = gk20a; - platform->vgpu_priv = priv; - - err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); - if (err) - return err; - - vgpu_init_support(pdev); - - vgpu_init_vars(gk20a, platform); - - init_rwsem(&l->busy_lock); - - nvgpu_spinlock_init(&gk20a->mc_enable_lock); - - gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; - - /* Initialize the platform interface. */ - err = platform->probe(dev); - if (err) { - if (err == -EPROBE_DEFER) - nvgpu_info(gk20a, "platform probe failed"); - else - nvgpu_err(gk20a, "platform probe failed"); - return err; - } - - if (platform->late_probe) { - err = platform->late_probe(dev); - if (err) { - nvgpu_err(gk20a, "late probe failed"); - return err; - } - } - - err = vgpu_comm_init(gk20a); - if (err) { - nvgpu_err(gk20a, "failed to init comm interface"); - return -ENOSYS; - } - - priv->virt_handle = vgpu_connect(); - if (!priv->virt_handle) { - nvgpu_err(gk20a, "failed to connect to server node"); - vgpu_comm_deinit(); - return -ENOSYS; - } - - err = vgpu_get_constants(gk20a); - if (err) { - vgpu_comm_deinit(); - return err; - } - - err = vgpu_pm_init(dev); - if (err) { - nvgpu_err(gk20a, "pm init failed"); - return err; - } - - err = nvgpu_thread_create(&priv->intr_handler, gk20a, - vgpu_intr_thread, "gk20a"); - if (err) - return err; - - gk20a_debug_init(gk20a, "gpu.0"); - - /* Set DMA parameters to allow larger sgt lists */ - dev->dma_parms = &l->dma_parms; - dma_set_max_seg_size(dev, UINT_MAX); - - gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; - gk20a->timeouts_disabled_by_user = false; - nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); - - vgpu_create_sysfs(dev); - gk20a_init_gr(gk20a); - - nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); - gk20a->gr.max_comptag_mem = totalram_pages - >> (10 - (PAGE_SHIFT - 10)); - - nvgpu_ref_init(&gk20a->refcount); - - return 0; -} - -int vgpu_remove(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct gk20a *g = get_gk20a(dev); - - nvgpu_log_fn(g, " "); - - vgpu_pm_qos_remove(dev); - if (g->remove_support) - g->remove_support(g); - - vgpu_comm_deinit(); - gk20a_sched_ctrl_cleanup(g); - gk20a_user_deinit(dev, &nvgpu_class); - vgpu_remove_sysfs(dev); - gk20a_get_platform(dev)->g = NULL; - gk20a_put(g); - - return 0; -} - -bool vgpu_is_reduced_bar1(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h deleted file mode 100644 index 38379cf2..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h +++ /dev/null @@ -1,57 +0,0 
@@ -/* - * Virtualized GPU Linux Interfaces - * - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __VGPU_LINUX_H__ -#define __VGPU_LINUX_H__ - -struct device; -struct platform_device; - -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - -#include - -int vgpu_pm_prepare_poweroff(struct device *dev); -int vgpu_pm_finalize_poweron(struct device *dev); -int vgpu_probe(struct platform_device *dev); -int vgpu_remove(struct platform_device *dev); - -void vgpu_create_sysfs(struct device *dev); -void vgpu_remove_sysfs(struct device *dev); -#else -/* define placeholders for functions used outside of vgpu */ - -static inline int vgpu_pm_prepare_poweroff(struct device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_pm_finalize_poweron(struct device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_probe(struct platform_device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_remove(struct platform_device *dev) -{ - return -ENOSYS; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c deleted file mode 100644 index 136d4a10..00000000 --- a/drivers/gpu/nvgpu/common/linux/vidmem.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
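The header above uses a common kernel pattern: when CONFIG_TEGRA_GR_VIRTUALIZATION is disabled, static inline stubs returning -ENOSYS stand in for the real entry points, so callers compile and link without scattering #ifdefs. Hedged caller-side illustration (native_probe() is a hypothetical name for the non-virtual path):

    /* Callers can branch on platform data unconditionally: */
    if (platform->virtual_dev)
            err = vgpu_probe(pdev);   /* real code, or the -ENOSYS stub */
    else
            err = native_probe(pdev); /* hypothetical native path */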
- */ - -#include -#include - -#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD -#include -#endif - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" - -bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) -{ - return !!(addr & 1ULL); -} - -void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) -{ - /* set bit 0 to indicate vidmem allocation */ - sg_dma_address(sgl) = (addr | 1ULL); -} - -struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) -{ - u64 addr; - - addr = sg_dma_address(sgl); - - if (nvgpu_addr_is_vidmem_page_alloc(addr)) - addr = addr & ~1ULL; - else - WARN_ON(1); - - return (struct nvgpu_page_alloc *)(uintptr_t)addr; -} - -static struct sg_table *gk20a_vidbuf_map_dma_buf( - struct dma_buf_attachment *attach, enum dma_data_direction dir) -{ - struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; - - return buf->mem->priv.sgt; -} - -static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *sgt, - enum dma_data_direction dir) -{ -} - -static void gk20a_vidbuf_release(struct dma_buf *dmabuf) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - struct gk20a *g = buf->g; - - vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", - dmabuf, buf->mem->size >> 10); - - if (linux_buf && linux_buf->dmabuf_priv_delete) - linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); - - nvgpu_kfree(g, linux_buf); - nvgpu_vidmem_buf_free(g, buf); - - gk20a_put(g); -} - -static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) -{ - WARN_ON("Not supported"); - return NULL; -} - -static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, - unsigned long page_num) -{ - WARN_ON("Not supported"); - return NULL; -} - -static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - return -EINVAL; -} - -static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, - struct device *dev, void *priv, void (*delete)(void *priv)) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - - linux_buf->dmabuf_priv = priv; - linux_buf->dmabuf_priv_delete = delete; - - return 0; -} - -static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, - struct device *dev) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - - return linux_buf->dmabuf_priv; -} - -static const struct dma_buf_ops gk20a_vidbuf_ops = { - .map_dma_buf = gk20a_vidbuf_map_dma_buf, - .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, - .release = gk20a_vidbuf_release, - .kmap_atomic = gk20a_vidbuf_kmap_atomic, - .kmap = gk20a_vidbuf_kmap, - .mmap = gk20a_vidbuf_mmap, - .set_drvdata = gk20a_vidbuf_set_private, - .get_drvdata = gk20a_vidbuf_get_private, -}; - -static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) -{ - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.priv = buf; - exp_info.ops = &gk20a_vidbuf_ops; - exp_info.size = buf->mem->size; - exp_info.flags = O_RDWR; - - return dma_buf_export(&exp_info); -} - -struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - - if (dmabuf->ops != &gk20a_vidbuf_ops) - return NULL; - - return buf->g; -} - -int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) -{ - struct nvgpu_vidmem_buf *buf = NULL; - struct nvgpu_vidmem_linux *priv; - int err, fd; - - /* - * This ref is released 
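The helpers at the top of vidmem.c above steal bit 0 of sg_dma_address() to mark vidmem allocations: page-alloc handles are pointer-aligned, so that bit is never set naturally. A hedged round-trip illustration (alloc is a hypothetical struct nvgpu_page_alloc pointer, sgl an already-populated scatterlist entry):

    u64 handle = (u64)(uintptr_t)alloc;

    nvgpu_vidmem_set_page_alloc(sgl, handle);       /* stores handle | 1 */
    WARN_ON(!nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(sgl)));
    WARN_ON(nvgpu_vidmem_get_page_alloc(sgl) != alloc); /* bit 0 cleared */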
when the dma_buf is closed. - */ - if (!gk20a_get(g)) - return -ENODEV; - - vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto fail; - } - - buf = nvgpu_vidmem_user_alloc(g, bytes); - if (IS_ERR(buf)) { - err = PTR_ERR(buf); - goto fail; - } - - priv->dmabuf = gk20a_vidbuf_export(buf); - if (IS_ERR(priv->dmabuf)) { - err = PTR_ERR(priv->dmabuf); - goto fail; - } - - buf->priv = priv; - -#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD - fd = tegra_alloc_fd(current->files, 1024, O_RDWR); -#else - fd = get_unused_fd_flags(O_RDWR); -#endif - if (fd < 0) { - /* ->release frees what we have done */ - dma_buf_put(priv->dmabuf); - return fd; - } - - /* fclose() on this drops one ref, freeing the dma buf */ - fd_install(fd, priv->dmabuf->file); - - vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", - priv->dmabuf, buf->mem->size >> 10); - - return fd; - -fail: - nvgpu_vidmem_buf_free(g, buf); - nvgpu_kfree(g, priv); - gk20a_put(g); - - vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); - return err; -} - -int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, - void *buffer, u64 offset, u64 size, u32 cmd) -{ - struct nvgpu_vidmem_buf *vidmem_buf; - struct nvgpu_mem *mem; - int err = 0; - - if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) - return -EINVAL; - - vidmem_buf = dmabuf->priv; - mem = vidmem_buf->mem; - - switch (cmd) { - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: - nvgpu_mem_rd_n(g, mem, offset, buffer, size); - break; - - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: - nvgpu_mem_wr_n(g, mem, offset, buffer, size); - break; - - default: - err = -EINVAL; - } - - return err; -} - -void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) -{ - nvgpu_free(vidmem->allocator, - (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); - nvgpu_free_sgtable(g, &vidmem->priv.sgt); -} diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c deleted file mode 100644 index baa77515..00000000 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "dmabuf.h" - -static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) - core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) - core_flags |= NVGPU_VM_MAP_CACHEABLE; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) - core_flags |= NVGPU_VM_MAP_IO_COHERENT; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) - core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) - core_flags |= NVGPU_VM_MAP_L3_ALLOC; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) - core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) - nvgpu_warn(g, "Ignoring deprecated flag: " - "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); - - return core_flags; -} - -static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( - struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) -{ - struct nvgpu_rbtree_node *node = NULL; - struct nvgpu_rbtree_node *root = vm->mapped_buffers; - - nvgpu_rbtree_enum_start(0, &node, root); - - while (node) { - struct nvgpu_mapped_buf *mapped_buffer = - mapped_buffer_from_rbtree_node(node); - - if (mapped_buffer->os_priv.dmabuf == dmabuf && - mapped_buffer->kind == kind) - return mapped_buffer; - - nvgpu_rbtree_enum_next(&node, node); - } - - return NULL; -} - -int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, - struct dma_buf **dmabuf, - u64 *offset) -{ - struct nvgpu_mapped_buf *mapped_buffer; - struct gk20a *g = gk20a_from_vm(vm); - - nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); - if (!mapped_buffer) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - return -EINVAL; - } - - *dmabuf = mapped_buffer->os_priv.dmabuf; - *offset = gpu_va - mapped_buffer->addr; - - nvgpu_mutex_release(&vm->update_gmmu_lock); - - return 0; -} - -u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) -{ - return os_buf->dmabuf->size; -} - -/* - * vm->update_gmmu_lock must be held. This checks to see if we already have - * mapped the passed buffer into this VM. If so, just return the existing - * mapping address. - */ -struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, - struct nvgpu_os_buffer *os_buf, - u64 map_addr, - u32 flags, - int kind) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct nvgpu_mapped_buf *mapped_buffer = NULL; - - if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { - mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); - if (!mapped_buffer) - return NULL; - - if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || - mapped_buffer->kind != (u32)kind) - return NULL; - } else { - mapped_buffer = - __nvgpu_vm_find_mapped_buf_reverse(vm, - os_buf->dmabuf, - kind); - if (!mapped_buffer) - return NULL; - } - - if (mapped_buffer->flags != flags) - return NULL; - - /* - * If we find the mapping here then that means we have mapped it already - * and the prior pin and get must be undone. 
- */ - gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, - mapped_buffer->os_priv.sgt); - dma_buf_put(os_buf->dmabuf); - - nvgpu_log(g, gpu_dbg_map, - "gv: 0x%04x_%08x + 0x%-7zu " - "[dma: 0x%010llx, pa: 0x%010llx] " - "pgsz=%-3dKb as=%-2d " - "flags=0x%x apt=%s (reused)", - u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), - os_buf->dmabuf->size, - (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), - (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), - vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, - vm_aspace_id(vm), - mapped_buffer->flags, - nvgpu_aperture_str(g, - gk20a_dmabuf_aperture(g, os_buf->dmabuf))); - - return mapped_buffer; -} - -int nvgpu_vm_map_linux(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags, - s16 compr_kind, - s16 incompr_kind, - int rw_flag, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch, - u64 *gpu_va) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct device *dev = dev_from_gk20a(g); - struct nvgpu_os_buffer os_buf; - struct sg_table *sgt; - struct nvgpu_sgt *nvgpu_sgt = NULL; - struct nvgpu_mapped_buf *mapped_buffer = NULL; - struct dma_buf_attachment *attachment; - u64 map_addr = 0ULL; - int err = 0; - - if (flags & NVGPU_VM_MAP_FIXED_OFFSET) - map_addr = offset_align; - - sgt = gk20a_mm_pin(dev, dmabuf, &attachment); - if (IS_ERR(sgt)) { - nvgpu_warn(g, "Failed to pin dma_buf!"); - return PTR_ERR(sgt); - } - os_buf.dmabuf = dmabuf; - os_buf.attachment = attachment; - os_buf.dev = dev; - - if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { - err = -EINVAL; - goto clean_up; - } - - nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); - if (!nvgpu_sgt) { - err = -ENOMEM; - goto clean_up; - } - - mapped_buffer = nvgpu_vm_map(vm, - &os_buf, - nvgpu_sgt, - map_addr, - mapping_size, - buffer_offset, - rw_flag, - flags, - compr_kind, - incompr_kind, - batch, - gk20a_dmabuf_aperture(g, dmabuf)); - - nvgpu_sgt_free(g, nvgpu_sgt); - - if (IS_ERR(mapped_buffer)) { - err = PTR_ERR(mapped_buffer); - goto clean_up; - } - - mapped_buffer->os_priv.dmabuf = dmabuf; - mapped_buffer->os_priv.attachment = attachment; - mapped_buffer->os_priv.sgt = sgt; - - *gpu_va = mapped_buffer->addr; - return 0; - -clean_up: - gk20a_mm_unpin(dev, dmabuf, attachment, sgt); - - return err; -} - -int nvgpu_vm_map_buffer(struct vm_gk20a *vm, - int dmabuf_fd, - u64 *offset_align, - u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ - s16 compr_kind, - s16 incompr_kind, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct dma_buf *dmabuf; - u64 ret_va; - int err = 0; - - /* get ref to the mem handle (released on unmap_locked) */ - dmabuf = dma_buf_get(dmabuf_fd); - if (IS_ERR(dmabuf)) { - nvgpu_warn(g, "%s: fd %d is not a dmabuf", - __func__, dmabuf_fd); - return PTR_ERR(dmabuf); - } - - /* verify that we're not overflowing the buffer, i.e. - * (buffer_offset + mapping_size)> dmabuf->size. - * - * Since buffer_offset + mapping_size could overflow, first check - * that mapping size < dmabuf_size, at which point we can subtract - * mapping_size from both sides for the final comparison. 
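 *
 * A self-contained illustration of the same overflow-safe check (the
 * helper name is hypothetical, not part of the driver):
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	// True iff [offset, offset + size) lies inside a buf_size-byte
 *	// buffer, without ever forming the sum that could wrap.
 *	static bool range_fits(uint64_t buf_size, uint64_t offset,
 *			       uint64_t size)
 *	{
 *		if (size > buf_size)
 *			return false;
 *		return offset <= buf_size - size;
 *	}
 *
 * For example, range_fits(16, 8, 8) is true, while
 * range_fits(16, UINT64_MAX, 8) is false even though the naive
 * offset + size comparison would wrap around to 7 and pass.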
- */ - if ((mapping_size > dmabuf->size) || - (buffer_offset > (dmabuf->size - mapping_size))) { - nvgpu_err(g, - "buf size %llx < (offset(%llx) + map_size(%llx))\n", - (u64)dmabuf->size, buffer_offset, mapping_size); - dma_buf_put(dmabuf); - return -EINVAL; - } - - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); - if (err) { - dma_buf_put(dmabuf); - return err; - } - - err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align, - nvgpu_vm_translate_linux_flags(g, flags), - compr_kind, incompr_kind, - gk20a_mem_flag_none, - buffer_offset, - mapping_size, - batch, - &ret_va); - - if (!err) - *offset_align = ret_va; - else - dma_buf_put(dmabuf); - - return err; -} - -/* - * This is the function call-back for freeing OS specific components of an - * nvgpu_mapped_buf. This should most likely never be called outside of the - * core MM framework! - * - * Note: the VM lock will be held. - */ -void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) -{ - struct vm_gk20a *vm = mapped_buffer->vm; - - gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, - mapped_buffer->os_priv.attachment, - mapped_buffer->os_priv.sgt); - - dma_buf_put(mapped_buffer->os_priv.dmabuf); -} diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 117920da..7e0aee11 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -41,7 +41,7 @@ #include "fecs_trace_gk20a.h" #include "gk20a.h" #include "gr_gk20a.h" -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #include diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c index 057527a9..07e281a5 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c @@ -24,7 +24,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include diff --git a/drivers/gpu/nvgpu/gp106/mclk_gp106.c b/drivers/gpu/nvgpu/gp106/mclk_gp106.c index 074aec92..4ea9e6f6 100644 --- a/drivers/gpu/nvgpu/gp106/mclk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/mclk_gp106.c @@ -28,7 +28,7 @@ #include "gk20a/gk20a.h" #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include "gp106/mclk_gp106.h" diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c index b3862abe..b1f6fefb 100644 --- a/drivers/gpu/nvgpu/gp106/therm_gp106.c +++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c @@ -27,7 +27,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b.h b/drivers/gpu/nvgpu/gp10b/platform_gp10b.h deleted file mode 100644 index d256d126..00000000 --- a/drivers/gpu/nvgpu/gp10b/platform_gp10b.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * GP10B Platform (SoC) Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _GP10B_PLATFORM_H_ -#define _GP10B_PLATFORM_H_ - -struct device; - -int gp10b_tegra_get_clocks(struct device *dev); -int gp10b_tegra_reset_assert(struct device *dev); -int gp10b_tegra_reset_deassert(struct device *dev); -void gp10b_tegra_scale_init(struct device *dev); -long gp10b_round_clk_rate(struct device *dev, unsigned long rate); -int gp10b_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs); -void gp10b_tegra_prescale(struct device *dev); -void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); -#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h index ba6012ec..7d50d222 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h @@ -25,6 +25,8 @@ #ifdef CONFIG_TEGRA_GK20A_NVHOST +#include + struct nvgpu_nvhost_dev; struct gk20a; struct sync_pt; diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c new file mode 100644 index 00000000..32b333f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -0,0 +1,1786 @@ +/* + * Color decompression engine support + * + * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "gk20a/fence_gk20a.h" +#include "gk20a/gr_gk20a.h" + +#include "cde.h" +#include "os_linux.h" +#include "dmabuf.h" +#include "channel.h" +#include "cde_gm20b.h" +#include "cde_gp10b.h" + +#include +#include + +static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); +static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); + +#define CTX_DELETE_TIME 1000 + +#define MAX_CTX_USE_COUNT 42 +#define MAX_CTX_RETRY_TIME 2000 + +static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) +{ + struct nvgpu_mapped_buf *buffer; + dma_addr_t addr = 0; + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); + if (buffer) + addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return addr; +} + +static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) +{ + unsigned int i; + + for (i = 0; i < cde_ctx->num_bufs; i++) { + struct nvgpu_mem *mem = cde_ctx->mem + i; + nvgpu_dma_unmap_free(cde_ctx->vm, mem); + } + + nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd); + + cde_ctx->convert_cmd = NULL; + cde_ctx->init_convert_cmd = NULL; + cde_ctx->num_bufs = 0; + cde_ctx->num_params = 0; + cde_ctx->init_cmd_num_entries = 0; + cde_ctx->convert_cmd_num_entries = 0; + cde_ctx->init_cmd_executed = false; +} + +static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx) +__must_hold(&cde_app->mutex) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct channel_gk20a *ch = cde_ctx->ch; + struct vm_gk20a *vm = ch->vm; + + trace_gk20a_cde_remove_ctx(cde_ctx); + + /* release mapped memory */ + gk20a_deinit_cde_img(cde_ctx); + nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem, + cde_ctx->backing_store_vaddr); + + /* + * free the channel + * gk20a_channel_close() will also unbind the channel from TSG + */ + gk20a_channel_close(ch); + nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release); + + /* housekeeping on app */ + nvgpu_list_del(&cde_ctx->list); + l->cde_app.ctx_count--; + nvgpu_kfree(g, cde_ctx); +} + +static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx, + bool wait_finish) +__releases(&cde_app->mutex) +__acquires(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + + /* permanent contexts do not have deleter works */ + if (!cde_ctx->is_temporary) + return; + + if (wait_finish) { + nvgpu_mutex_release(&cde_app->mutex); + cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work); + nvgpu_mutex_acquire(&cde_app->mutex); + } else { + cancel_delayed_work(&cde_ctx->ctx_deleter_work); + } +} + +static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; + + /* safe to go off the mutex in cancel_deleter since app is + * deinitialised; no new jobs are started. 
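 *
 * As a sketch (simplified names, not the exact functions below): the
 * synchronous cancel must drop the app mutex because the deleter
 * callback takes that same mutex, so waiting for it to finish with the
 * lock held would deadlock:
 *
 *	#include <linux/mutex.h>
 *	#include <linux/types.h>
 *	#include <linux/workqueue.h>
 *
 *	struct ctx_s { struct delayed_work deleter; };
 *
 *	static void cancel_deleter(struct mutex *app_lock,
 *				   struct ctx_s *ctx, bool wait)
 *	{
 *		if (wait) {
 *			mutex_unlock(app_lock);
 *			cancel_delayed_work_sync(&ctx->deleter);
 *			mutex_lock(app_lock);
 *		} else {
 *			cancel_delayed_work(&ctx->deleter); // non-blocking
 *		}
 *	}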
deleter works may be only at + * waiting for the mutex or before, going to abort */ + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->free_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, true); + gk20a_cde_remove_ctx(cde_ctx); + } + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->used_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, true); + gk20a_cde_remove_ctx(cde_ctx); + } +} + +static void gk20a_cde_stop(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + + /* prevent further conversions and delayed works from working */ + cde_app->initialised = false; + /* free all data, empty the list */ + gk20a_cde_remove_contexts(l); +} + +void gk20a_cde_destroy(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + + if (!cde_app->initialised) + return; + + nvgpu_mutex_acquire(&cde_app->mutex); + gk20a_cde_stop(l); + nvgpu_mutex_release(&cde_app->mutex); + + nvgpu_mutex_destroy(&cde_app->mutex); +} + +void gk20a_cde_suspend(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; + + if (!cde_app->initialised) + return; + + nvgpu_mutex_acquire(&cde_app->mutex); + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->free_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, false); + } + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->used_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, false); + } + + nvgpu_mutex_release(&cde_app->mutex); + +} + +static int gk20a_cde_create_context(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx; + + cde_ctx = gk20a_cde_allocate_context(l); + if (IS_ERR(cde_ctx)) + return PTR_ERR(cde_ctx); + + nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); + cde_app->ctx_count++; + if (cde_app->ctx_count > cde_app->ctx_count_top) + cde_app->ctx_count_top = cde_app->ctx_count; + + return 0; +} + +static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + int err; + int i; + + for (i = 0; i < NUM_CDE_CONTEXTS; i++) { + err = gk20a_cde_create_context(l); + if (err) + goto out; + } + + return 0; +out: + gk20a_cde_remove_contexts(l); + return err; +} + +static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_buf *buf) +{ + struct nvgpu_mem *mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* check that the file can hold the buf */ + if (buf->data_byte_offset != 0 && + buf->data_byte_offset + buf->num_bytes > img->size) { + nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", + cde_ctx->num_bufs); + return -EINVAL; + } + + /* check that we have enough buf elems available */ + if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { + nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", + cde_ctx->num_bufs); + return -ENOMEM; + } + + /* allocate buf */ + mem = cde_ctx->mem + cde_ctx->num_bufs; + err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); + if (err) { + nvgpu_warn(g, "cde: could not allocate device memory. 
buffer idx = %d", + cde_ctx->num_bufs); + return -ENOMEM; + } + + /* copy the content */ + if (buf->data_byte_offset != 0) + memcpy(mem->cpu_va, img->data + buf->data_byte_offset, + buf->num_bytes); + + cde_ctx->num_bufs++; + + return 0; +} + +static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, + int type, s32 shift, u64 mask, u64 value) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + u32 *target_mem_ptr = target; + u64 *target_mem_ptr_u64 = target; + u64 current_value, new_value; + + value = (shift >= 0) ? value << shift : value >> -shift; + value &= mask; + + /* read current data from the location */ + current_value = 0; + if (type == TYPE_PARAM_TYPE_U32) { + if (mask != 0xfffffffful) + current_value = *target_mem_ptr; + } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { + if (mask != ~0ul) + current_value = *target_mem_ptr_u64; + } else if (type == TYPE_PARAM_TYPE_U64_BIG) { + current_value = *target_mem_ptr_u64; + current_value = (u64)(current_value >> 32) | + (u64)(current_value << 32); + } else { + nvgpu_warn(g, "cde: unknown type. type=%d", + type); + return -EINVAL; + } + + current_value &= ~mask; + new_value = current_value | value; + + /* store the element data back */ + if (type == TYPE_PARAM_TYPE_U32) + *target_mem_ptr = (u32)new_value; + else if (type == TYPE_PARAM_TYPE_U64_LITTLE) + *target_mem_ptr_u64 = new_value; + else { + new_value = (u64)(new_value >> 32) | + (u64)(new_value << 32); + *target_mem_ptr_u64 = new_value; + } + + return 0; +} + +static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_replace *replace) +{ + struct nvgpu_mem *source_mem; + struct nvgpu_mem *target_mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + u32 *target_mem_ptr; + u64 vaddr; + int err; + + if (replace->target_buf >= cde_ctx->num_bufs || + replace->source_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", + replace->target_buf, replace->source_buf, + cde_ctx->num_bufs); + return -EINVAL; + } + + source_mem = cde_ctx->mem + replace->source_buf; + target_mem = cde_ctx->mem + replace->target_buf; + target_mem_ptr = target_mem->cpu_va; + + if (source_mem->size < (replace->source_byte_offset + 3) || + target_mem->size < (replace->target_byte_offset + 3)) { + nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", + replace->target_byte_offset, + replace->source_byte_offset, + source_mem->size, + target_mem->size); + return -EINVAL; + } + + /* calculate the target pointer */ + target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); + + /* determine patch value */ + vaddr = source_mem->gpu_va + replace->source_byte_offset; + err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, + replace->shift, replace->mask, + vaddr); + if (err) { + nvgpu_warn(g, "cde: replace failed. 
err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", + err, replace->target_buf, + replace->target_byte_offset, + replace->source_buf, + replace->source_byte_offset); + } + + return err; +} + +static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_mem *target_mem; + u32 *target_mem_ptr; + u64 new_data; + int user_id = 0, err; + unsigned int i; + + for (i = 0; i < cde_ctx->num_params; i++) { + struct gk20a_cde_hdr_param *param = cde_ctx->params + i; + target_mem = cde_ctx->mem + param->target_buf; + target_mem_ptr = target_mem->cpu_va; + target_mem_ptr += (param->target_byte_offset / sizeof(u32)); + + switch (param->id) { + case TYPE_PARAM_COMPTAGS_PER_CACHELINE: + new_data = g->gr.comptags_per_cacheline; + break; + case TYPE_PARAM_GPU_CONFIGURATION: + new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * + g->gr.cacheline_size; + break; + case TYPE_PARAM_FIRSTPAGEOFFSET: + new_data = cde_ctx->surf_param_offset; + break; + case TYPE_PARAM_NUMPAGES: + new_data = cde_ctx->surf_param_lines; + break; + case TYPE_PARAM_BACKINGSTORE: + new_data = cde_ctx->backing_store_vaddr; + break; + case TYPE_PARAM_DESTINATION: + new_data = cde_ctx->compbit_vaddr; + break; + case TYPE_PARAM_DESTINATION_SIZE: + new_data = cde_ctx->compbit_size; + break; + case TYPE_PARAM_BACKINGSTORE_SIZE: + new_data = g->gr.compbit_store.mem.size; + break; + case TYPE_PARAM_SOURCE_SMMU_ADDR: + new_data = gpuva_to_iova_base(cde_ctx->vm, + cde_ctx->surf_vaddr); + if (new_data == 0) { + nvgpu_warn(g, "cde: failed to find 0x%llx", + cde_ctx->surf_vaddr); + return -EINVAL; + } + break; + case TYPE_PARAM_BACKINGSTORE_BASE_HW: + new_data = g->gr.compbit_store.base_hw; + break; + case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: + new_data = g->gr.gobs_per_comptagline_per_slice; + break; + case TYPE_PARAM_SCATTERBUFFER: + new_data = cde_ctx->scatterbuffer_vaddr; + break; + case TYPE_PARAM_SCATTERBUFFER_SIZE: + new_data = cde_ctx->scatterbuffer_size; + break; + default: + user_id = param->id - NUM_RESERVED_PARAMS; + if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) + continue; + new_data = cde_ctx->user_param_values[user_id]; + } + + nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", + i, param->id, param->target_buf, + param->target_byte_offset, new_data, + param->data_offset, param->type, param->shift, + param->mask); + + new_data += param->data_offset; + + err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, + param->shift, param->mask, new_data); + + if (err) { + nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", + err, i, param->id, param->target_buf, + param->target_byte_offset, new_data); + return err; + } + } + + return 0; +} + +static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_param *param) +{ + struct nvgpu_mem *target_mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + + if (param->target_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: invalid buffer parameter. 
param idx = %d, target_buf=%u, num_bufs=%u", + cde_ctx->num_params, param->target_buf, + cde_ctx->num_bufs); + return -EINVAL; + } + + target_mem = cde_ctx->mem + param->target_buf; + if (target_mem->size < (param->target_byte_offset + 3)) { + nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", + cde_ctx->num_params, param->target_byte_offset, + target_mem->size); + return -EINVAL; + } + + /* does this parameter fit into our parameter structure */ + if (cde_ctx->num_params >= MAX_CDE_PARAMS) { + nvgpu_warn(g, "cde: no room for new parameters param idx = %d", + cde_ctx->num_params); + return -ENOMEM; + } + + /* is the given id valid? */ + if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { + nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", + param->id, cde_ctx->num_params, + NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); + return -EINVAL; + } + + cde_ctx->params[cde_ctx->num_params] = *param; + cde_ctx->num_params++; + + return 0; +} + +static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + u32 required_class) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* CDE enabled */ + cde_ctx->ch->cde = true; + + err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); + if (err) { + nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", + err); + return err; + } + + return 0; +} + +static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + u32 op, + struct gk20a_cde_cmd_elem *cmd_elem, + u32 num_elems) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; + u32 *num_entries; + unsigned int i; + + /* check command type */ + if (op == TYPE_BUF_COMMAND_INIT) { + gpfifo = &cde_ctx->init_convert_cmd; + num_entries = &cde_ctx->init_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_CONVERT) { + gpfifo = &cde_ctx->convert_cmd; + num_entries = &cde_ctx->convert_cmd_num_entries; + } else { + nvgpu_warn(g, "cde: unknown command. 
op=%u", + op); + return -EINVAL; + } + + /* allocate gpfifo entries to be pushed */ + *gpfifo = nvgpu_kzalloc(g, + sizeof(struct nvgpu_gpfifo_entry) * num_elems); + if (!*gpfifo) { + nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); + return -ENOMEM; + } + + gpfifo_elem = *gpfifo; + for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { + struct nvgpu_mem *target_mem; + + /* validate the current entry */ + if (cmd_elem->target_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", + cmd_elem->target_buf, cde_ctx->num_bufs); + return -EINVAL; + } + + target_mem = cde_ctx->mem + cmd_elem->target_buf; + if (target_mem->size< + cmd_elem->target_byte_offset + cmd_elem->num_bytes) { + nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", + target_mem->size, + cmd_elem->target_byte_offset, + cmd_elem->num_bytes); + return -EINVAL; + } + + /* store the element into gpfifo */ + gpfifo_elem->entry0 = + u64_lo32(target_mem->gpu_va + + cmd_elem->target_byte_offset); + gpfifo_elem->entry1 = + u64_hi32(target_mem->gpu_va + + cmd_elem->target_byte_offset) | + pbdma_gp_entry1_length_f(cmd_elem->num_bytes / + sizeof(u32)); + } + + *num_entries = num_elems; + return 0; +} + +static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + unsigned long init_bytes = cde_ctx->init_cmd_num_entries * + sizeof(struct nvgpu_gpfifo_entry); + unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * + sizeof(struct nvgpu_gpfifo_entry); + unsigned long total_bytes = init_bytes + conv_bytes; + struct nvgpu_gpfifo_entry *combined_cmd; + + /* allocate buffer that has space for both */ + combined_cmd = nvgpu_kzalloc(g, total_bytes); + if (!combined_cmd) { + nvgpu_warn(g, + "cde: could not allocate memory for gpfifo entries"); + return -ENOMEM; + } + + /* move the original init here and append convert */ + memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); + memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, + cde_ctx->convert_cmd, conv_bytes); + + nvgpu_kfree(g, cde_ctx->init_convert_cmd); + nvgpu_kfree(g, cde_ctx->convert_cmd); + + cde_ctx->init_convert_cmd = combined_cmd; + cde_ctx->convert_cmd = combined_cmd + + cde_ctx->init_cmd_num_entries; + + return 0; +} + +static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + u32 *data = (u32 *)img->data; + u32 num_of_elems; + struct gk20a_cde_hdr_elem *elem; + u32 min_size = 0; + int err = 0; + unsigned int i; + + min_size += 2 * sizeof(u32); + if (img->size < min_size) { + nvgpu_warn(g, "cde: invalid image header"); + return -EINVAL; + } + + cde_app->firmware_version = data[0]; + num_of_elems = data[1]; + + min_size += num_of_elems * sizeof(*elem); + if (img->size < min_size) { + nvgpu_warn(g, "cde: bad image"); + return -EINVAL; + } + + elem = (struct gk20a_cde_hdr_elem *)&data[2]; + for (i = 0; i < num_of_elems; i++) { + int err = 0; + switch (elem->type) { + case TYPE_BUF: + err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); + break; + case TYPE_REPLACE: + err = gk20a_init_cde_replace(cde_ctx, img, + &elem->replace); + break; + case TYPE_PARAM: + err = gk20a_init_cde_param(cde_ctx, img, &elem->param); + break; + case TYPE_REQUIRED_CLASS: + err = gk20a_init_cde_required_class(cde_ctx, img, + 
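/* engine class id comes straight from the parsed firmware header element */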
elem->required_class); + break; + case TYPE_COMMAND: + { + struct gk20a_cde_cmd_elem *cmd = (void *) + &img->data[elem->command.data_byte_offset]; + err = gk20a_init_cde_command(cde_ctx, img, + elem->command.op, cmd, + elem->command.num_entries); + break; + } + case TYPE_ARRAY: + memcpy(&cde_app->arrays[elem->array.id][0], + elem->array.data, + MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); + break; + default: + nvgpu_warn(g, "cde: unknown header element"); + err = -EINVAL; + } + + if (err) + goto deinit_image; + + elem++; + } + + if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { + nvgpu_warn(g, "cde: convert command not defined"); + err = -EINVAL; + goto deinit_image; + } + + if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { + nvgpu_warn(g, "cde: convert command not defined"); + err = -EINVAL; + goto deinit_image; + } + + err = gk20a_cde_pack_cmdbufs(cde_ctx); + if (err) + goto deinit_image; + + return 0; + +deinit_image: + gk20a_deinit_cde_img(cde_ctx); + return err; +} + +static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, + u32 op, struct nvgpu_channel_fence *fence, + u32 flags, struct gk20a_fence **fence_out) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_gpfifo_entry *gpfifo = NULL; + int num_entries = 0; + + /* check command type */ + if (op == TYPE_BUF_COMMAND_INIT) { + /* both init and convert combined */ + gpfifo = cde_ctx->init_convert_cmd; + num_entries = cde_ctx->init_cmd_num_entries + + cde_ctx->convert_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_CONVERT) { + gpfifo = cde_ctx->convert_cmd; + num_entries = cde_ctx->convert_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_NOOP) { + /* Any non-null gpfifo will suffice with 0 num_entries */ + gpfifo = cde_ctx->init_convert_cmd; + num_entries = 0; + } else { + nvgpu_warn(g, "cde: unknown buffer"); + return -EINVAL; + } + + if (gpfifo == NULL) { + nvgpu_warn(g, "cde: buffer not available"); + return -ENOSYS; + } + + return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, + num_entries, flags, fence, fence_out, + NULL); +} + +static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + struct gk20a *g = &cde_ctx->l->g; + + nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); + trace_gk20a_cde_release(cde_ctx); + + nvgpu_mutex_acquire(&cde_app->mutex); + + if (cde_ctx->in_use) { + cde_ctx->in_use = false; + nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); + cde_app->ctx_usecount--; + } else { + nvgpu_log_info(g, "double release cde context %p", cde_ctx); + } + + nvgpu_mutex_release(&cde_app->mutex); +} + +static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct delayed_work *delay_work = to_delayed_work(work); + struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, + struct gk20a_cde_ctx, ctx_deleter_work); + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* someone has just taken it? engine deletion started? 
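 *
 * This is the classic double-checked pattern, sketched here with
 * simplified names: a cheap unlocked test first, repeated under the app
 * mutex below before anything is actually freed:
 *
 *	if (ctx->in_use || !app->initialised)    // unlocked fast path
 *		return;
 *	mutex_lock(&app->lock);
 *	if (ctx->in_use || !app->initialised) {  // re-check under the lock
 *		mutex_unlock(&app->lock);
 *		return;
 *	}
 *	destroy_ctx(ctx);                        // safe only here
 *	mutex_unlock(&app->lock);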
*/ + if (cde_ctx->in_use || !cde_app->initialised) + return; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: attempting to delete temporary %p", cde_ctx); + + err = gk20a_busy(g); + if (err) { + /* this context would find new use anyway later, so not freeing + * here does not leak anything */ + nvgpu_warn(g, "cde: cannot set gk20a on, postponing" + " temp ctx deletion"); + return; + } + + nvgpu_mutex_acquire(&cde_app->mutex); + if (cde_ctx->in_use || !cde_app->initialised) { + nvgpu_log(g, gpu_dbg_cde_ctx, + "cde: context use raced, not deleting %p", + cde_ctx); + goto out; + } + + WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work), + "double pending %p", cde_ctx); + + gk20a_cde_remove_ctx(cde_ctx); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: destroyed %p count=%d use=%d max=%d", + cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount, + cde_app->ctx_count_top); + +out: + nvgpu_mutex_release(&cde_app->mutex); + gk20a_idle(g); +} + +static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l) +__must_hold(&cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx; + + /* exhausted? */ + + if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT) + return ERR_PTR(-EAGAIN); + + /* idle context available? */ + + if (!nvgpu_list_empty(&cde_app->free_contexts)) { + cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts, + gk20a_cde_ctx, list); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: got free %p count=%d use=%d max=%d", + cde_ctx, cde_app->ctx_count, + cde_app->ctx_usecount, + cde_app->ctx_count_top); + trace_gk20a_cde_get_context(cde_ctx); + + /* deleter work may be scheduled, but in_use prevents it */ + cde_ctx->in_use = true; + nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts); + cde_app->ctx_usecount++; + + /* cancel any deletions now that ctx is in use */ + gk20a_cde_cancel_deleter(cde_ctx, true); + return cde_ctx; + } + + /* no free contexts, get a temporary one */ + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: no free contexts, count=%d", + cde_app->ctx_count); + + cde_ctx = gk20a_cde_allocate_context(l); + if (IS_ERR(cde_ctx)) { + nvgpu_warn(g, "cde: cannot allocate context: %ld", + PTR_ERR(cde_ctx)); + return cde_ctx; + } + + trace_gk20a_cde_get_context(cde_ctx); + cde_ctx->in_use = true; + cde_ctx->is_temporary = true; + cde_app->ctx_usecount++; + cde_app->ctx_count++; + if (cde_app->ctx_count > cde_app->ctx_count_top) + cde_app->ctx_count_top = cde_app->ctx_count; + nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts); + + return cde_ctx; +} + +static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) +__releases(&cde_app->mutex) +__acquires(&cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx = NULL; + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, + NVGPU_TIMER_CPU_TIMER); + + do { + cde_ctx = gk20a_cde_do_get_context(l); + if (PTR_ERR(cde_ctx) != -EAGAIN) + break; + + /* exhausted, retry */ + nvgpu_mutex_release(&cde_app->mutex); + cond_resched(); + nvgpu_mutex_acquire(&cde_app->mutex); + } while (!nvgpu_timeout_expired(&timeout)); + + return cde_ctx; +} + +static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_ctx *cde_ctx; + int ret; + + cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); + if (!cde_ctx) + return ERR_PTR(-ENOMEM); + + cde_ctx->l = l; + 
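	/* back-pointer first: gk20a_cde_load() below derives the gk20a
	 * instance from cde_ctx->l when opening the channel */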
cde_ctx->dev = dev_from_gk20a(g); + + ret = gk20a_cde_load(cde_ctx); + if (ret) { + nvgpu_kfree(g, cde_ctx); + return ERR_PTR(ret); + } + + nvgpu_init_list_node(&cde_ctx->list); + cde_ctx->is_temporary = false; + cde_ctx->in_use = false; + INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, + gk20a_cde_ctx_deleter_fn); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); + trace_gk20a_cde_allocate_context(cde_ctx); + return cde_ctx; +} + +int gk20a_cde_convert(struct nvgpu_os_linux *l, + struct dma_buf *compbits_scatter_buf, + u64 compbits_byte_offset, + u64 scatterbuffer_byte_offset, + struct nvgpu_channel_fence *fence, + u32 __flags, struct gk20a_cde_param *params, + int num_params, struct gk20a_fence **fence_out) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_ctx *cde_ctx = NULL; + struct gk20a_comptags comptags; + struct nvgpu_os_buffer os_buf = { + compbits_scatter_buf, + NULL, + dev_from_gk20a(g) + }; + u64 mapped_compbits_offset = 0; + u64 compbits_size = 0; + u64 mapped_scatterbuffer_offset = 0; + u64 scatterbuffer_size = 0; + u64 map_vaddr = 0; + u64 map_offset = 0; + u64 map_size = 0; + u8 *surface = NULL; + u64 big_page_mask = 0; + u32 flags; + int err, i; + const s16 compbits_kind = 0; + u32 submit_op; + struct dma_buf_attachment *attachment; + + nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", + compbits_byte_offset, scatterbuffer_byte_offset); + + /* scatter buffer must be after compbits buffer */ + if (scatterbuffer_byte_offset && + scatterbuffer_byte_offset < compbits_byte_offset) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&l->cde_app.mutex); + cde_ctx = gk20a_cde_get_context(l); + nvgpu_mutex_release(&l->cde_app.mutex); + if (IS_ERR(cde_ctx)) { + err = PTR_ERR(cde_ctx); + goto exit_idle; + } + + /* First, map the buffer to local va */ + + /* ensure that the compbits buffer has drvdata */ + err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, + dev_from_gk20a(g)); + if (err) + goto exit_idle; + + /* compbits don't start at page aligned offset, so we need to align + the region to be mapped */ + big_page_mask = cde_ctx->vm->big_page_size - 1; + map_offset = compbits_byte_offset & ~big_page_mask; + map_size = compbits_scatter_buf->size - map_offset; + + + /* compute compbit start offset from the beginning of the mapped + area */ + mapped_compbits_offset = compbits_byte_offset - map_offset; + if (scatterbuffer_byte_offset) { + compbits_size = scatterbuffer_byte_offset - + compbits_byte_offset; + mapped_scatterbuffer_offset = scatterbuffer_byte_offset - + map_offset; + scatterbuffer_size = compbits_scatter_buf->size - + scatterbuffer_byte_offset; + } else { + compbits_size = compbits_scatter_buf->size - + compbits_byte_offset; + } + + nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", + map_offset, map_size); + nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", + mapped_compbits_offset, compbits_size); + nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", + mapped_scatterbuffer_offset, scatterbuffer_size); + + + /* map the destination buffer */ + get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ + err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, + NVGPU_VM_MAP_CACHEABLE | + NVGPU_VM_MAP_DIRECT_KIND_CTRL, + NVGPU_KIND_INVALID, + compbits_kind, /* incompressible kind */ + gk20a_mem_flag_none, + map_offset, 
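/* window runs from the aligned base to the end of the dmabuf */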
map_size, + NULL, + &map_vaddr); + if (err) { + dma_buf_put(compbits_scatter_buf); + err = -EINVAL; + goto exit_idle; + } + + if (scatterbuffer_byte_offset && + l->ops.cde.need_scatter_buffer && + l->ops.cde.need_scatter_buffer(g)) { + struct sg_table *sgt; + void *scatter_buffer; + + surface = dma_buf_vmap(compbits_scatter_buf); + if (IS_ERR(surface)) { + nvgpu_warn(g, + "dma_buf_vmap failed"); + err = -EINVAL; + goto exit_unmap_vaddr; + } + + scatter_buffer = surface + scatterbuffer_byte_offset; + + nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", + surface, scatter_buffer); + sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf, + &attachment); + if (IS_ERR(sgt)) { + nvgpu_warn(g, + "mm_pin failed"); + err = -EINVAL; + goto exit_unmap_surface; + } else { + err = l->ops.cde.populate_scatter_buffer(g, sgt, + compbits_byte_offset, scatter_buffer, + scatterbuffer_size); + WARN_ON(err); + + gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, + attachment, sgt); + if (err) + goto exit_unmap_surface; + } + + __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); + dma_buf_vunmap(compbits_scatter_buf, surface); + surface = NULL; + } + + /* store source buffer compression tags */ + gk20a_get_comptags(&os_buf, &comptags); + cde_ctx->surf_param_offset = comptags.offset; + cde_ctx->surf_param_lines = comptags.lines; + + /* store surface vaddr. This is actually compbit vaddr, but since + compbits live in the same surface, and we can get the alloc base + address by using gpuva_to_iova_base, this will do */ + cde_ctx->surf_vaddr = map_vaddr; + + /* store information about destination */ + cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; + cde_ctx->compbit_size = compbits_size; + + cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; + cde_ctx->scatterbuffer_size = scatterbuffer_size; + + /* remove existing argument data */ + memset(cde_ctx->user_param_values, 0, + sizeof(cde_ctx->user_param_values)); + + /* read user space arguments for the conversion */ + for (i = 0; i < num_params; i++) { + struct gk20a_cde_param *param = params + i; + int id = param->id - NUM_RESERVED_PARAMS; + + if (id < 0 || id >= MAX_CDE_USER_PARAMS) { + nvgpu_warn(g, "cde: unknown user parameter"); + err = -EINVAL; + goto exit_unmap_surface; + } + cde_ctx->user_param_values[id] = param->value; + } + + /* patch data */ + err = gk20a_cde_patch_params(cde_ctx); + if (err) { + nvgpu_warn(g, "cde: failed to patch parameters"); + goto exit_unmap_surface; + } + + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", + g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", + cde_ctx->compbit_size, cde_ctx->compbit_vaddr); + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", + cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); + + /* take always the postfence as it is needed for protecting the + * cde context */ + flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET; + + /* gk20a_cde_execute_buffer() will grab a power reference of it's own */ + gk20a_idle(g); + + if (comptags.lines == 0) { + /* + * Nothing to do on the buffer, but do a null kickoff for + * managing the pre and post fences. + */ + submit_op = TYPE_BUF_COMMAND_NOOP; + } else if (!cde_ctx->init_cmd_executed) { + /* + * First time, so include the init pushbuf too in addition to + * the conversion code. 
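 *
 * In outline, the three-way choice made here (a sketch using the
 * TYPE_BUF_COMMAND_* values from cde.h; the helper itself is
 * hypothetical):
 *
 *	static u32 pick_submit_op(bool has_comptags, bool init_done)
 *	{
 *		if (!has_comptags)
 *			return TYPE_BUF_COMMAND_NOOP;     // fences only
 *		return init_done ? TYPE_BUF_COMMAND_CONVERT
 *				 : TYPE_BUF_COMMAND_INIT; // init + convert
 *	}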
+ */ + submit_op = TYPE_BUF_COMMAND_INIT; + } else { + /* + * The usual condition: execute just the conversion. + */ + submit_op = TYPE_BUF_COMMAND_CONVERT; + } + err = gk20a_cde_execute_buffer(cde_ctx, submit_op, + fence, flags, fence_out); + + if (comptags.lines != 0 && !err) + cde_ctx->init_cmd_executed = true; + + /* unmap the buffers - channel holds references to them now */ + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); + + return err; + +exit_unmap_surface: + if (surface) + dma_buf_vunmap(compbits_scatter_buf, surface); +exit_unmap_vaddr: + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); +exit_idle: + gk20a_idle(g); + return err; +} + +static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_ctx *cde_ctx = data; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + bool channel_idle; + + channel_gk20a_joblist_lock(ch); + channel_idle = channel_gk20a_joblist_is_empty(ch); + channel_gk20a_joblist_unlock(ch); + + if (!channel_idle) + return; + + trace_gk20a_cde_finished_ctx_cb(cde_ctx); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx); + if (!cde_ctx->in_use) + nvgpu_log_info(g, "double finish cde context %p on channel %p", + cde_ctx, ch); + + if (ch->has_timedout) { + if (cde_ctx->is_temporary) { + nvgpu_warn(g, + "cde: channel had timed out" + " (temporary channel)"); + /* going to be deleted anyway */ + } else { + nvgpu_warn(g, + "cde: channel had timed out" + ", reloading"); + /* mark it to be deleted, replace with a new one */ + nvgpu_mutex_acquire(&cde_app->mutex); + cde_ctx->is_temporary = true; + if (gk20a_cde_create_context(l)) { + nvgpu_err(g, "cde: can't replace context"); + } + nvgpu_mutex_release(&cde_app->mutex); + } + } + + /* delete temporary contexts later (watch for doubles) */ + if (cde_ctx->is_temporary && cde_ctx->in_use) { + WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work)); + schedule_delayed_work(&cde_ctx->ctx_deleter_work, + msecs_to_jiffies(CTX_DELETE_TIME)); + } + + if (!ch->has_timedout) + gk20a_cde_ctx_release(cde_ctx); +} + +static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_firmware *img; + struct channel_gk20a *ch; + struct tsg_gk20a *tsg; + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gpfifo_args gpfifo_args; + int err = 0; + u64 vaddr; + + img = nvgpu_request_firmware(g, "gpu2cde.bin", 0); + if (!img) { + nvgpu_err(g, "cde: could not fetch the firmware"); + return -ENOSYS; + } + + tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); + if (!tsg) { + nvgpu_err(g, "cde: could not create TSG"); + err = -ENOMEM; + goto err_get_gk20a_channel; + } + + ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, + cde_ctx, + -1, + false); + if (!ch) { + nvgpu_warn(g, "cde: gk20a channel not available"); + err = -ENOMEM; + goto err_get_gk20a_channel; + } + + ch->timeout.enabled = false; + + /* bind the channel to the vm */ + err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch); + if (err) { + nvgpu_warn(g, "cde: could not bind vm"); + goto err_commit_va; + } + + err = gk20a_tsg_bind_channel(tsg, ch); + if (err) { + nvgpu_err(g, "cde: unable to bind to tsg"); + goto err_alloc_gpfifo; + } + + gpfifo_args.num_entries = 1024; + gpfifo_args.num_inflight_jobs = 0; + gpfifo_args.flags = 0; + /* allocate gpfifo (1024 should be more than enough) */ + err = gk20a_channel_alloc_gpfifo(ch, 
&gpfifo_args); + if (err) { + nvgpu_warn(g, "cde: unable to allocate gpfifo"); + goto err_alloc_gpfifo; + } + + /* map backing store to gpu virtual space */ + vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, + g->gr.compbit_store.mem.size, + NVGPU_VM_MAP_CACHEABLE, + gk20a_mem_flag_read_only, + false, + gr->compbit_store.mem.aperture); + + if (!vaddr) { + nvgpu_warn(g, "cde: cannot map compression bit backing store"); + err = -ENOMEM; + goto err_map_backingstore; + } + + /* store initialisation data */ + cde_ctx->ch = ch; + cde_ctx->tsg = tsg; + cde_ctx->vm = ch->vm; + cde_ctx->backing_store_vaddr = vaddr; + + /* initialise the firmware */ + err = gk20a_init_cde_img(cde_ctx, img); + if (err) { + nvgpu_warn(g, "cde: image initialisation failed"); + goto err_init_cde_img; + } + + /* initialisation done */ + nvgpu_release_firmware(g, img); + + return 0; + +err_init_cde_img: + nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); +err_map_backingstore: +err_alloc_gpfifo: + nvgpu_vm_put(ch->vm); +err_commit_va: +err_get_gk20a_channel: + nvgpu_release_firmware(g, img); + nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err); + return err; +} + +int gk20a_cde_reload(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + int err; + + if (!cde_app->initialised) + return -ENOSYS; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&cde_app->mutex); + + gk20a_cde_stop(l); + + err = gk20a_cde_create_contexts(l); + if (!err) + cde_app->initialised = true; + + nvgpu_mutex_release(&cde_app->mutex); + + gk20a_idle(g); + return err; +} + +int gk20a_init_cde_support(struct nvgpu_os_linux *l) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a *g = &l->g; + int err; + + if (cde_app->initialised) + return 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); + + err = nvgpu_mutex_init(&cde_app->mutex); + if (err) + return err; + + nvgpu_mutex_acquire(&cde_app->mutex); + + nvgpu_init_list_node(&cde_app->free_contexts); + nvgpu_init_list_node(&cde_app->used_contexts); + cde_app->ctx_count = 0; + cde_app->ctx_count_top = 0; + cde_app->ctx_usecount = 0; + + err = gk20a_cde_create_contexts(l); + if (!err) + cde_app->initialised = true; + + nvgpu_mutex_release(&cde_app->mutex); + nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err); + + if (err) + nvgpu_mutex_destroy(&cde_app->mutex); + + return err; +} + +enum cde_launch_patch_id { + PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, + PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, + PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ + PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, + PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, + PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ + PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ + PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ + PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, + PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ + PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ + PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, + PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, + PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, + PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, + PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, + PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, + PATCH_USER_CONST_XBLOCKS_ID = 1041, + 
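	/* the ids below come in pairs: H_* constants patch the horizontal
	 * conversion pass, V_* the vertical pass of the same QMD/launch
	 * fields */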
PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, + PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, + PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, + PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, + PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, + PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, + PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, + PATCH_H_LAUNCH_WORD1_ID = 1049, + PATCH_H_LAUNCH_WORD2_ID = 1050, + PATCH_V_LAUNCH_WORD1_ID = 1051, + PATCH_V_LAUNCH_WORD2_ID = 1052, + PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, + PATCH_H_QMD_REGISTER_COUNT_ID = 1054, + PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, + PATCH_V_QMD_REGISTER_COUNT_ID = 1056, +}; + +/* maximum number of WRITE_PATCHes in the below function */ +#define MAX_CDE_LAUNCH_PATCHES 32 + +static int gk20a_buffer_convert_gpu_to_cde_v1( + struct nvgpu_os_linux *l, + struct dma_buf *dmabuf, u32 consumer, + u64 offset, u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence_in, + struct gk20a_buffer_state *state) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; + int param = 0; + int err = 0; + struct gk20a_fence *new_fence = NULL; + const int wgx = 8; + const int wgy = 8; + const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ + const int xalign = compbits_per_byte * wgx; + const int yalign = wgy; + + /* Compute per launch parameters */ + const int xtiles = (width + 7) >> 3; + const int ytiles = (height + 7) >> 3; + const int gridw_h = roundup(xtiles, xalign) / xalign; + const int gridh_h = roundup(ytiles, yalign) / yalign; + const int gridw_v = roundup(ytiles, xalign) / xalign; + const int gridh_v = roundup(xtiles, yalign) / yalign; + const int xblocks = (xtiles + 1) >> 1; + const int voffset = compbits_voffset - compbits_hoffset; + + int hprog = -1; + int vprog = -1; + + if (l->ops.cde.get_program_numbers) + l->ops.cde.get_program_numbers(g, block_height_log2, + l->cde_app.shader_parameter, + &hprog, &vprog); + else { + nvgpu_warn(g, "cde: chip not supported"); + return -ENOSYS; + } + + if (hprog < 0 || vprog < 0) { + nvgpu_warn(g, "cde: could not determine programs"); + return -ENOSYS; + } + + if (xtiles > 8192 / 8 || ytiles > 8192 / 8) + nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", + xtiles, ytiles); + + nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", + width, height, block_height_log2, + compbits_hoffset, compbits_voffset, scatterbuffer_offset); + nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", + width, height, xtiles, ytiles); + nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", + wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); + nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", + hprog, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], + l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], + vprog, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], + l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); + + /* Write parameters */ +#define WRITE_PATCH(NAME, VALUE) \ + params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} + WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); + WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, + block_height_log2); + WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); + WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); + WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); + 
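	/* each WRITE_PATCH() appends one gk20a_cde_param to params[],
	 * bounded by MAX_CDE_LAUNCH_PATCHES */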
WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); + WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); + + WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); + WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); + WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); + + WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); + WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); + WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); + + WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); + WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, + l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); + WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); + WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, + l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); + + if (consumer & NVGPU_GPU_COMPBITS_CDEH) { + WRITE_PATCH(PATCH_H_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); + WRITE_PATCH(PATCH_H_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); + } else { + WRITE_PATCH(PATCH_H_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); + WRITE_PATCH(PATCH_H_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); + } + + if (consumer & NVGPU_GPU_COMPBITS_CDEV) { + WRITE_PATCH(PATCH_V_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); + WRITE_PATCH(PATCH_V_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); + } else { + WRITE_PATCH(PATCH_V_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); + WRITE_PATCH(PATCH_V_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); + } +#undef WRITE_PATCH + + err = gk20a_cde_convert(l, dmabuf, + compbits_hoffset, + scatterbuffer_offset, + fence_in, submit_flags, + params, param, &new_fence); + if (err) + goto out; + + /* compbits generated, update state & fence */ + gk20a_fence_put(state->fence); + state->fence = new_fence; + state->valid_compbits |= consumer & + (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); +out: + return err; +} + +static int gk20a_buffer_convert_gpu_to_cde( + struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, + u64 offset, u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence_in, + struct gk20a_buffer_state *state) +{ + struct gk20a *g = &l->g; + int err = 0; + + if (!l->cde_app.initialised) + return -ENOSYS; + + nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", + l->cde_app.firmware_version); + + if (l->cde_app.firmware_version == 1) { + err = gk20a_buffer_convert_gpu_to_cde_v1( + l, dmabuf, consumer, offset, compbits_hoffset, + compbits_voffset, scatterbuffer_offset, + width, height, block_height_log2, + submit_flags, fence_in, state); + } else { + nvgpu_err(g, "unsupported CDE firmware version %d", + l->cde_app.firmware_version); + err = -EINVAL; + } + + return err; +} + +int gk20a_prepare_compressible_read( + struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, + u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence, + u32 *valid_compbits, u32 
*zbc_color, + struct gk20a_fence **fence_out) +{ + struct gk20a *g = &l->g; + int err = 0; + struct gk20a_buffer_state *state; + struct dma_buf *dmabuf; + u32 missing_bits; + + dmabuf = dma_buf_get(buffer_fd); + if (IS_ERR(dmabuf)) + return -EINVAL; + + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); + if (err) { + dma_buf_put(dmabuf); + return err; + } + + missing_bits = (state->valid_compbits ^ request) & request; + + nvgpu_mutex_acquire(&state->lock); + + if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { + + gk20a_fence_put(state->fence); + state->fence = NULL; + /* state->fence = decompress(); + state->valid_compbits = 0; */ + err = -EINVAL; + goto out; + } else if (missing_bits) { + u32 missing_cde_bits = missing_bits & + (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); + if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && + missing_cde_bits) { + err = gk20a_buffer_convert_gpu_to_cde( + l, dmabuf, + missing_cde_bits, + offset, compbits_hoffset, + compbits_voffset, scatterbuffer_offset, + width, height, block_height_log2, + submit_flags, fence, + state); + if (err) + goto out; + } + } + + if (state->fence && fence_out) + *fence_out = gk20a_fence_get(state->fence); + + if (valid_compbits) + *valid_compbits = state->valid_compbits; + + if (zbc_color) + *zbc_color = state->zbc_color; + +out: + nvgpu_mutex_release(&state->lock); + dma_buf_put(dmabuf); + return err; +} + +int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, + u32 valid_compbits, u64 offset, u32 zbc_color) +{ + int err; + struct gk20a_buffer_state *state; + struct dma_buf *dmabuf; + + dmabuf = dma_buf_get(buffer_fd); + if (IS_ERR(dmabuf)) { + nvgpu_err(g, "invalid dmabuf"); + return -EINVAL; + } + + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); + if (err) { + nvgpu_err(g, "could not get state from dmabuf"); + dma_buf_put(dmabuf); + return err; + } + + nvgpu_mutex_acquire(&state->lock); + + /* Update the compbits state. */ + state->valid_compbits = valid_compbits; + state->zbc_color = zbc_color; + + /* Discard previous compbit job fence. */ + gk20a_fence_put(state->fence); + state->fence = NULL; + + nvgpu_mutex_release(&state->lock); + dma_buf_put(dmabuf); + return 0; +} + +int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + + switch (ver) { + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + l->ops.cde = gm20b_cde_ops.cde; + break; + case NVGPU_GPUID_GP10B: + l->ops.cde = gp10b_cde_ops.cde; + break; + default: + /* CDE is optional, so today ignoring unknown chip is fine */ + break; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h new file mode 100644 index 00000000..5928b624 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.h @@ -0,0 +1,326 @@ +/* + * GK20A color decompression engine support + * + * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#ifndef _CDE_GK20A_H_
+#define _CDE_GK20A_H_
+
+#include
+#include
+#include
+
+#include
+#include
+
+#define MAX_CDE_BUFS		10
+#define MAX_CDE_PARAMS		64
+#define MAX_CDE_USER_PARAMS	40
+#define MAX_CDE_ARRAY_ENTRIES	9
+
+/*
+ * The size of the context ring buffer that is dedicated for handling cde
+ * jobs. Re-using a context (=channel) for a different cde job forces a cpu
+ * wait on the previous job to that channel, so increasing this value
+ * reduces the likelihood of stalls.
+ */
+#define NUM_CDE_CONTEXTS	4
+
+struct dma_buf;
+struct device;
+struct nvgpu_os_linux;
+struct gk20a;
+struct gk20a_fence;
+struct nvgpu_channel_fence;
+struct channel_gk20a;
+struct vm_gk20a;
+struct nvgpu_gpfifo_entry;
+
+/*
+ * this element defines a buffer that is allocated and mapped into gpu address
+ * space. data_byte_offset defines the beginning of the buffer inside the
+ * firmware. num_bytes defines how many bytes the firmware contains.
+ *
+ * If data_byte_offset is zero, we allocate an empty buffer.
+ */
+
+struct gk20a_cde_hdr_buf {
+	u64 data_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * this element defines a constant patching in buffers. It basically
+ * computes the physical address of source_buf + source_byte_offset. The
+ * address is then modified into the patch value as per:
+ *    value = (current_value & ~mask) | (address << shift) & mask .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_replace {
+	u32 target_buf;
+	u32 source_buf;
+	s32 shift;
+	u32 type;
+	u64 target_byte_offset;
+	u64 source_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_TYPE_U32 = 0,
+	TYPE_PARAM_TYPE_U64_LITTLE,
+	TYPE_PARAM_TYPE_U64_BIG
+};
+
+/*
+ * this element defines a runtime patching in buffers. Parameters with id from
+ * 0 to 1024 are reserved for special usage as follows:
+ *   0 = comptags_per_cacheline,
+ *   1 = slices_per_fbp,
+ *   2 = num_fbps
+ *   3 = source buffer first page offset
+ *   4 = source buffer block height log2
+ *   5 = backing store memory address
+ *   6 = destination memory address
+ *   7 = destination size (bytes)
+ *   8 = backing store size (bytes)
+ *   9 = cache line size
+ *
+ * Parameters above id 1024 are user-specified. I.e. they determine where
+ * parameters from user space should be placed in buffers, what their type
+ * is, etc.
+ *
+ * Once the value is available, we add data_offset to the value.
+ *
+ * The value address is then modified into the patch value as per:
+ *    value = (current_value & ~mask) | (address << shift) & mask .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_param {
+	u32 id;
+	u32 target_buf;
+	s32 shift;
+	u32 type;
+	s64 data_offset;
+	u64 target_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
+	TYPE_PARAM_GPU_CONFIGURATION,
+	TYPE_PARAM_FIRSTPAGEOFFSET,
+	TYPE_PARAM_NUMPAGES,
+	TYPE_PARAM_BACKINGSTORE,
+	TYPE_PARAM_DESTINATION,
+	TYPE_PARAM_DESTINATION_SIZE,
+	TYPE_PARAM_BACKINGSTORE_SIZE,
+	TYPE_PARAM_SOURCE_SMMU_ADDR,
+	TYPE_PARAM_BACKINGSTORE_BASE_HW,
+	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
+	TYPE_PARAM_SCATTERBUFFER,
+	TYPE_PARAM_SCATTERBUFFER_SIZE,
+	NUM_RESERVED_PARAMS = 1024,
+};
+
+/*
+ * This header element defines a command. The op field determines whether the
+ * element is defining an init (0) or convert (1) command. data_byte_offset
+ * denotes the beginning address of command elements in the file.
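+ *
+ * As a purely illustrative example (the offsets and counts below are made
+ * up; real values come from the CDE firmware image), an init command
+ * covering two pushbuffer slices could be encoded as:
+ *
+ *   { .op = TYPE_BUF_COMMAND_INIT, .num_entries = 2,
+ *     .data_byte_offset = 0x200 }
+ *
+ * where 0x200 would point at two struct gk20a_cde_cmd_elem records
+ * inside the firmware file.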
+ */
+
+struct gk20a_cde_hdr_command {
+	u32 op;
+	u32 num_entries;
+	u64 data_byte_offset;
+};
+
+enum {
+	TYPE_BUF_COMMAND_INIT = 0,
+	TYPE_BUF_COMMAND_CONVERT,
+	TYPE_BUF_COMMAND_NOOP
+};
+
+/*
+ * This command element defines one entry inside the push buffer. target_buf
+ * defines the buffer holding the pushbuffer entries, target_byte_offset the
+ * offset inside the buffer and num_bytes the number of bytes in the buffer.
+ */
+
+struct gk20a_cde_cmd_elem {
+	u32 target_buf;
+	u32 padding;
+	u64 target_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * This element is used for storing a small array of data.
+ */
+
+enum {
+	ARRAY_PROGRAM_OFFSET = 0,
+	ARRAY_REGISTER_COUNT,
+	ARRAY_LAUNCH_COMMAND,
+	NUM_CDE_ARRAYS
+};
+
+struct gk20a_cde_hdr_array {
+	u32 id;
+	u32 data[MAX_CDE_ARRAY_ENTRIES];
+};
+
+/*
+ * The following defines a single header element. Each element has a type
+ * and carries one of the data structures in the union.
+ */
+
+struct gk20a_cde_hdr_elem {
+	u32 type;
+	u32 padding;
+	union {
+		struct gk20a_cde_hdr_buf buf;
+		struct gk20a_cde_hdr_replace replace;
+		struct gk20a_cde_hdr_param param;
+		u32 required_class;
+		struct gk20a_cde_hdr_command command;
+		struct gk20a_cde_hdr_array array;
+	};
+};
+
+enum {
+	TYPE_BUF = 0,
+	TYPE_REPLACE,
+	TYPE_PARAM,
+	TYPE_REQUIRED_CLASS,
+	TYPE_COMMAND,
+	TYPE_ARRAY
+};
+
+struct gk20a_cde_param {
+	u32 id;
+	u32 padding;
+	u64 value;
+};
+
+struct gk20a_cde_ctx {
+	struct nvgpu_os_linux *l;
+	struct device *dev;
+
+	/* channel related data */
+	struct channel_gk20a *ch;
+	struct tsg_gk20a *tsg;
+	struct vm_gk20a *vm;
+
+	/* buf converter configuration */
+	struct nvgpu_mem mem[MAX_CDE_BUFS];
+	unsigned int num_bufs;
+
+	/* buffer patching params (where patching should be done) */
+	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
+	unsigned int num_params;
+
+	/* storage for user space parameter values */
+	u32 user_param_values[MAX_CDE_USER_PARAMS];
+
+	u32 surf_param_offset;
+	u32 surf_param_lines;
+	u64 surf_vaddr;
+
+	u64 compbit_vaddr;
+	u64 compbit_size;
+
+	u64 scatterbuffer_vaddr;
+	u64 scatterbuffer_size;
+
+	u64 backing_store_vaddr;
+
+	struct nvgpu_gpfifo_entry *init_convert_cmd;
+	int init_cmd_num_entries;
+
+	struct nvgpu_gpfifo_entry *convert_cmd;
+	int convert_cmd_num_entries;
+
+	struct kobj_attribute attr;
+
+	bool init_cmd_executed;
+
+	struct nvgpu_list_node list;
+	bool is_temporary;
+	bool in_use;
+	struct delayed_work ctx_deleter_work;
+};
+
+static inline struct gk20a_cde_ctx *
+gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
+{
+	return (struct gk20a_cde_ctx *)
+		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
+};
+
+struct gk20a_cde_app {
+	bool initialised;
+	struct nvgpu_mutex mutex;
+
+	struct nvgpu_list_node free_contexts;
+	struct nvgpu_list_node used_contexts;
+	unsigned int ctx_count;
+	unsigned int ctx_usecount;
+	unsigned int ctx_count_top;
+
+	u32 firmware_version;
+
+	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
+
+	u32 shader_parameter;
+};
+
+void gk20a_cde_destroy(struct nvgpu_os_linux *l);
+void gk20a_cde_suspend(struct nvgpu_os_linux *l);
+int gk20a_init_cde_support(struct nvgpu_os_linux *l);
+int gk20a_cde_reload(struct nvgpu_os_linux *l);
+int gk20a_cde_convert(struct nvgpu_os_linux *l,
+		struct dma_buf *compbits_buf,
+		u64 compbits_byte_offset,
+		u64 scatterbuffer_byte_offset,
+		struct nvgpu_channel_fence *fence,
+		u32 __flags, struct gk20a_cde_param *params,
+		int num_params, struct gk20a_fence **fence_out);
+
+int gk20a_prepare_compressible_read(
+		struct nvgpu_os_linux *l, u32 buffer_fd,
u32 request, u64 offset, + u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence, + u32 *valid_compbits, u32 *zbc_color, + struct gk20a_fence **fence_out); +int gk20a_mark_compressible_write( + struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, + u32 zbc_color); +int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.c b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c new file mode 100644 index 00000000..1cd15c54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c @@ -0,0 +1,64 @@ +/* + * GM20B CDE + * + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gk20a/gk20a.h" +#include "cde_gm20b.h" + +enum programs { + PROG_HPASS = 0, + PROG_VPASS_LARGE = 1, + PROG_VPASS_SMALL = 2, + PROG_HPASS_DEBUG = 3, + PROG_VPASS_LARGE_DEBUG = 4, + PROG_VPASS_SMALL_DEBUG = 5, + PROG_PASSTHROUGH = 6, +}; + +static void gm20b_cde_get_program_numbers(struct gk20a *g, + u32 block_height_log2, + u32 shader_parameter, + int *hprog_out, int *vprog_out) +{ + int hprog = PROG_HPASS; + int vprog = (block_height_log2 >= 2) ? + PROG_VPASS_LARGE : PROG_VPASS_SMALL; + if (shader_parameter == 1) { + hprog = PROG_PASSTHROUGH; + vprog = PROG_PASSTHROUGH; + } else if (shader_parameter == 2) { + hprog = PROG_HPASS_DEBUG; + vprog = (block_height_log2 >= 2) ? + PROG_VPASS_LARGE_DEBUG : + PROG_VPASS_SMALL_DEBUG; + } + + *hprog_out = hprog; + *vprog_out = vprog; +} + +struct nvgpu_os_linux_ops gm20b_cde_ops = { + .cde = { + .get_program_numbers = gm20b_cde_get_program_numbers, + }, +}; diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.h b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h new file mode 100644 index 00000000..640d6ab6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h @@ -0,0 +1,32 @@ +/* + * GM20B CDE + * + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NVHOST_GM20B_CDE +#define _NVHOST_GM20B_CDE + +#include "os_linux.h" + +extern struct nvgpu_os_linux_ops gm20b_cde_ops; + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.c b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c new file mode 100644 index 00000000..5c0e79a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c @@ -0,0 +1,161 @@ +/* + * GP10B CDE + * + * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "gk20a/gk20a.h"
+#include "cde_gp10b.h"
+
+#include
+#include
+
+enum gp10b_programs {
+	GP10B_PROG_HPASS = 0,
+	GP10B_PROG_HPASS_4K = 1,
+	GP10B_PROG_VPASS = 2,
+	GP10B_PROG_VPASS_4K = 3,
+	GP10B_PROG_HPASS_DEBUG = 4,
+	GP10B_PROG_HPASS_4K_DEBUG = 5,
+	GP10B_PROG_VPASS_DEBUG = 6,
+	GP10B_PROG_VPASS_4K_DEBUG = 7,
+	GP10B_PROG_PASSTHROUGH = 8,
+};
+
+void gp10b_cde_get_program_numbers(struct gk20a *g,
+				   u32 block_height_log2,
+				   u32 shader_parameter,
+				   int *hprog_out, int *vprog_out)
+{
+	int hprog, vprog;
+
+	if (shader_parameter == 1) {
+		hprog = GP10B_PROG_PASSTHROUGH;
+		vprog = GP10B_PROG_PASSTHROUGH;
+	} else {
+		hprog = GP10B_PROG_HPASS;
+		vprog = GP10B_PROG_VPASS;
+		if (shader_parameter == 2) {
+			hprog = GP10B_PROG_HPASS_DEBUG;
+			vprog = GP10B_PROG_VPASS_DEBUG;
+		}
+		if (!nvgpu_iommuable(g)) {
+			if (!g->mm.disable_bigpage) {
+				nvgpu_warn(g,
+					"big pages cannot be used without an IOMMU");
+			}
+			/* the 4K program variants are selected via bit 0 */
+			hprog |= 1;
+			vprog |= 1;
+		}
+	}
+
+	*hprog_out = hprog;
+	*vprog_out = vprog;
+}
+
+bool gp10b_need_scatter_buffer(struct gk20a *g)
+{
+	return !nvgpu_iommuable(g);
+}
+
+static u8 parity(u32 a)
+{
+	a ^= a >> 16u;
+	a ^= a >> 8u;
+	a ^= a >> 4u;
+	a &= 0xfu;
+	return (0x6996u >> a) & 1u;
+}
+
+int gp10b_populate_scatter_buffer(struct gk20a *g,
+				  struct sg_table *sgt,
+				  size_t surface_size,
+				  void *scatter_buffer_ptr,
+				  size_t scatter_buffer_size)
+{
+	/* map scatter buffer to CPU VA and fill it */
+	const u32 page_size_log2 = 12;
+	const u32 page_size = 1 << page_size_log2;
+	const u32 page_size_shift = page_size_log2 - 7u;
+
+	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
+	const u32 getSliceMaskGP10B = 0x3ffffe48;
+	u8 *scatter_buffer = scatter_buffer_ptr;
+
+	size_t i;
+	struct scatterlist *sg = NULL;
+	u8 d = 0;
+	size_t page = 0;
+	size_t pages_left;
+
+	surface_size = round_up(surface_size, page_size);
+
+	pages_left = surface_size >> page_size_log2;
+	if ((pages_left >> 3) > scatter_buffer_size)
+		return -ENOMEM;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		unsigned int j;
+		u64 surf_pa = sg_phys(sg);
+		unsigned int n = (int)(sg->length >> page_size_log2);
+
+		nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
+
+		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
+			u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B)
+					>> page_size_shift;
+			u8 scatter_bit = parity(addr);
+			u8 bit = page & 7;
+
+			d |= scatter_bit << bit;
+			if (bit == 7) {
+				scatter_buffer[page >> 3] = d;
+				d = 0;
+			}
+
+			++page;
+			--pages_left;
+		}
+
+		if (pages_left == 0)
+			break;
+	}
+
+	/* write the last byte in case the number of pages is not divisible by 8 */
+	if ((page & 7) != 0)
+		scatter_buffer[page >> 3] = d;
+
+	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
+		nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
+		for (i = 0; i < page >> 3; i++) {
+			nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
+		}
+	}
+
+	return 0;
+}
+
+struct nvgpu_os_linux_ops gp10b_cde_ops = {
+	.cde = {
+		.get_program_numbers = gp10b_cde_get_program_numbers,
+		.need_scatter_buffer = gp10b_need_scatter_buffer,
+		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
+	},
+};
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.h b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h
new file mode 100644
index 00000000..52e9f292
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h
@@ -0,0 +1,32 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NVHOST_GP10B_CDE +#define _NVHOST_GP10B_CDE + +#include "os_linux.h" + +extern struct nvgpu_os_linux_ops gp10b_cde_ops; + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c new file mode 100644 index 00000000..165f33db --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ce2.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include + +#include "gk20a/ce2_gk20a.h" +#include "gk20a/gk20a.h" +#include "channel.h" + +static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) +{ + /* there is no local memory available, + don't allow local memory related CE flags */ + if (!g->mm.vidmem.size) { + launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | + NVGPU_CE_DST_LOCATION_LOCAL_FB); + } + return launch_flags; +} + +int gk20a_ce_execute_ops(struct gk20a *g, + u32 ce_ctx_id, + u64 src_buf, + u64 dst_buf, + u64 size, + unsigned int payload, + int launch_flags, + int request_operation, + u32 submit_flags, + struct gk20a_fence **gk20a_fence_out) +{ + int ret = -EPERM; + struct gk20a_ce_app *ce_app = &g->ce_app; + struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; + bool found = false; + u32 *cmd_buf_cpu_va; + u64 cmd_buf_gpu_va = 0; + u32 methodSize; + u32 cmd_buf_read_offset; + u32 dma_copy_class; + struct nvgpu_gpfifo_entry gpfifo; + struct nvgpu_channel_fence fence = {0, 0}; + struct gk20a_fence *ce_cmd_buf_fence_out = NULL; + + if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) + goto end; + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { + if (ce_ctx->ctx_id == ce_ctx_id) { + found = true; + break; + } + } + + nvgpu_mutex_release(&ce_app->app_mutex); + + if (!found) { + ret = -EINVAL; + goto end; + } + + if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { + ret = -ENODEV; + goto end; + } + + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); + + ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; + + cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * + (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); + + cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; + + if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { + struct gk20a_fence **prev_post_fence = + &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; + + ret = gk20a_fence_wait(g, *prev_post_fence, + gk20a_get_gr_idle_timeout(g)); + + gk20a_fence_put(*prev_post_fence); + *prev_post_fence = NULL; + if (ret) + goto noop; + } + + cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); + + dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + methodSize = gk20a_ce_prepare_submit(src_buf, + dst_buf, + size, + &cmd_buf_cpu_va[cmd_buf_read_offset], + NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, + payload, + gk20a_get_valid_launch_flags(g, launch_flags), + request_operation, + dma_copy_class); + + if (methodSize) { + /* store the element into gpfifo */ + gpfifo.entry0 = + u64_lo32(cmd_buf_gpu_va); + gpfifo.entry1 = + (u64_hi32(cmd_buf_gpu_va) | + pbdma_gp_entry1_length_f(methodSize)); + + /* take always the postfence as it is needed for protecting the ce context */ + submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + nvgpu_smp_wmb(); + + ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, + 1, submit_flags, &fence, + &ce_cmd_buf_fence_out, NULL); + + if (!ret) { + ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = + ce_cmd_buf_fence_out; + if (gk20a_fence_out) { + gk20a_fence_get(ce_cmd_buf_fence_out); + *gk20a_fence_out = ce_cmd_buf_fence_out; + } + + /* Next available command buffer queue Index */ + ++ce_ctx->cmd_buf_read_queue_offset; + } + } else { + ret = -ENOMEM; + } +noop: + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); +end: + return ret; +} diff --git a/drivers/gpu/nvgpu/os/linux/channel.c 
b/drivers/gpu/nvgpu/os/linux/channel.c new file mode 100644 index 00000000..7810bc21 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/channel.c @@ -0,0 +1,1021 @@ +/* + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +/* + * This is required for nvgpu_vm_find_buf() which is used in the tracing + * code. Once we can get and access userspace buffers without requiring + * direct dma_buf usage this can be removed. + */ +#include + +#include "gk20a/gk20a.h" + +#include "channel.h" +#include "ioctl_channel.h" +#include "os_linux.h" + +#include + +#include +#include +#include +#include + +#include "sync_sema_android.h" + +u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) + flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) + flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) + flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) + flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; + + return flags; +} + +/* + * API to convert error_notifiers in common code and of the form + * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user + * space and of the form NVGPU_CHANNEL_* + */ +static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) +{ + switch (error_notifier) { + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: + return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: + return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: + return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; + case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: + return NVGPU_CHANNEL_GR_EXCEPTION; + case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: + return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: + return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: + return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; + case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: + return NVGPU_CHANNEL_PBDMA_ERROR; + case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: + return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; + case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: + return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; + case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: + return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + } + + pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); + + return error_notifier; +} + +/** + * nvgpu_set_error_notifier_locked() + * Should be called with 
ch->error_notifier_mutex held + * + * error should be of the form NVGPU_ERR_NOTIFIER_* + */ +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + error = nvgpu_error_notifier_to_channel_notifier(error); + + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + struct timespec time_data; + u64 nsec; + + getnstimeofday(&time_data); + nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + notification->time_stamp.nanoseconds[0] = + (u32)nsec; + notification->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + notification->info32 = error; + notification->status = 0xffff; + + nvgpu_err(ch->g, + "error notifier set to %d for ch %d", error, ch->chid); + } +} + +/* error should be of the form NVGPU_ERR_NOTIFIER_* */ +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + nvgpu_set_error_notifier_locked(ch, error); + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + + /* Don't overwrite error flag if it is already set */ + if (notification->status != 0xffff) + nvgpu_set_error_notifier_locked(ch, error); + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + bool notifier_set = false; + + error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + u32 err = notification->info32; + + if (err == error_notifier) + notifier_set = true; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return notifier_set; +} + +static void gk20a_channel_update_runcb_fn(struct work_struct *work) +{ + struct nvgpu_channel_completion_cb *completion_cb = + container_of(work, struct nvgpu_channel_completion_cb, work); + struct nvgpu_channel_linux *priv = + container_of(completion_cb, + struct nvgpu_channel_linux, completion_cb); + struct channel_gk20a *ch = priv->ch; + void (*fn)(struct channel_gk20a *, void *); + void *user_data; + + nvgpu_spinlock_acquire(&completion_cb->lock); + fn = completion_cb->fn; + user_data = completion_cb->user_data; + nvgpu_spinlock_release(&completion_cb->lock); + + if (fn) + fn(ch, user_data); +} + +static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + nvgpu_spinlock_init(&priv->completion_cb.lock); + INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); +} + +static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + 
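/* Drop the lock before cancel_work_sync(): the completion worker
+	 * (gk20a_channel_update_runcb_fn) acquires this same lock, and
+	 * cancel_work_sync() may sleep while waiting for it to finish. */
+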
nvgpu_spinlock_release(&priv->completion_cb.lock); + cancel_work_sync(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + schedule_work(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + cancel_work_sync(&priv->completion_cb.work); +} + +struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, + void (*update_fn)(struct channel_gk20a *, void *), + void *update_fn_data, + int runlist_id, + bool is_privileged_channel) +{ + struct channel_gk20a *ch; + struct nvgpu_channel_linux *priv; + + ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + + if (ch) { + priv = ch->os_priv; + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = update_fn; + priv->completion_cb.user_data = update_fn_data; + nvgpu_spinlock_release(&priv->completion_cb.lock); + } + + return ch; +} + +static void nvgpu_channel_open_linux(struct channel_gk20a *ch) +{ +} + +static void nvgpu_channel_close_linux(struct channel_gk20a *ch) +{ + nvgpu_channel_work_completion_clear(ch); + +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_channel_free_cycle_stats_buffer(ch); + gk20a_channel_free_cycle_stats_snapshot(ch); +#endif +} + +static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv; + int err; + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) + return -ENOMEM; + + ch->os_priv = priv; + priv->ch = ch; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = true; +#endif + + err = nvgpu_mutex_init(&priv->error_notifier.mutex); + if (err) { + nvgpu_kfree(g, priv); + return err; + } + + nvgpu_channel_work_completion_init(ch); + + return 0; +} + +static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_destroy(&priv->error_notifier.mutex); + nvgpu_kfree(g, priv); + + ch->os_priv = NULL; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = false; +#endif +} + +static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, + const char *fmt, ...) 
+{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + char name[30]; + va_list args; + + fence_framework = &priv->fence_framework; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence_framework->timeline = gk20a_sync_timeline_create(name); + + if (!fence_framework->timeline) + return -EINVAL; + + return 0; +} +static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_signal(fence_framework->timeline); +} + +static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_destroy(fence_framework->timeline); + fence_framework->timeline = NULL; +} + +static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + return (fence_framework->timeline != NULL); +} + +int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + int chid; + int err; + + for (chid = 0; chid < (int)f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + err = nvgpu_channel_alloc_linux(g, ch); + if (err) + goto err_clean; + } + + g->os_channel.open = nvgpu_channel_open_linux; + g->os_channel.close = nvgpu_channel_close_linux; + g->os_channel.work_completion_signal = + nvgpu_channel_work_completion_signal; + g->os_channel.work_completion_cancel_sync = + nvgpu_channel_work_completion_cancel_sync; + + g->os_channel.os_fence_framework_inst_exists = + nvgpu_channel_fence_framework_exists; + g->os_channel.init_os_fence_framework = + nvgpu_channel_init_os_fence_framework; + g->os_channel.signal_os_fence_framework = + nvgpu_channel_signal_os_fence_framework; + g->os_channel.destroy_os_fence_framework = + nvgpu_channel_destroy_os_fence_framework; + + return 0; + +err_clean: + for (; chid >= 0; chid--) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + return err; +} + +void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + unsigned int chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + + g->os_channel.os_fence_framework_inst_exists = NULL; + g->os_channel.init_os_fence_framework = NULL; + g->os_channel.signal_os_fence_framework = NULL; + g->os_channel.destroy_os_fence_framework = NULL; +} + +u32 nvgpu_get_gpfifo_entry_size(void) +{ + return sizeof(struct nvgpu_gpfifo_entry); +} + +#ifdef CONFIG_DEBUG_FS +static void trace_write_pushbuffer(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *g) +{ + void *mem = NULL; + unsigned int words; + u64 offset; + struct dma_buf *dmabuf = NULL; + + if (gk20a_debug_trace_cmdbuf) { + u64 gpu_va = (u64)g->entry0 | + (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); + int err; + + words = pbdma_gp_entry1_length_v(g->entry1); + err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); + if (!err) + mem = dma_buf_vmap(dmabuf); + } + + if (mem) { + u32 
i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. + */ + for (i = 0; i < words; i += 128U) { + trace_gk20a_push_cmdbuf( + c->g->name, + 0, + min(words - i, 128U), + offset + i * sizeof(u32), + mem); + } + dma_buf_vunmap(dmabuf, mem); + } +} +#endif + +static void trace_write_pushbuffer_range(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *g, + struct nvgpu_gpfifo_entry __user *user_gpfifo, + int offset, + int count) +{ +#ifdef CONFIG_DEBUG_FS + u32 size; + int i; + struct nvgpu_gpfifo_entry *gp; + bool gpfifo_allocated = false; + + if (!gk20a_debug_trace_cmdbuf) + return; + + if (!g && !user_gpfifo) + return; + + if (!g) { + size = count * sizeof(struct nvgpu_gpfifo_entry); + if (size) { + g = nvgpu_big_malloc(c->g, size); + if (!g) + return; + + if (copy_from_user(g, user_gpfifo, size)) { + nvgpu_big_free(c->g, g); + return; + } + } + gpfifo_allocated = true; + } + + gp = g + offset; + for (i = 0; i < count; i++, gp++) + trace_write_pushbuffer(c, gp); + + if (gpfifo_allocated) + nvgpu_big_free(c->g, g); +#endif +} + +/* + * Handle the submit synchronization - pre-fences and post-fences. + */ +static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, + struct nvgpu_channel_fence *fence, + struct channel_gk20a_job *job, + struct priv_cmd_entry **wait_cmd, + struct priv_cmd_entry **incr_cmd, + struct gk20a_fence **post_fence, + bool register_irq, + u32 flags) +{ + struct gk20a *g = c->g; + bool need_sync_fence = false; + bool new_sync_created = false; + int wait_fence_fd = -1; + int err = 0; + bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); + bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_mutex_acquire(&c->sync_lock); + if (!c->sync) { + c->sync = gk20a_channel_sync_create(c, false); + if (!c->sync) { + err = -ENOMEM; + nvgpu_mutex_release(&c->sync_lock); + goto fail; + } + new_sync_created = true; + } + nvgpu_atomic_inc(&c->sync->refcount); + nvgpu_mutex_release(&c->sync_lock); + } + + if (g->ops.fifo.resetup_ramfc && new_sync_created) { + err = g->ops.fifo.resetup_ramfc(c); + if (err) + goto fail; + } + + /* + * Optionally insert syncpt/semaphore wait in the beginning of gpfifo + * submission when user requested and the wait hasn't expired. + */ + if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { + int max_wait_cmds = c->deterministic ? 1 : 0; + + if (!pre_alloc_enabled) + job->wait_cmd = nvgpu_kzalloc(g, + sizeof(struct priv_cmd_entry)); + + if (!job->wait_cmd) { + err = -ENOMEM; + goto fail; + } + + if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { + wait_fence_fd = fence->id; + err = c->sync->wait_fd(c->sync, wait_fence_fd, + job->wait_cmd, max_wait_cmds); + } else { + err = c->sync->wait_syncpt(c->sync, fence->id, + fence->value, + job->wait_cmd); + } + + if (err) + goto clean_up_wait_cmd; + + if (job->wait_cmd->valid) + *wait_cmd = job->wait_cmd; + } + + if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && + (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) + need_sync_fence = true; + + /* + * Always generate an increment at the end of a GPFIFO submission. This + * is used to keep track of method completion for idle railgating. The + * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 
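+	 *
+	 * The resulting ring layout for a fully tracked submit is then,
+	 * conceptually:
+	 *
+	 *   [wait_cmd][user gpfifo entries 0..N-1][incr_cmd]
+	 *
+	 * (Illustrative only: wait_cmd is present just when FENCE_WAIT was
+	 * requested and the wait has not already expired.)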
+ */ + job->post_fence = gk20a_alloc_fence(c); + if (!job->post_fence) { + err = -ENOMEM; + goto clean_up_wait_cmd; + } + if (!pre_alloc_enabled) + job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); + + if (!job->incr_cmd) { + err = -ENOMEM; + goto clean_up_post_fence; + } + + if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) + err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, + job->post_fence, need_wfi, need_sync_fence, + register_irq); + else + err = c->sync->incr(c->sync, job->incr_cmd, + job->post_fence, need_sync_fence, + register_irq); + if (!err) { + *incr_cmd = job->incr_cmd; + *post_fence = job->post_fence; + } else + goto clean_up_incr_cmd; + + return 0; + +clean_up_incr_cmd: + free_priv_cmdbuf(c, job->incr_cmd); + if (!pre_alloc_enabled) + job->incr_cmd = NULL; +clean_up_post_fence: + gk20a_fence_put(job->post_fence); + job->post_fence = NULL; +clean_up_wait_cmd: + free_priv_cmdbuf(c, job->wait_cmd); + if (!pre_alloc_enabled) + job->wait_cmd = NULL; +fail: + *wait_cmd = NULL; + return err; +} + +static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *cmd) +{ + struct gk20a *g = c->g; + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + struct nvgpu_gpfifo_entry x = { + .entry0 = u64_lo32(cmd->gva), + .entry1 = u64_hi32(cmd->gva) | + pbdma_gp_entry1_length_f(cmd->size) + }; + + nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), + &x, sizeof(x)); + + if (cmd->mem->aperture == APERTURE_SYSMEM) + trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, + cmd->mem->cpu_va + cmd->off * sizeof(u32)); + + c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); +} + +/* + * Copy source gpfifo entries into the gpfifo ring buffer, potentially + * splitting into two memcpys to handle wrap-around. + */ +static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *kern_gpfifo, + struct nvgpu_gpfifo_entry __user *user_gpfifo, + u32 num_entries) +{ + /* byte offsets */ + u32 gpfifo_size = + c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); + u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); + u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); + u32 end = start + len; /* exclusive */ + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + struct nvgpu_gpfifo_entry *cpu_src; + int err; + + if (user_gpfifo && !c->gpfifo.pipe) { + /* + * This path (from userspace to sysmem) is special in order to + * avoid two copies unnecessarily (from user to pipe, then from + * pipe to gpu sysmem buffer). 
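+		 *
+		 * A rough sketch of the two copy strategies (not literal
+		 * code):
+		 *
+		 *   sysmem: user buf --copy_from_user--> gpfifo ring
+		 *   vidmem: user buf --copy_from_user--> c->gpfifo.pipe
+		 *                    --nvgpu_mem_wr_n--> gpfifo ring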
+		 */
+		if (end > gpfifo_size) {
+			/* wrap-around */
+			int length0 = gpfifo_size - start;
+			int length1 = len - length0;
+			void __user *user2 = (u8 __user *)user_gpfifo + length0;
+
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, length0);
+			if (err)
+				return err;
+
+			err = copy_from_user(gpfifo_mem->cpu_va,
+					user2, length1);
+			if (err)
+				return err;
+		} else {
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, len);
+			if (err)
+				return err;
+		}
+
+		trace_write_pushbuffer_range(c, NULL, user_gpfifo,
+				0, num_entries);
+		goto out;
+	} else if (user_gpfifo) {
+		/* from userspace to vidmem, use the common copy path below */
+		err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
+		if (err)
+			return err;
+
+		cpu_src = c->gpfifo.pipe;
+	} else {
+		/* from kernel to either sysmem or vidmem, don't need
+		 * copy_from_user so use the common path below */
+		cpu_src = kern_gpfifo;
+	}
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+		void *src2 = (u8 *)cpu_src + length0;
+
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
+	} else {
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
+	}
+
+	trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+
+out:
+	c->gpfifo.put = (c->gpfifo.put + num_entries) &
+		(c->gpfifo.entry_num - 1);
+
+	return 0;
+}
+
+int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
+				struct nvgpu_gpfifo_entry *gpfifo,
+				struct nvgpu_submit_gpfifo_args *args,
+				u32 num_entries,
+				u32 flags,
+				struct nvgpu_channel_fence *fence,
+				struct gk20a_fence **fence_out,
+				struct fifo_profile_gk20a *profile)
+{
+	struct gk20a *g = c->g;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct priv_cmd_entry *incr_cmd = NULL;
+	struct gk20a_fence *post_fence = NULL;
+	struct channel_gk20a_job *job = NULL;
+	/* we might need two extra gpfifo entries - one for the pre fence
+	 * and one for the post fence. */
+	const int extra_entries = 2;
+	bool skip_buffer_refcounting = (flags &
+			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
+	int err = 0;
+	bool need_job_tracking;
+	bool need_deferred_cleanup = false;
+	struct nvgpu_gpfifo_entry __user *user_gpfifo = args ?
+		(struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL;
+
+	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+		return -ENODEV;
+
+	if (c->has_timedout)
+		return -ETIMEDOUT;
+
+	if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
+		return -ENOMEM;
+
+	/* fifo not large enough for request. Return error immediately.
+	 * Kernel can insert gpfifo entries before and after user gpfifos.
+	 * So, add extra_entries to the user request. Also, HW with fifo size N
+	 * can accept only N-1 entries, hence the check below. */
+	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
+		nvgpu_err(g, "not enough gpfifo space allocated");
+		return -ENOMEM;
+	}
+
+	if (!gpfifo && !args)
+		return -EINVAL;
+
+	if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
+		      NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
+	    !fence)
+		return -EINVAL;
+
+	/* an address space needs to have been bound at this point.
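+	 * gpfifo entries carry GPU virtual addresses, which are only
+	 * meaningful within the VM bound to the channel.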
*/ + if (!gk20a_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of gpfifo" + " submission."); + return -EINVAL; + } + + gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); + + /* update debug settings */ + nvgpu_ltc_sync_enabled(g); + + nvgpu_log_info(g, "channel %d", c->chid); + + /* + * Job tracking is necessary for any of the following conditions: + * - pre- or post-fence functionality + * - channel wdt + * - GPU rail-gating with non-deterministic channels + * - buffer refcounting + * + * If none of the conditions are met, then job tracking is not + * required and a fast submit can be done (ie. only need to write + * out userspace GPFIFO entries and update GP_PUT). + */ + need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || + (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || + c->timeout.enabled || + (g->can_railgate && !c->deterministic) || + !skip_buffer_refcounting; + + if (need_job_tracking) { + bool need_sync_framework = false; + + /* + * If the channel is to have deterministic latency and + * job tracking is required, the channel must have + * pre-allocated resources. Otherwise, we fail the submit here + */ + if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) + return -EINVAL; + + need_sync_framework = + gk20a_channel_sync_needs_sync_framework(g) || + (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && + flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); + + /* + * Deferred clean-up is necessary for any of the following + * conditions: + * - channel's deterministic flag is not set + * - dependency on sync framework, which could make the + * behavior of the clean-up operation non-deterministic + * (should not be performed in the submit path) + * - channel wdt + * - GPU rail-gating with non-deterministic channels + * - buffer refcounting + * + * If none of the conditions are met, then deferred clean-up + * is not required, and we clean-up one job-tracking + * resource in the submit path. + */ + need_deferred_cleanup = !c->deterministic || + need_sync_framework || + c->timeout.enabled || + (g->can_railgate && + !c->deterministic) || + !skip_buffer_refcounting; + + /* + * For deterministic channels, we don't allow deferred clean_up + * processing to occur. In cases we hit this, we fail the submit + */ + if (c->deterministic && need_deferred_cleanup) + return -EINVAL; + + if (!c->deterministic) { + /* + * Get a power ref unless this is a deterministic + * channel that holds them during the channel lifetime. + * This one is released by gk20a_channel_clean_up_jobs, + * via syncpt or sema interrupt, whichever is used. + */ + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a to submit gpfifo, process %s", + current->comm); + return err; + } + } + + if (!need_deferred_cleanup) { + /* clean up a single job */ + gk20a_channel_clean_up_jobs(c, false); + } + } + + + /* Grab access to HW to deal with do_idle */ + if (c->deterministic) + nvgpu_rwsem_down_read(&g->deterministic_busy); + + if (c->deterministic && c->deterministic_railgate_allowed) { + /* + * Nope - this channel has dropped its own power ref. As + * deterministic submits don't hold power on per each submitted + * job like normal ones do, the GPU might railgate any time now + * and thus submit is disallowed. + */ + err = -EINVAL; + goto clean_up; + } + + trace_gk20a_channel_submit_gpfifo(g->name, + c->chid, + num_entries, + flags, + fence ? fence->id : 0, + fence ? 
fence->value : 0);
+
+	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	/*
+	 * Make sure we have enough space for gpfifo entries. Check cached
+	 * values first and then read from HW. If no space, return EAGAIN
+	 * and let userspace decide whether to retry the request or not.
+	 */
+	if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
+		if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
+			err = -EAGAIN;
+			goto clean_up;
+		}
+	}
+
+	if (c->has_timedout) {
+		err = -ETIMEDOUT;
+		goto clean_up;
+	}
+
+	if (need_job_tracking) {
+		err = channel_gk20a_alloc_job(c, &job);
+		if (err)
+			goto clean_up;
+
+		err = gk20a_submit_prepare_syncs(c, fence, job,
+						 &wait_cmd, &incr_cmd,
+						 &post_fence,
+						 need_deferred_cleanup,
+						 flags);
+		if (err)
+			goto clean_up_job;
+	}
+
+	gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+
+	if (wait_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
+
+	if (gpfifo || user_gpfifo)
+		err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
+				num_entries);
+	if (err)
+		goto clean_up_job;
+
+	/*
+	 * And here's where we add the incr_cmd we generated earlier. It should
+	 * always run!
+	 */
+	if (incr_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
+
+	if (fence_out)
+		*fence_out = gk20a_fence_get(post_fence);
+
+	if (need_job_tracking)
+		/* TODO! Check for errors... */
+		gk20a_channel_add_job(c, job, skip_buffer_refcounting);
+	gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
+
+	g->ops.fifo.userd_gp_put(g, c);
+
+	/* No hw access beyond this point */
+	if (c->deterministic)
+		nvgpu_rwsem_up_read(&g->deterministic_busy);
+
+	trace_gk20a_channel_submitted_gpfifo(g->name,
+				c->chid,
+				num_entries,
+				flags,
+				post_fence ? post_fence->syncpt_id : 0,
+				post_fence ? post_fence->syncpt_value : 0);
+
+	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	gk20a_fifo_profile_snapshot(profile, PROFILE_END);
+
+	nvgpu_log_fn(g, "done");
+	return err;
+
+clean_up_job:
+	channel_gk20a_free_job(c, job);
+clean_up:
+	nvgpu_log_fn(g, "fail");
+	gk20a_fence_put(post_fence);
+	if (c->deterministic)
+		nvgpu_rwsem_up_read(&g->deterministic_busy);
+	else if (need_deferred_cleanup)
+		gk20a_idle(g);
+
+	return err;
+}
+
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
new file mode 100644
index 00000000..4a58b10c
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */ +#ifndef __NVGPU_CHANNEL_H__ +#define __NVGPU_CHANNEL_H__ + +#include +#include + +#include + +struct channel_gk20a; +struct nvgpu_gpfifo; +struct nvgpu_submit_gpfifo_args; +struct nvgpu_channel_fence; +struct gk20a_fence; +struct fifo_profile_gk20a; +struct nvgpu_os_linux; + +struct sync_fence; +struct sync_timeline; + +struct nvgpu_channel_completion_cb { + /* + * Signal channel owner via a callback, if set, in job cleanup with + * schedule_work. Means that something finished on the channel (perhaps + * more than one job). + */ + void (*fn)(struct channel_gk20a *, void *); + void *user_data; + /* Make access to the two above atomic */ + struct nvgpu_spinlock lock; + /* Per-channel async work task, cannot reschedule itself */ + struct work_struct work; +}; + +struct nvgpu_error_notifier { + struct dma_buf *dmabuf; + void *vaddr; + + struct nvgpu_notification *notification; + + struct nvgpu_mutex mutex; +}; + +/* + * This struct contains fence_related data. + * e.g. sync_timeline for sync_fences. + */ +struct nvgpu_os_fence_framework { + struct sync_timeline *timeline; +}; + +struct nvgpu_channel_linux { + struct channel_gk20a *ch; + + struct nvgpu_os_fence_framework fence_framework; + + struct nvgpu_channel_completion_cb completion_cb; + struct nvgpu_error_notifier error_notifier; + + struct dma_buf *cyclestate_buffer_handler; +}; + +u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); +int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); +void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); + +struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, + void (*update_fn)(struct channel_gk20a *, void *), + void *update_fn_data, + int runlist_id, + bool is_privileged_channel); + +int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_submit_gpfifo_args *args, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct gk20a_fence **fence_out, + struct fifo_profile_gk20a *profile); + +#endif /* __NVGPU_CHANNEL_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/clk.c b/drivers/gpu/nvgpu/os/linux/clk.c new file mode 100644 index 00000000..414b17c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.c @@ -0,0 +1,165 @@ +/* + * Linux clock support + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +#include "clk.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +#include "gk20a/gk20a.h" + +static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + unsigned long ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + if (g->clk.tegra_clk) + ret = g->clk.cached_rate ? + g->clk.cached_rate : + clk_get_rate(g->clk.tegra_clk); + else + ret = platform->cached_rate ? 
+ platform->cached_rate : + clk_get_rate(platform->clk[0]); + break; + case CTRL_CLK_DOMAIN_PWRCLK: + ret = clk_get_rate(platform->clk[1]); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = 0; + break; + } + + return ret; +} + +static int nvgpu_linux_clk_set_rate(struct gk20a *g, + u32 api_domain, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + int ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + if (g->clk.tegra_clk) { + ret = clk_set_rate(g->clk.tegra_clk, rate); + if (!ret) + g->clk.cached_rate = rate; + } else { + ret = clk_set_rate(platform->clk[0], rate); + if (!ret) + platform->cached_rate = rate; + } + break; + case CTRL_CLK_DOMAIN_PWRCLK: + ret = clk_set_rate(platform->clk[1], rate); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = -EINVAL; + break; + } + + return ret; +} + +static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + + /* + * On Tegra platforms with GPCPLL bus (gbus) GPU tegra_clk clock exposed + * to frequency governor is a shared user on the gbus. The latter can be + * accessed as GPU clock parent, and incorporate DVFS related data. + */ + if (g->clk.tegra_clk) + return tegra_dvfs_get_fmax_at_vmin_safe_t( + clk_get_parent(g->clk.tegra_clk)); + + if (platform->maxmin_clk_id) + return tegra_bpmp_dvfs_get_fmax_at_vmin( + platform->maxmin_clk_id); + + return 0; +} + +static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) +{ + struct clk *c; + + c = clk_get_sys("gpu_ref", "gpu_ref"); + if (IS_ERR(c)) { + nvgpu_err(g, "failed to get GPCPLL reference clock"); + return 0; + } + + return clk_get_rate(c); +} + +static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, + unsigned long rate) +{ + return tegra_dvfs_predict_mv_at_hz_cur_tfloor( + clk_get_parent(clk->tegra_clk), rate); +} + +static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) +{ + int ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk)); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = 0; + break; + } + + return ret; +} + +static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) +{ + return clk_prepare_enable(clk->tegra_clk); +} + +static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) +{ + clk_disable_unprepare(clk->tegra_clk); +} + +void nvgpu_linux_init_clk_support(struct gk20a *g) +{ + g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; + g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; + g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; + g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; + g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; + g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; + g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; + g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; +} diff --git a/drivers/gpu/nvgpu/os/linux/clk.h b/drivers/gpu/nvgpu/os/linux/clk.h new file mode 100644 index 00000000..614a7fd7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NVGPU_COMMON_LINUX_CLK_H
+#define NVGPU_COMMON_LINUX_CLK_H
+
+struct gk20a;
+void nvgpu_linux_init_clk_support(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
new file mode 100644
index 00000000..353f6363
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+
+#include
+
+#include
+
+#include "gk20a/gk20a.h"
+#include "dmabuf.h"
+
+void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
+			struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
+							     buf->dev);
+
+	if (!comptags)
+		return;
+
+	if (!priv) {
+		memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+	*comptags = priv->comptags;
+	nvgpu_mutex_release(&priv->lock);
+}
+
+int gk20a_alloc_or_get_comptags(struct gk20a *g,
+				struct nvgpu_os_buffer *buf,
+				struct gk20a_comptag_allocator *allocator,
+				struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
+							     buf->dev);
+	u32 offset;
+	int err;
+	unsigned int ctag_granularity;
+	u32 lines;
+
+	if (!priv)
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	if (priv->comptags.allocated) {
+		/*
+		 * already allocated
+		 */
+		*comptags = priv->comptags;
+
+		err = 0;
+		goto exit_locked;
+	}
+
+	ctag_granularity = g->ops.fb.compression_page_size(g);
+	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
+
+	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
+	if (lines < 1) {
+		err = -EINVAL;
+		goto exit_locked;
+	}
+
+	/* store the allocator so we can use it when we free the ctags */
+	priv->comptag_allocator = allocator;
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (!err) {
+		priv->comptags.offset = offset;
+		priv->comptags.lines = lines;
+		priv->comptags.needs_clear = true;
+	} else {
+		priv->comptags.offset = 0;
+		priv->comptags.lines = 0;
+		priv->comptags.needs_clear = false;
+	}
+
+	/*
+	 * We don't report an error here if comptag alloc failed. The
+	 * caller will simply fall back to incompressible kinds. It
+	 * would not be safe to re-allocate comptags anyway on
+	 * successive calls, as that would break map aliasing.
+ */ + err = 0; + priv->comptags.allocated = true; + + *comptags = priv->comptags; + +exit_locked: + nvgpu_mutex_release(&priv->lock); + + return err; +} + +bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, + buf->dev); + bool clear_started = false; + + if (priv) { + nvgpu_mutex_acquire(&priv->lock); + + clear_started = priv->comptags.needs_clear; + + if (!clear_started) + nvgpu_mutex_release(&priv->lock); + } + + return clear_started; +} + +void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, + bool clear_successful) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, + buf->dev); + if (priv) { + if (clear_successful) + priv->comptags.needs_clear = false; + + nvgpu_mutex_release(&priv->lock); + } +} diff --git a/drivers/gpu/nvgpu/os/linux/cond.c b/drivers/gpu/nvgpu/os/linux/cond.c new file mode 100644 index 00000000..633c34fd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cond.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +int nvgpu_cond_init(struct nvgpu_cond *cond) +{ + init_waitqueue_head(&cond->wq); + cond->initialized = true; + + return 0; +} + +void nvgpu_cond_destroy(struct nvgpu_cond *cond) +{ + cond->initialized = false; +} + +int nvgpu_cond_signal(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up(&cond->wq); + + return 0; +} + +int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_interruptible(&cond->wq); + + return 0; +} + +int nvgpu_cond_broadcast(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_all(&cond->wq); + + return 0; +} + +int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_interruptible_all(&cond->wq); + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c new file mode 100644 index 00000000..a335988a --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" + +#include +#include +#include +#include + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "ctxsw_trace.h" + +#include +#include + +#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) + +/* Userland-facing FIFO (one global + eventually one per VM) */ +struct gk20a_ctxsw_dev { + struct gk20a *g; + + struct nvgpu_ctxsw_ring_header *hdr; + struct nvgpu_ctxsw_trace_entry *ents; + struct nvgpu_ctxsw_trace_filter filter; + bool write_enabled; + struct nvgpu_cond readout_wq; + size_t size; + u32 num_ents; + + nvgpu_atomic_t vma_ref; + + struct nvgpu_mutex write_lock; +}; + + +struct gk20a_ctxsw_trace { + struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; +}; + +static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx == hdr->read_idx); +} + +static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) +{ + return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; +} + +static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; +} + +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, + loff_t *off) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + struct nvgpu_ctxsw_trace_entry __user *entry = + (struct nvgpu_ctxsw_trace_entry *) buf; + size_t copied = 0; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, + "filp=%p buf=%p size=%zu", filp, buf, size); + + nvgpu_mutex_acquire(&dev->write_lock); + while (ring_is_empty(hdr)) { + nvgpu_mutex_release(&dev->write_lock); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, + !ring_is_empty(hdr), 0); + if (err) + return err; + nvgpu_mutex_acquire(&dev->write_lock); + } + + while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { + if (ring_is_empty(hdr)) + break; + + if (copy_to_user(entry, &dev->ents[hdr->read_idx], + sizeof(*entry))) { + nvgpu_mutex_release(&dev->write_lock); + return -EFAULT; + } + + hdr->read_idx++; + if (hdr->read_idx >= hdr->num_ents) + hdr->read_idx = 0; + + entry++; + copied += sizeof(*entry); + size -= sizeof(*entry); + } + + nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, + hdr->read_idx); + + *off = hdr->read_idx; + nvgpu_mutex_release(&dev->write_lock); + + return copied; +} + +static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = true; + nvgpu_mutex_release(&dev->write_lock); + dev->g->ops.fecs_trace.enable(dev->g); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); + dev->g->ops.fecs_trace.disable(dev->g); + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = false; + nvgpu_mutex_release(&dev->write_lock); + return 0; +} + +static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, + size_t size) +{ + struct gk20a *g = dev->g; + void *buf; + int err; + + if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) + return -EBUSY; + + err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); + if (err) + return err; + + + dev->hdr = buf; + 
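+	/*
+	 * The ring is a single allocation: the header sits at the front
+	 * and the entry array starts immediately behind it, so dev->ents
+	 * below is simply hdr + 1. Sketch of the layout:
+	 *
+	 *	dev->hdr  -> struct nvgpu_ctxsw_ring_header
+	 *	dev->ents -> struct nvgpu_ctxsw_trace_entry[hdr->num_ents]
+	 */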
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); + dev->size = size; + dev->num_ents = dev->hdr->num_ents; + + nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", + dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); + return 0; +} + +int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, + void **buf, size_t *size) +{ + struct nvgpu_ctxsw_ring_header *hdr; + + *size = roundup(*size, PAGE_SIZE); + hdr = vmalloc_user(*size); + if (!hdr) + return -ENOMEM; + + hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; + hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; + hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) + / sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->drop_count = 0; + hdr->read_idx = 0; + hdr->write_idx = 0; + hdr->write_seqno = 0; + + *buf = hdr; + return 0; +} + +int gk20a_ctxsw_dev_ring_free(struct gk20a *g) +{ + struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; + + nvgpu_vfree(g, dev->hdr); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_ring_setup_args *args) +{ + struct gk20a *g = dev->g; + size_t size = args->size; + int ret; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); + + if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) + return -EINVAL; + + nvgpu_mutex_acquire(&dev->write_lock); + ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); + nvgpu_mutex_release(&dev->write_lock); + + return ret; +} + +static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + struct gk20a *g = dev->g; + + nvgpu_mutex_acquire(&dev->write_lock); + dev->filter = args->filter; + nvgpu_mutex_release(&dev->write_lock); + + if (g->ops.fecs_trace.set_filter) + g->ops.fecs_trace.set_filter(g, &dev->filter); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + nvgpu_mutex_acquire(&dev->write_lock); + args->filter = dev->filter; + nvgpu_mutex_release(&dev->write_lock); + + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.fecs_trace.flush) + err = g->ops.fecs_trace.flush(g); + + if (likely(!err)) + err = g->ops.fecs_trace.poll(g); + + gk20a_idle(g); + return err; +} + +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + struct gk20a_ctxsw_trace *trace; + struct gk20a_ctxsw_dev *dev; + int err; + size_t size; + u32 n; + + /* only one VM for now */ + const int vmid = 0; + + l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); + + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto free_ref; + } + + err = gk20a_busy(g); + if (err) + goto free_ref; + + trace = g->ctxsw_trace; + if (!trace) { + err = -ENODEV; + goto idle; + } + + /* Allow only one user for this device */ + dev = &trace->devs[vmid]; + nvgpu_mutex_acquire(&dev->write_lock); + if (dev->hdr) { + err = -EBUSY; + goto done; + } + + /* By default, allocate ring buffer big enough to accommodate + * FECS records with default event filter */ + + /* enable all traces by default */ + NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); + + /* compute max number of entries generated 
with this filter */ + n = g->ops.fecs_trace.max_entries(g, &dev->filter); + + size = sizeof(struct nvgpu_ctxsw_ring_header) + + n * sizeof(struct nvgpu_ctxsw_trace_entry); + nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", + size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); + + err = gk20a_ctxsw_dev_alloc_buffer(dev, size); + if (!err) { + filp->private_data = dev; + nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", + filp, dev, size); + } + +done: + nvgpu_mutex_release(&dev->write_lock); + +idle: + gk20a_idle(g); +free_ref: + if (err) + gk20a_put(g); + return err; +} + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); + + g->ops.fecs_trace.disable(g); + + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = false; + nvgpu_mutex_release(&dev->write_lock); + + if (dev->hdr) { + dev->g->ops.fecs_trace.free_user_buffer(dev->g); + dev->hdr = NULL; + } + gk20a_put(g); + return 0; +} + +long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: + err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); + break; + case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: + err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); + break; + case NVGPU_CTXSW_IOCTL_RING_SETUP: + err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, + (struct nvgpu_ctxsw_ring_setup_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_SET_FILTER: + err = gk20a_ctxsw_dev_ioctl_set_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_GET_FILTER: + err = gk20a_ctxsw_dev_ioctl_get_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_POLL: + err = gk20a_ctxsw_dev_ioctl_poll(dev); + break; + default: + dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); + + return err; +} + +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + unsigned int mask = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + nvgpu_mutex_acquire(&dev->write_lock); + poll_wait(filp, &dev->readout_wq.wq, wait); + if (!ring_is_empty(hdr)) + mask |= POLLIN | POLLRDNORM; + nvgpu_mutex_release(&dev->write_lock); + + return mask; +} + +static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = vma->vm_private_data; + struct gk20a *g = dev->g; + + nvgpu_atomic_inc(&dev->vma_ref); + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + nvgpu_atomic_read(&dev->vma_ref)); +} + +static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev 
*dev = vma->vm_private_data; + struct gk20a *g = dev->g; + + nvgpu_atomic_dec(&dev->vma_ref); + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + nvgpu_atomic_read(&dev->vma_ref)); +} + +static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { + .open = gk20a_ctxsw_dev_vma_open, + .close = gk20a_ctxsw_dev_vma_close, +}; + +int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, + struct vm_area_struct *vma) +{ + return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); +} + +int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + int ret; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + + ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); + if (likely(!ret)) { + vma->vm_private_data = dev; + vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; + vma->vm_ops->open(vma); + } + + return ret; +} + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static int gk20a_ctxsw_init_devs(struct gk20a *g) +{ + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + struct gk20a_ctxsw_dev *dev = trace->devs; + int err; + int i; + + for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { + dev->g = g; + dev->hdr = NULL; + dev->write_enabled = false; + nvgpu_cond_init(&dev->readout_wq); + err = nvgpu_mutex_init(&dev->write_lock); + if (err) + return err; + nvgpu_atomic_set(&dev->vma_ref, 0); + dev++; + } + return 0; +} +#endif + +int gk20a_ctxsw_trace_init(struct gk20a *g) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); + + /* if tracing is not supported, skip this */ + if (!g->ops.fecs_trace.init) + return 0; + + if (likely(trace)) + return 0; + + trace = nvgpu_kzalloc(g, sizeof(*trace)); + if (unlikely(!trace)) + return -ENOMEM; + g->ctxsw_trace = trace; + + err = gk20a_ctxsw_init_devs(g); + if (err) + goto fail; + + err = g->ops.fecs_trace.init(g); + if (unlikely(err)) + goto fail; + + return 0; + +fail: + memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); + nvgpu_kfree(g, trace); + g->ctxsw_trace = NULL; + return err; +#else + return 0; +#endif +} + +void gk20a_ctxsw_trace_cleanup(struct gk20a *g) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct gk20a_ctxsw_trace *trace; + struct gk20a_ctxsw_dev *dev; + int i; + + if (!g->ctxsw_trace) + return; + + trace = g->ctxsw_trace; + dev = trace->devs; + + for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { + nvgpu_mutex_destroy(&dev->write_lock); + dev++; + } + + nvgpu_kfree(g, g->ctxsw_trace); + g->ctxsw_trace = NULL; + + g->ops.fecs_trace.deinit(g); +#endif +} + +int gk20a_ctxsw_trace_write(struct gk20a *g, + struct nvgpu_ctxsw_trace_entry *entry) +{ + struct nvgpu_ctxsw_ring_header *hdr; + struct gk20a_ctxsw_dev *dev; + int ret = 0; + const char *reason; + u32 write_idx; + + if (!g->ctxsw_trace) + return 0; + + if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) + return -ENODEV; + + dev = &g->ctxsw_trace->devs[entry->vmid]; + hdr = dev->hdr; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, + "dev=%p hdr=%p", dev, hdr); + + nvgpu_mutex_acquire(&dev->write_lock); + + if (unlikely(!hdr)) { + /* device has been released */ + ret = -ENODEV; + goto done; + } + + write_idx = hdr->write_idx; + if (write_idx >= dev->num_ents) { + nvgpu_err(dev->g, + "write_idx=%u out of range [0..%u]", + write_idx, dev->num_ents); + ret = -ENOSPC; + reason = "write_idx out of range"; + goto disable; + } + + entry->seqno = 
hdr->write_seqno++; + + if (!dev->write_enabled) { + ret = -EBUSY; + reason = "write disabled"; + goto drop; + } + + if (unlikely(ring_is_full(hdr))) { + ret = -ENOSPC; + reason = "user fifo full"; + goto drop; + } + + if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { + reason = "filtered out"; + goto filter; + } + + nvgpu_log(g, gpu_dbg_ctxsw, + "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp); + + dev->ents[write_idx] = *entry; + + /* ensure record is written before updating write index */ + nvgpu_smp_wmb(); + + write_idx++; + if (unlikely(write_idx >= hdr->num_ents)) + write_idx = 0; + hdr->write_idx = write_idx; + nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", + hdr->read_idx, hdr->write_idx, ring_len(hdr)); + + nvgpu_mutex_release(&dev->write_lock); + return ret; + +disable: + g->ops.fecs_trace.disable(g); + +drop: + hdr->drop_count++; + +filter: + nvgpu_log(g, gpu_dbg_ctxsw, + "dropping seqno=%d context_id=%08x pid=%lld " + "tag=%x time=%llx (%s)", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp, reason); + +done: + nvgpu_mutex_release(&dev->write_lock); + return ret; +} + +void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) +{ + struct gk20a_ctxsw_dev *dev; + + if (!g->ctxsw_trace) + return; + + dev = &g->ctxsw_trace->devs[vmid]; + nvgpu_cond_signal_interruptible(&dev->readout_wq); +} + +void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct nvgpu_ctxsw_trace_entry entry = { + .vmid = 0, + .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, + .context_id = 0, + .pid = ch->tgid, + }; + + if (!g->ctxsw_trace) + return; + + g->ops.ptimer.read_ptimer(g, &entry.timestamp); + gk20a_ctxsw_trace_write(g, &entry); + gk20a_ctxsw_trace_wake_up(g, 0); +#endif + trace_gk20a_channel_reset(ch->chid, ch->tsgid); +} + +void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct nvgpu_ctxsw_trace_entry entry = { + .vmid = 0, + .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, + .context_id = 0, + .pid = tsg->tgid, + }; + + if (!g->ctxsw_trace) + return; + + g->ops.ptimer.read_ptimer(g, &entry.timestamp); + gk20a_ctxsw_trace_write(g, &entry); + gk20a_ctxsw_trace_wake_up(g, 0); +#endif + trace_gk20a_channel_reset(~0, tsg->tsgid); +} diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h new file mode 100644 index 00000000..88ca7f25 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __CTXSW_TRACE_H__ +#define __CTXSW_TRACE_H__ + +#include + +#define GK20A_CTXSW_TRACE_NUM_DEVS 1 + +struct file; +struct inode; +struct poll_table_struct; + +struct gk20a; + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); +long gk20a_ctxsw_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, + size_t size, loff_t *offs); +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, + struct poll_table_struct *pts); + +#endif /* __CTXSW_TRACE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c new file mode 100644 index 00000000..8738f3e7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug.c @@ -0,0 +1,452 @@ +/* + * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_cde.h" +#include "debug_ce.h" +#include "debug_fifo.h" +#include "debug_gr.h" +#include "debug_allocator.h" +#include "debug_kmem.h" +#include "debug_pmu.h" +#include "debug_sched.h" +#include "debug_hal.h" +#include "debug_xve.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +#include "gk20a/gk20a.h" + +#include +#include +#include + +#include + +unsigned int gk20a_debug_trace_cmdbuf; + +static inline void gk20a_debug_write_printk(void *ctx, const char *str, + size_t len) +{ + pr_info("%s", str); +} + +static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, + size_t len) +{ + seq_write((struct seq_file *)ctx, str, len); +} + +void gk20a_debug_output(struct gk20a_debug_output *o, + const char *fmt, ...) 
+{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + o->fn(o->ctx, o->buf, len); +} + +static int gk20a_gr_dump_regs(struct gk20a *g, + struct gk20a_debug_output *o) +{ + if (g->ops.gr.dump_gr_regs) + gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); + + return 0; +} + +int gk20a_gr_debug_dump(struct gk20a *g) +{ + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + gk20a_gr_dump_regs(g, &o); + + return 0; +} + +static int gk20a_gr_debug_show(struct seq_file *s, void *unused) +{ + struct device *dev = s->private; + struct gk20a *g = gk20a_get_platform(dev)->g; + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_to_seqfile, + .ctx = s, + }; + int err; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu: %d", err); + return -EINVAL; + } + + gk20a_gr_dump_regs(g, &o); + + gk20a_idle(g); + + return 0; +} + +void gk20a_debug_dump(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + if (platform->dump_platform_dependencies) + platform->dump_platform_dependencies(dev_from_gk20a(g)); + + /* HAL only initialized after 1st power-on */ + if (g->ops.debug.show_dump) + g->ops.debug.show_dump(g, &o); +} + +static int gk20a_debug_show(struct seq_file *s, void *unused) +{ + struct device *dev = s->private; + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_to_seqfile, + .ctx = s, + }; + struct gk20a *g; + int err; + + g = gk20a_get_platform(dev)->g; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu: %d", err); + return -EFAULT; + } + + /* HAL only initialized after 1st power-on */ + if (g->ops.debug.show_dump) + g->ops.debug.show_dump(g, &o); + + gk20a_idle(g); + return 0; +} + +static int gk20a_gr_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_gr_debug_show, inode->i_private); +} + +static int gk20a_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_debug_show, inode->i_private); +} + +static const struct file_operations gk20a_gr_debug_fops = { + .open = gk20a_gr_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations gk20a_debug_fops = { + .open = gk20a_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) +{ + g->ops.fifo.dump_pbdma_status(g, o); + g->ops.fifo.dump_eng_status(g, o); + + gk20a_debug_dump_all_channel_status_ramfc(g, o); +} + +static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + struct gk20a *g = file->private_data; + + if (g->mm.disable_bigpage) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + buf[2] = 0x00; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size; + bool bv; + struct gk20a *g = file->private_data; + + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + if (strtobool(buf, &bv) == 0) { + g->mm.disable_bigpage = bv; + gk20a_init_gpu_characteristics(g); + } + + return count; +} + +static struct file_operations disable_bigpage_fops = { + 
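+	/*
+	 * simple_open() stores inode->i_private (the struct gk20a pointer
+	 * handed to debugfs_create_file() later in gk20a_debug_init()) in
+	 * file->private_data, which is what the read/write handlers above
+	 * dereference.
+	 */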
.open = simple_open, + .read = disable_bigpage_read, + .write = disable_bigpage_write, +}; + +static int railgate_residency_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + unsigned long time_since_last_state_transition_ms; + unsigned long total_rail_gate_time_ms; + unsigned long total_rail_ungate_time_ms; + + if (platform->is_railgated(dev_from_gk20a(g))) { + time_since_last_state_transition_ms = + jiffies_to_msecs(jiffies - + g->pstats.last_rail_gate_complete); + total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; + total_rail_gate_time_ms = + g->pstats.total_rail_gate_time_ms + + time_since_last_state_transition_ms; + } else { + time_since_last_state_transition_ms = + jiffies_to_msecs(jiffies - + g->pstats.last_rail_ungate_complete); + total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; + total_rail_ungate_time_ms = + g->pstats.total_rail_ungate_time_ms + + time_since_last_state_transition_ms; + } + + seq_printf(s, "Time with Rails Gated: %lu ms\n" + "Time with Rails UnGated: %lu ms\n" + "Total railgating cycles: %lu\n", + total_rail_gate_time_ms, + total_rail_ungate_time_ms, + g->pstats.railgating_cycle_count - 1); + return 0; + +} + +static int railgate_residency_open(struct inode *inode, struct file *file) +{ + return single_open(file, railgate_residency_show, inode->i_private); +} + +static const struct file_operations railgate_residency_fops = { + .open = railgate_residency_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int gk20a_railgating_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *d; + + d = debugfs_create_file( + "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &railgate_residency_fops); + if (!d) + return -ENOMEM; + + return 0; +} +static ssize_t timeouts_enabled_read(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + struct gk20a *g = file->private_data; + + if (nvgpu_is_timeouts_enabled(g)) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + buf[2] = 0x00; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t timeouts_enabled_write(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + int buf_size; + bool timeouts_enabled; + struct gk20a *g = file->private_data; + + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + if (strtobool(buf, &timeouts_enabled) == 0) { + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (timeouts_enabled == false) { + /* requesting to disable timeouts */ + if (g->timeouts_disabled_by_user == false) { + nvgpu_atomic_inc(&g->timeouts_disabled_refcount); + g->timeouts_disabled_by_user = true; + } + } else { + /* requesting to enable timeouts */ + if (g->timeouts_disabled_by_user == true) { + nvgpu_atomic_dec(&g->timeouts_disabled_refcount); + g->timeouts_disabled_by_user = false; + } + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + } + + return count; +} + +static const struct file_operations timeouts_enabled_fops = { + .open = simple_open, + .read = timeouts_enabled_read, + .write = timeouts_enabled_write, +}; + +void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + + l->debugfs = debugfs_create_dir(dev_name(dev), NULL); + 
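+	/*
+	 * Every node below hangs off this per-device directory; with the
+	 * usual mount point the "status" file created next shows up as,
+	 * e.g., /sys/kernel/debug/<dev_name>/status (illustrative path,
+	 * the debugfs mount point may differ).
+	 */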
if (!l->debugfs) + return; + + if (debugfs_symlink) + l->debugfs_alias = + debugfs_create_symlink(debugfs_symlink, + NULL, dev_name(dev)); + + debugfs_create_file("status", S_IRUGO, l->debugfs, + dev, &gk20a_debug_fops); + debugfs_create_file("gr_status", S_IRUGO, l->debugfs, + dev, &gk20a_gr_debug_fops); + debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, + l->debugfs, &gk20a_debug_trace_cmdbuf); + + debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, + l->debugfs, &g->ch_wdt_timeout_ms); + + debugfs_create_u32("disable_syncpoints", S_IRUGO, + l->debugfs, &g->disable_syncpoints); + + /* New debug logging API. */ + debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, + l->debugfs, &g->log_mask); + debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, + l->debugfs, &g->log_trace); + + l->debugfs_ltc_enabled = + debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, + l->debugfs, + &g->mm.ltc_enabled_target); + + l->debugfs_gr_idle_timeout_default = + debugfs_create_u32("gr_idle_timeout_default_us", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr_idle_timeout_default); + l->debugfs_timeouts_enabled = + debugfs_create_file("timeouts_enabled", + S_IRUGO|S_IWUSR, + l->debugfs, + g, + &timeouts_enabled_fops); + + l->debugfs_disable_bigpage = + debugfs_create_file("disable_bigpage", + S_IRUGO|S_IWUSR, + l->debugfs, + g, + &disable_bigpage_fops); + + l->debugfs_timeslice_low_priority_us = + debugfs_create_u32("timeslice_low_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_low_priority_us); + l->debugfs_timeslice_medium_priority_us = + debugfs_create_u32("timeslice_medium_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_medium_priority_us); + l->debugfs_timeslice_high_priority_us = + debugfs_create_u32("timeslice_high_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_high_priority_us); + l->debugfs_runlist_interleave = + debugfs_create_bool("runlist_interleave", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->runlist_interleave); + l->debugfs_force_preemption_gfxp = + debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, + l->debugfs, + &g->gr.ctx_vars.force_preemption_gfxp); + + l->debugfs_force_preemption_cilp = + debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, + l->debugfs, + &g->gr.ctx_vars.force_preemption_cilp); + + l->debugfs_dump_ctxsw_stats = + debugfs_create_bool("dump_ctxsw_stats_on_channel_close", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); + + gr_gk20a_debugfs_init(g); + gk20a_pmu_debugfs_init(g); + gk20a_railgating_debugfs_init(g); +#ifdef CONFIG_NVGPU_SUPPORT_CDE + gk20a_cde_debugfs_init(g); +#endif + gk20a_ce_debugfs_init(g); + nvgpu_alloc_debugfs_init(g); + nvgpu_hal_debugfs_init(g); + gk20a_fifo_debugfs_init(g); + gk20a_sched_debugfs_init(g); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + nvgpu_kmem_debugfs_init(g); +#endif + if (g->pci_vendor_id) + nvgpu_xve_debugfs_init(g); +} + +void gk20a_debug_deinit(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs) + return; + + gk20a_fifo_debugfs_deinit(g); + + debugfs_remove_recursive(l->debugfs); + debugfs_remove(l->debugfs_alias); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.c b/drivers/gpu/nvgpu/os/linux/debug_allocator.c new file mode 100644 index 00000000..d63a9030 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_allocator.h" +#include "os_linux.h" + +#include +#include + +#include + +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct nvgpu_allocator *a = s->private; + + nvgpu_alloc_print_stats(a, s, 1); + + return 0; +} + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs_allocators) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + l->debugfs_allocators, + a, &__alloc_fops); +} + +void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) +{ +} + +void nvgpu_alloc_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); + if (IS_ERR_OR_NULL(l->debugfs_allocators)) { + l->debugfs_allocators = NULL; + return; + } +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.h b/drivers/gpu/nvgpu/os/linux/debug_allocator.h new file mode 100644 index 00000000..1b21cfc5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_ALLOCATOR_H__ +#define __NVGPU_DEBUG_ALLOCATOR_H__ + +struct gk20a; +void nvgpu_alloc_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.c b/drivers/gpu/nvgpu/os/linux/debug_cde.c new file mode 100644 index 00000000..f0afa6ee --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.c @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_cde.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +#include + + +static ssize_t gk20a_cde_reload_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct nvgpu_os_linux *l = file->private_data; + gk20a_cde_reload(l); + return count; +} + +static const struct file_operations gk20a_cde_reload_fops = { + .open = simple_open, + .write = gk20a_cde_reload_write, +}; + +void gk20a_cde_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + if (!platform->has_cde) + return; + + debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.shader_parameter); + debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_count); + debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_usecount); + debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_count_top); + debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, + l, &gk20a_cde_reload_fops); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.h b/drivers/gpu/nvgpu/os/linux/debug_cde.h new file mode 100644 index 00000000..4895edd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_CDE_H__ +#define __NVGPU_DEBUG_CDE_H__ + +struct gk20a; +void gk20a_cde_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_CDE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.c b/drivers/gpu/nvgpu/os/linux/debug_ce.c new file mode 100644 index 00000000..cea0bb47 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_ce.h" +#include "os_linux.h" + +#include + +void gk20a_ce_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.ctx_count); + debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.app_state); + debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.next_ctx_id); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.h b/drivers/gpu/nvgpu/os/linux/debug_ce.h new file mode 100644 index 00000000..2a8750c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_CE_H__ +#define __NVGPU_DEBUG_CE_H__ + +struct gk20a; +void gk20a_ce_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_CE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_clk.c b/drivers/gpu/nvgpu/os/linux/debug_clk.c new file mode 100644 index 00000000..2484d44b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_clk.c @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +#include "gm20b/clk_gm20b.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +static int rate_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + struct clk_gk20a *clk = &g->clk; + + *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); + return 0; +} +static int rate_set(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *)data; + return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); +} +DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); + +static int pll_reg_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_pll_debug_data d; + u32 reg, m, n, pl, f; + int err = 0; + + if (g->ops.clk.get_pll_debug_data) { + err = g->ops.clk.get_pll_debug_data(g, &d); + if (err) + return err; + } else { + return -EINVAL; + } + + seq_printf(s, "bypassctrl = %s, ", + d.trim_sys_bypassctrl_val ? "bypass" : "vco"); + seq_printf(s, "sel_vco = %s, ", + d.trim_sys_sel_vco_val ? "vco" : "bypass"); + + seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, + d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", + d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", + d.trim_sys_gpcpll_cfg_sync_on ? 
"sync_on" : "sync_off"); + + reg = d.trim_sys_gpcpll_coeff_val; + m = d.trim_sys_gpcpll_coeff_mdiv; + n = d.trim_sys_gpcpll_coeff_ndiv; + pl = d.trim_sys_gpcpll_coeff_pldiv; + f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); + seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); + seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); + + seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", + d.trim_sys_gpcpll_dvfs0_val, + d.trim_sys_gpcpll_dvfs0_dfs_coeff, + d.trim_sys_gpcpll_dvfs0_dfs_det_max, + d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); + + return 0; +} + +static int pll_reg_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_reg_show, inode->i_private); +} + +static const struct file_operations pll_reg_fops = { + .open = pll_reg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pll_reg_raw_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_pll_debug_data d; + u32 reg; + int err = 0; + + if (g->ops.clk.get_pll_debug_data) { + err = g->ops.clk.get_pll_debug_data(g, &d); + if (err) + return err; + } else { + return -EINVAL; + } + + seq_puts(s, "GPCPLL REGISTERS:\n"); + for (reg = d.trim_sys_gpcpll_cfg_reg; + reg <= d.trim_sys_gpcpll_dvfs2_reg; + reg += sizeof(u32)) + seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); + + seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); + + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, + d.trim_sys_sel_vco_val); + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, + d.trim_sys_gpc2clk_out_val); + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, + d.trim_sys_bypassctrl_val); + + return 0; +} + +static int pll_reg_raw_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_reg_raw_show, inode->i_private); +} + +static ssize_t pll_reg_raw_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct gk20a *g = file->f_path.dentry->d_inode->i_private; + char buf[80]; + u32 reg, val; + int err = 0; + + if (sizeof(buf) <= count) + return -EINVAL; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + /* terminate buffer and trim - white spaces may be appended + * at the end when invoked from shell command line */ + buf[count] = '\0'; + strim(buf); + + if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) + return -EINVAL; + + if (g->ops.clk.pll_reg_write(g, reg, val)) + err = g->ops.clk.pll_reg_write(g, reg, val); + else + err = -EINVAL; + + return err; +} + +static const struct file_operations pll_reg_raw_fops = { + .open = pll_reg_raw_open, + .read = seq_read, + .write = pll_reg_raw_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int monitor_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + int err = 0; + + if (g->ops.clk.get_gpcclk_clock_counter) + err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); + else + err = -EINVAL; + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); + +static int voltage_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + int err = 0; + + if (g->ops.clk.get_voltage) + err = g->ops.clk.get_voltage(&g->clk, val); + else + err = -EINVAL; + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); + +static int pll_param_show(struct seq_file *s, void *data) +{ + struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); + + seq_printf(s, 
"ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", + gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, + gpc_pll_params->vco_ctrl); + return 0; +} + +static int pll_param_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_param_show, inode->i_private); +} + +static const struct file_operations pll_param_fops = { + .open = pll_param_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gm20b_clk_init_debugfs(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *d; + + if (!l->debugfs) + return -EINVAL; + + d = debugfs_create_file( + "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); + if (!d) + goto err_out; + + d = debugfs_create_file("pll_reg_raw", + S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); + if (!d) + goto err_out; + + d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, + (u32 *)&g->clk.gpc_pll.mode); + if (!d) + goto err_out; + + d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, + l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); + if (!d) + goto err_out; + + return 0; + +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + return -ENOMEM; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.c b/drivers/gpu/nvgpu/os/linux/debug_fifo.c new file mode 100644 index 00000000..2b5674c0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.c @@ -0,0 +1,378 @@ +/* + * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_fifo.h" +#include "os_linux.h" + +#include +#include + +#include +#include + +void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); + +static void *gk20a_fifo_sched_debugfs_seq_start( + struct seq_file *s, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void *gk20a_fifo_sched_debugfs_seq_next( + struct seq_file *s, void *v, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + ++(*pos); + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void gk20a_fifo_sched_debugfs_seq_stop( + struct seq_file *s, void *v) +{ +} + +static int gk20a_fifo_sched_debugfs_seq_show( + struct seq_file *s, void *v) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = v; + struct tsg_gk20a *tsg = NULL; + + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + u32 runlist_id; + int ret = SEQ_SKIP; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + engine_info = (f->engine_info + engine_id); + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + if (ch == f->channel) { + seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); + seq_puts(s, " (usecs) (msecs)\n"); + ret = 0; + } + + if (!test_bit(ch->chid, runlist->active_channels)) + return ret; + + if (gk20a_channel_get(ch)) { + tsg = tsg_gk20a_from_ch(ch); + + if (tsg) + seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", + ch->chid, + ch->tsgid, + ch->tgid, + tsg->timeslice_us, + ch->timeout_ms_max, + tsg->interleave_level, + tsg->gr_ctx.graphics_preempt_mode, + tsg->gr_ctx.compute_preempt_mode); + gk20a_channel_put(ch); + } + return 0; +} + +static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { + .start = gk20a_fifo_sched_debugfs_seq_start, + .next = gk20a_fifo_sched_debugfs_seq_next, + .stop = gk20a_fifo_sched_debugfs_seq_stop, + .show = gk20a_fifo_sched_debugfs_seq_show +}; + +static int gk20a_fifo_sched_debugfs_open(struct inode *inode, + struct file *file) +{ + struct gk20a *g = inode->i_private; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); + if (err) + return err; + + nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); + + ((struct seq_file *)file->private_data)->private = inode->i_private; + return 0; +}; + +/* + * The file operations structure contains our open function along with + * set of the canned seq_ ops. 
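+ * Reading the "sched" node therefore streams one row per active channel:
+ * seq_read() drives the start/next callbacks over f->channel[] and the
+ * show callback prints each entry, so e.g. "cat .../fifo/sched" (path
+ * abbreviated) dumps the whole table in one go.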
+ */ +static const struct file_operations gk20a_fifo_sched_debugfs_fops = { + .owner = THIS_MODULE, + .open = gk20a_fifo_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static int gk20a_fifo_profile_enable(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *) data; + struct fifo_gk20a *f = &g->fifo; + + + nvgpu_mutex_acquire(&f->profile.lock); + if (val == 0) { + if (f->profile.enabled) { + f->profile.enabled = false; + nvgpu_ref_put(&f->profile.ref, + __gk20a_fifo_profile_free); + } + } else { + if (!f->profile.enabled) { + /* not kref init as it can have a running condition if + * we enable/disable/enable while kickoff is happening + */ + if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { + f->profile.data = nvgpu_vzalloc(g, + FIFO_PROFILING_ENTRIES * + sizeof(struct fifo_profile_gk20a)); + f->profile.sorted = nvgpu_vzalloc(g, + FIFO_PROFILING_ENTRIES * + sizeof(u64)); + if (!(f->profile.data && f->profile.sorted)) { + nvgpu_vfree(g, f->profile.data); + nvgpu_vfree(g, f->profile.sorted); + nvgpu_mutex_release(&f->profile.lock); + return -ENOMEM; + } + nvgpu_ref_init(&f->profile.ref); + } + atomic_set(&f->profile.get.atomic_var, 0); + f->profile.enabled = true; + } + } + nvgpu_mutex_release(&f->profile.lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE( + gk20a_fifo_profile_enable_debugfs_fops, + NULL, + gk20a_fifo_profile_enable, + "%llu\n" +); + +static int __profile_cmp(const void *a, const void *b) +{ + return *((unsigned long long *) a) - *((unsigned long long *) b); +} + +/* + * This uses about 800b in the stack, but the function using it is not part + * of a callstack where much memory is being used, so it is fine + */ +#define PERCENTILE_WIDTH 5 +#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) + +static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, + u64 *percentiles, u32 index_end, u32 index_start) +{ + unsigned int nelem = 0; + unsigned int index; + struct fifo_profile_gk20a *profile; + + for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { + profile = &g->fifo.profile.data[index]; + + if (profile->timestamp[index_end] > + profile->timestamp[index_start]) { + /* This is a valid element */ + g->fifo.profile.sorted[nelem] = + profile->timestamp[index_end] - + profile->timestamp[index_start]; + nelem++; + } + } + + /* sort it */ + sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), + __profile_cmp, NULL); + + /* build ranges */ + for (index = 0; index < PERCENTILE_RANGES; index++) { + percentiles[index] = nelem < PERCENTILE_RANGES ? 
0 : + g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * + nelem)/100 - 1]; + } + return nelem; +} + +static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + unsigned int get, nelem, index; + /* + * 800B in the stack, but function is declared statically and only + * called from debugfs handler + */ + u64 percentiles_ioctl[PERCENTILE_RANGES]; + u64 percentiles_kickoff[PERCENTILE_RANGES]; + u64 percentiles_jobtracking[PERCENTILE_RANGES]; + u64 percentiles_append[PERCENTILE_RANGES]; + u64 percentiles_userd[PERCENTILE_RANGES]; + + if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { + seq_printf(s, "Profiling disabled\n"); + return 0; + } + + get = atomic_read(&g->fifo.profile.get.atomic_var); + + __gk20a_fifo_create_stats(g, percentiles_ioctl, + PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_kickoff, + PROFILE_END, PROFILE_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_jobtracking, + PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_append, + PROFILE_APPEND, PROFILE_JOB_TRACKING); + nelem = __gk20a_fifo_create_stats(g, percentiles_userd, + PROFILE_END, PROFILE_APPEND); + + seq_printf(s, "Number of kickoffs: %d\n", nelem); + seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); + + for (index = 0; index < PERCENTILE_RANGES; index++) + seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", + PERCENTILE_WIDTH * (index+1), + percentiles_ioctl[index], + percentiles_kickoff[index], + percentiles_append[index], + percentiles_jobtracking[index], + percentiles_userd[index]); + + nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); + + return 0; +} + +static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_fifo_profile_stats, inode->i_private); +} + +static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { + .open = gk20a_fifo_profile_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void gk20a_fifo_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + struct dentry *fifo_root; + struct dentry *profile_root; + + fifo_root = debugfs_create_dir("fifo", gpu_root); + if (IS_ERR_OR_NULL(fifo_root)) + return; + + nvgpu_log(g, gpu_dbg_info, "g=%p", g); + + debugfs_create_file("sched", 0600, fifo_root, g, + &gk20a_fifo_sched_debugfs_fops); + + profile_root = debugfs_create_dir("profile", fifo_root); + if (IS_ERR_OR_NULL(profile_root)) + return; + + nvgpu_mutex_init(&g->fifo.profile.lock); + g->fifo.profile.enabled = false; + atomic_set(&g->fifo.profile.get.atomic_var, 0); + atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); + + debugfs_create_file("enable", 0600, profile_root, g, + &gk20a_fifo_profile_enable_debugfs_fops); + + debugfs_create_file("stats", 0600, profile_root, g, + &gk20a_fifo_profile_stats_debugfs_fops); + +} + +void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) +{ + if (profile) + profile->timestamp[idx] = nvgpu_current_time_ns(); +} + +void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) +{ + struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, + profile.ref); + nvgpu_vfree(f->g, f->profile.data); + nvgpu_vfree(f->g, f->profile.sorted); +} + +/* Get the next element in the ring buffer of profile entries + * and grab a reference to the structure + */ 
+struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_profile_gk20a *profile; + unsigned int index; + + /* If kref is zero, profiling is not enabled */ + if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) + return NULL; + index = atomic_inc_return(&f->profile.get.atomic_var); + profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; + + return profile; +} + +/* Free the reference to the structure. This allows deferred cleanups */ +void gk20a_fifo_profile_release(struct gk20a *g, + struct fifo_profile_gk20a *profile) +{ + nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); +} + +void gk20a_fifo_debugfs_deinit(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + + nvgpu_mutex_acquire(&f->profile.lock); + if (f->profile.enabled) { + f->profile.enabled = false; + nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); + } + nvgpu_mutex_release(&f->profile.lock); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.h b/drivers/gpu/nvgpu/os/linux/debug_fifo.h new file mode 100644 index 00000000..46ac853e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_FIFO_H__ +#define __NVGPU_DEBUG_FIFO_H__ + +struct gk20a; +void gk20a_fifo_debugfs_init(struct gk20a *g); +void gk20a_fifo_debugfs_deinit(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.c b/drivers/gpu/nvgpu/os/linux/debug_gr.c new file mode 100644 index 00000000..d54c6d63 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_gr.h" +#include "os_linux.h" + +#include + +int gr_gk20a_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->debugfs_gr_default_attrib_cb_size = + debugfs_create_u32("gr_default_attrib_cb_size", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr.attrib_cb_default_size); + + return 0; +} + diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.h b/drivers/gpu/nvgpu/os/linux/debug_gr.h new file mode 100644 index 00000000..4b46acbb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_GR_H__ +#define __NVGPU_DEBUG_GR_H__ + +struct gk20a; +int gr_gk20a_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_GR_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.c b/drivers/gpu/nvgpu/os/linux/debug_hal.c new file mode 100644 index 00000000..031e335e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_hal.h" +#include "os_linux.h" + +#include +#include + +/* Format and print a single function pointer to the specified seq_file. */ +static void __hal_print_op(struct seq_file *s, void *op_ptr) +{ + seq_printf(s, "%pF\n", op_ptr); +} + +/* + * Prints an array of function pointer addresses in op_ptrs to the + * specified seq_file + */ +static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) +{ + int i; + + for (i = 0; i < num_ops; i++) + __hal_print_op(s, op_ptrs[i]); +} + +/* + * Show file operation, which generates content of the file once. Prints a list + * of gpu operations as defined by gops and the corresponding function pointer + * destination addresses. Relies on no compiler reordering of struct fields and + * assumption that all members are function pointers. + */ +static int __hal_show(struct seq_file *s, void *unused) +{ + struct gpu_ops *gops = s->private; + + __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); + + return 0; +} + +static int __hal_open(struct inode *inode, struct file *file) +{ + return single_open(file, __hal_show, inode->i_private); +} + +static const struct file_operations __hal_fops = { + .open = __hal_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_hal_debugfs_fini(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!(l->debugfs_hal == NULL)) + debugfs_remove_recursive(l->debugfs_hal); +} + +void nvgpu_hal_debugfs_init(struct gk20a *g) +{ + struct dentry *d; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs) + return; + l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); + if (IS_ERR_OR_NULL(l->debugfs_hal)) { + l->debugfs_hal = NULL; + return; + } + + /* Pass along reference to the gpu_ops struct as private data */ + d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, + &g->ops, &__hal_fops); + if (!d) { + nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); + debugfs_remove_recursive(l->debugfs_hal); + return; + } +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.h b/drivers/gpu/nvgpu/os/linux/debug_hal.h new file mode 100644 index 00000000..eee6f234 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_HAL_H__
+#define __NVGPU_DEBUG_HAL_H__
+
+struct gk20a;
+void nvgpu_hal_debugfs_fini(struct gk20a *g);
+void nvgpu_hal_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_HAL_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.c b/drivers/gpu/nvgpu/os/linux/debug_kmem.c
new file mode 100644
index 00000000..a0c7d47d
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "os_linux.h"
+#include "debug_kmem.h"
+#include "kmem_priv.h"
+
+/**
+ * __to_human_readable_bytes - Determine suffix for passed size.
+ *
+ * @bytes - Number of bytes to generate a suffix for.
+ * @hr_bytes [out] - The human readable number of bytes.
+ * @hr_suffix [out] - The suffix for the HR number of bytes.
+ *
+ * Computes a human readable decomposition of the passed number of bytes. The
+ * suffix for the bytes is passed back through the @hr_suffix pointer. The
+ * right number of bytes is then passed back in @hr_bytes. This returns the
+ * following ranges:
+ *
+ *   0 - 1023 B
+ *   1 - 1023 KB
+ *   1 - 1023 MB
+ *   1 - 1023 GB
+ *   1 - 1023 TB
+ *   1 - ...  PB
+ */
+static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
+				      const char **hr_suffix)
+{
+	static const char *suffixes[] =
+		{ "B", "KB", "MB", "GB", "TB", "PB" };
+
+	u64 suffix_ind = 0;
+
+	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
+		bytes >>= 10;
+		suffix_ind++;
+	}
+
+	/*
+	 * Handle case where bytes > 1023PB.
+	 */
+	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
+		suffix_ind : ARRAY_SIZE(suffixes) - 1;
+
+	*hr_bytes = bytes;
+	*hr_suffix = suffixes[suffix_ind];
+}
+
+/**
+ * print_hr_bytes - Print human readable bytes
+ *
+ * @s - A seq_file to print to. May be NULL.
+ * @msg - A message to print before the bytes.
+ * @bytes - Number of bytes.
+ *
+ * Print @msg followed by the human readable decomposition of the passed
+ * number of bytes.
+ *
+ * If @s is NULL then the prints are made to the kernel log.
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+	u64 hr_bytes;
+	const char *hr_suffix;
+
+	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
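+ *
+ * Each bucket spans one power of two, so the histogram is logarithmic.
+ * Output goes through __pstat(), i.e. to @s when non-NULL and to the
+ * kernel log otherwise.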
+ */
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	int i;
+	u64 pot_min, pot_max;
+	u64 nr_buckets;
+	unsigned int *buckets;
+	unsigned int total_allocs;
+	struct nvgpu_rbtree_node *node;
+	static const char histogram_line[] =
+		"++++++++++++++++++++++++++++++++++++++++";
+
+	/*
+	 * pot_min is essentially a round down to the nearest power of 2. This
+	 * is the start of the histogram. pot_max is just a round up to the
+	 * nearest power of two. Each histogram bucket is one power of two so
+	 * the histogram buckets are exponential.
+	 */
+	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+
+	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
+
+	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+	if (!buckets) {
+		__pstat(s, "OOM: could not allocate bucket storage!?\n");
+		return;
+	}
+
+	/*
+	 * Iterate across all of the allocs and determine what bucket they
+	 * should go in. Round the size down to the nearest power of two to
+	 * find the right bucket.
+	 */
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		int b;
+		u64 bucket_min;
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
+		if (bucket_min < tracker->min_alloc)
+			bucket_min = tracker->min_alloc;
+
+		b = __ffs(bucket_min) - __ffs(pot_min);
+
+		/*
+		 * Handle the one case where there's an alloc exactly as big
+		 * as the maximum bucket size of the largest bucket. Most of
+		 * the buckets have an inclusive minimum and exclusive
+		 * maximum. But the largest bucket needs to have an
+		 * _inclusive_ maximum as well.
+		 */
+		if (b == (int)nr_buckets)
+			b--;
+
+		buckets[b]++;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	total_allocs = 0;
+	for (i = 0; i < (int)nr_buckets; i++)
+		total_allocs += buckets[i];
+
+	__pstat(s, "Alloc histogram:\n");
+
+	/*
+	 * Actually compute the histogram lines.
+	 */
+	for (i = 0; i < (int)nr_buckets; i++) {
+		char this_line[sizeof(histogram_line) + 1];
+		u64 line_length;
+		u64 hr_bytes;
+		const char *hr_suffix;
+
+		memset(this_line, 0, sizeof(this_line));
+
+		/*
+		 * Compute the normalized line length. Can't use floating
+		 * point so we will just multiply everything by 1000 and use
+		 * fixed point.
+		 */
+		line_length = (1000 * buckets[i]) / total_allocs;
+		line_length *= sizeof(histogram_line);
+		line_length /= 1000;
+
+		memset(this_line, '+', line_length);
+
+		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
+					  &hr_bytes, &hr_suffix);
+		__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
+			hr_bytes, hr_bytes << 1,
+			hr_suffix, buckets[i], this_line);
+	}
+
+	/* The bucket storage is only needed for the dump itself. */
+	kfree(buckets);
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Print stats from a tracker. If @s is non-null then seq_printf() will be
+ * used with @s. Otherwise the stats are pr_info()ed.
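+ *
+ * The tracker lock is held across the whole dump so the counters and
+ * the histogram reflect a single consistent snapshot.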
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	nvgpu_lock_tracker(tracker);
+
+	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
+
+	__pstat(s, "Basic Stats:\n");
+	__pstat(s, "  Number of allocs        %lld\n",
+		tracker->nr_allocs);
+	__pstat(s, "  Number of frees         %lld\n",
+		tracker->nr_frees);
+	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
+	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
+	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
+	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
+	print_hr_bytes(s, "  Bytes allocated (real)  ",
+		       tracker->bytes_alloced_real);
+	print_hr_bytes(s, "  Bytes freed (real)      ",
+		       tracker->bytes_freed_real);
+	__pstat(s, "\n");
+
+	print_histogram(tracker, s);
+
+	nvgpu_unlock_tracker(tracker);
+}
+
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_mem_alloc_tracker *tracker = s->private;
+
+	nvgpu_kmem_print_stats(tracker, s);
+
+	return 0;
+}
+
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_tracking_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_tracking_fops = {
+	.open = __kmem_tracking_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+				      struct nvgpu_mem_alloc_tracker *tracker,
+				      struct seq_file *s)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		kmem_print_mem_alloc(g, alloc, s);
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return 0;
+}
+
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+
+	nvgpu_lock_tracker(g->vmallocs);
+	seq_puts(s, "Outstanding vmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->vmallocs, s);
+	seq_puts(s, "\n");
+	nvgpu_unlock_tracker(g->vmallocs);
+
+	nvgpu_lock_tracker(g->kmallocs);
+	seq_puts(s, "Outstanding kmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->kmallocs, s);
+	nvgpu_unlock_tracker(g->kmallocs);
+
+	return 0;
+}
+
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_traces_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_traces_fops = {
+	.open = __kmem_traces_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void nvgpu_kmem_debugfs_init(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct dentry *node;
+
+	l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
+	if (IS_ERR_OR_NULL(l->debugfs_kmem))
+		return;
+
+	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
+				   l->debugfs_kmem,
+				   g->vmallocs, &__kmem_tracking_fops);
+	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
+				   l->debugfs_kmem,
+				   g->kmallocs, &__kmem_tracking_fops);
+	node = debugfs_create_file("traces", S_IRUGO,
+				   l->debugfs_kmem,
+				   g, &__kmem_traces_fops);
}
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.h b/drivers/gpu/nvgpu/os/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_KMEM_H__ +#define __NVGPU_DEBUG_KMEM_H__ + +struct gk20a; +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE +void nvgpu_kmem_debugfs_init(struct gk20a *g); +#endif + +#endif /* __NVGPU_DEBUG_KMEM_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.c b/drivers/gpu/nvgpu/os/linux/debug_pmu.c new file mode 100644 index 00000000..f4ed992d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.c @@ -0,0 +1,481 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include "debug_pmu.h" +#include "os_linux.h" + +#include +#include +#include + +static int lpwr_debug_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + if (g->ops.pmu.pmu_pg_engines_feature_list && + g->ops.pmu.pmu_pg_engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + seq_printf(s, "PSTATE: %u\n" + "RPPG Enabled: %u\n" + "RPPG ref count: %u\n" + "RPPG state: %u\n" + "MSCG Enabled: %u\n" + "MSCG pstate state: %u\n" + "MSCG transition state: %u\n", + g->ops.clk_arb.get_current_pstate(g), + g->elpg_enabled, g->pmu.elpg_refcnt, + g->pmu.elpg_stat, g->mscg_enabled, + g->pmu.mscg_stat, g->pmu.mscg_transition_state); + + } else + seq_printf(s, "ELPG Enabled: %u\n" + "ELPG ref count: %u\n" + "ELPG state: %u\n", + g->elpg_enabled, g->pmu.elpg_refcnt, + g->pmu.elpg_stat); + + return 0; + +} + +static int lpwr_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, lpwr_debug_show, inode->i_private); +} + +static const struct file_operations lpwr_debug_fops = { + .open = lpwr_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mscg_stat_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + u64 total_ingating, total_ungating, residency, divisor, dividend; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + int err; + + /* Don't unnecessarily power on the device */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); + gk20a_idle(g); + } + total_ingating = g->pg_ingating_time_us + + (u64)pg_stat_data.ingating_time; + total_ungating = g->pg_ungating_time_us + + (u64)pg_stat_data.ungating_time; + + divisor = total_ingating + total_ungating; + + /* We compute the residency on a scale of 1000 */ + dividend = total_ingating * 1000; + + if (divisor) + residency = div64_u64(dividend, divisor); + else + residency = 0; + + seq_printf(s, + "Time in MSCG: %llu us\n" + "Time out of MSCG: %llu us\n" + "MSCG residency 
ratio: %llu\n" + "MSCG Entry Count: %u\n" + "MSCG Avg Entry latency %u\n" + "MSCG Avg Exit latency %u\n", + total_ingating, total_ungating, + residency, pg_stat_data.gating_cnt, + pg_stat_data.avg_entry_latency_us, + pg_stat_data.avg_exit_latency_us); + return 0; + +} + +static int mscg_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, mscg_stat_show, inode->i_private); +} + +static const struct file_operations mscg_stat_fops = { + .open = mscg_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mscg_transitions_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u32 total_gating_cnt; + int err; + + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); + gk20a_idle(g); + } + total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; + + seq_printf(s, "%u\n", total_gating_cnt); + return 0; + +} + +static int mscg_transitions_open(struct inode *inode, struct file *file) +{ + return single_open(file, mscg_transitions_show, inode->i_private); +} + +static const struct file_operations mscg_transitions_fops = { + .open = mscg_transitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int elpg_stat_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u64 total_ingating, total_ungating, residency, divisor, dividend; + int err; + + /* Don't unnecessarily power on the device */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); + gk20a_idle(g); + } + total_ingating = g->pg_ingating_time_us + + (u64)pg_stat_data.ingating_time; + total_ungating = g->pg_ungating_time_us + + (u64)pg_stat_data.ungating_time; + divisor = total_ingating + total_ungating; + + /* We compute the residency on a scale of 1000 */ + dividend = total_ingating * 1000; + + if (divisor) + residency = div64_u64(dividend, divisor); + else + residency = 0; + + seq_printf(s, + "Time in ELPG: %llu us\n" + "Time out of ELPG: %llu us\n" + "ELPG residency ratio: %llu\n" + "ELPG Entry Count: %u\n" + "ELPG Avg Entry latency %u us\n" + "ELPG Avg Exit latency %u us\n", + total_ingating, total_ungating, + residency, pg_stat_data.gating_cnt, + pg_stat_data.avg_entry_latency_us, + pg_stat_data.avg_exit_latency_us); + return 0; + +} + +static int elpg_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_stat_show, inode->i_private); +} + +static const struct file_operations elpg_stat_fops = { + .open = elpg_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int elpg_transitions_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u32 total_gating_cnt; + int err; + + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); + gk20a_idle(g); + } + total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; + + seq_printf(s, "%u\n", total_gating_cnt); + return 0; + +} + +static int elpg_transitions_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_transitions_show, inode->i_private); +} + +static const struct file_operations 
elpg_transitions_fops = { + .open = elpg_transitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int falc_trace_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_pmu *pmu = &g->pmu; + u32 i = 0, j = 0, k, l, m; + char part_str[40]; + void *tracebuffer; + char *trace; + u32 *trace1; + + /* allocate system memory to copy pmu trace buffer */ + tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); + if (tracebuffer == NULL) + return -ENOMEM; + + /* read pmu traces into system memory buffer */ + nvgpu_mem_rd_n(g, &pmu->trace_buf, + 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); + + trace = (char *)tracebuffer; + trace1 = (u32 *)tracebuffer; + + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { + for (j = 0; j < 0x40; j++) + if (trace1[(i / 4) + j]) + break; + if (j == 0x40) + break; + seq_printf(s, "Index %x: ", trace1[(i / 4)]); + l = 0; + m = 0; + while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { + if (k >= 40) + break; + strncpy(part_str, (trace+i+20+m), k); + part_str[k] = 0; + seq_printf(s, "%s0x%x", part_str, + trace1[(i / 4) + 1 + l]); + l++; + m += k + 2; + } + seq_printf(s, "%s", (trace+i+20+m)); + } + + nvgpu_kfree(g, tracebuffer); + return 0; +} + +static int falc_trace_open(struct inode *inode, struct file *file) +{ + return single_open(file, falc_trace_show, inode->i_private); +} + +static const struct file_operations falc_trace_fops = { + .open = falc_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int perfmon_events_enable_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); + return 0; + +} + +static int perfmon_events_enable_open(struct inode *inode, struct file *file) +{ + return single_open(file, perfmon_events_enable_show, inode->i_private); +} + +static ssize_t perfmon_events_enable_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct gk20a *g = s->private; + unsigned long val = 0; + char buf[40]; + int buf_size; + int err; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, userbuf, buf_size)) + return -EFAULT; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + /* Don't turn on gk20a unnecessarily */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + if (val && !g->pmu.perfmon_sampling_enabled && + nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + g->pmu.perfmon_sampling_enabled = true; + g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); + } else if (!val && g->pmu.perfmon_sampling_enabled && + nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + g->pmu.perfmon_sampling_enabled = false; + g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); + } + gk20a_idle(g); + } else { + g->pmu.perfmon_sampling_enabled = val ? 
true : false; + } + + return count; +} + +static const struct file_operations perfmon_events_enable_fops = { + .open = perfmon_events_enable_open, + .read = seq_read, + .write = perfmon_events_enable_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int perfmon_events_count_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); + return 0; + +} + +static int perfmon_events_count_open(struct inode *inode, struct file *file) +{ + return single_open(file, perfmon_events_count_show, inode->i_private); +} + +static const struct file_operations perfmon_events_count_fops = { + .open = perfmon_events_count_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int security_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%d\n", g->pmu.pmu_mode); + return 0; + +} + +static int security_open(struct inode *inode, struct file *file) +{ + return single_open(file, security_show, inode->i_private); +} + +static const struct file_operations security_fops = { + .open = security_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gk20a_pmu_debugfs_init(struct gk20a *g) +{ + struct dentry *d; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + d = debugfs_create_file( + "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, + &lpwr_debug_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &mscg_stat_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "mscg_transitions", S_IRUGO, l->debugfs, g, + &mscg_transitions_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &elpg_stat_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "elpg_transitions", S_IRUGO, l->debugfs, g, + &elpg_transitions_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "falc_trace", S_IRUGO, l->debugfs, g, + &falc_trace_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "perfmon_events_enable", S_IRUGO, l->debugfs, g, + &perfmon_events_enable_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "perfmon_events_count", S_IRUGO, l->debugfs, g, + &perfmon_events_count_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pmu_security", S_IRUGO, l->debugfs, g, + &security_fops); + if (!d) + goto err_out; + return 0; +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + return -ENOMEM; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.h b/drivers/gpu/nvgpu/os/linux/debug_pmu.h new file mode 100644 index 00000000..c4e3243d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __NVGPU_DEBUG_PMU_H__ +#define __NVGPU_DEBUG_PMU_H__ + +struct gk20a; +int gk20a_pmu_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_PMU_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.c b/drivers/gpu/nvgpu/os/linux/debug_sched.c new file mode 100644 index 00000000..5b7cbddf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_sched.h" +#include "os_linux.h" + +#include +#include + +static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + bool sched_busy = true; + + int n = sched->bitmap_size / sizeof(u64); + int i; + int err; + + err = gk20a_busy(g); + if (err) + return err; + + if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { + sched_busy = false; + nvgpu_mutex_release(&sched->busy_lock); + } + + seq_printf(s, "control_locked=%d\n", sched->control_locked); + seq_printf(s, "busy=%d\n", sched_busy); + seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); + + nvgpu_mutex_acquire(&sched->status_lock); + + seq_puts(s, "active_tsg_bitmap\n"); + for (i = 0; i < n; i++) + seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); + + seq_puts(s, "recent_tsg_bitmap\n"); + for (i = 0; i < n; i++) + seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); + + nvgpu_mutex_release(&sched->status_lock); + + gk20a_idle(g); + + return 0; +} + +static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_sched_debugfs_show, inode->i_private); +} + +static const struct file_operations gk20a_sched_debugfs_fops = { + .open = gk20a_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void gk20a_sched_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, + g, &gk20a_sched_debugfs_fops); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.h b/drivers/gpu/nvgpu/os/linux/debug_sched.h new file mode 100644 index 00000000..34a8f55f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __NVGPU_DEBUG_SCHED_H__ +#define __NVGPU_DEBUG_SCHED_H__ + +struct gk20a; +void gk20a_sched_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_SCHED_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.c b/drivers/gpu/nvgpu/os/linux/debug_xve.c new file mode 100644 index 00000000..743702a2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.c @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include "debug_xve.h" +#include "os_linux.h" + +#include +#include + +static ssize_t xve_link_speed_write(struct file *filp, + const char __user *buff, + size_t len, loff_t *off) +{ + struct gk20a *g = ((struct seq_file *)filp->private_data)->private; + char kbuff[16]; + u32 buff_size, check_len; + u32 link_speed = 0; + int ret; + + buff_size = min_t(size_t, 16, len); + + memset(kbuff, 0, 16); + if (copy_from_user(kbuff, buff, buff_size)) + return -EFAULT; + + check_len = strlen("Gen1"); + if (strncmp(kbuff, "Gen1", check_len) == 0) + link_speed = GPU_XVE_SPEED_2P5; + else if (strncmp(kbuff, "Gen2", check_len) == 0) + link_speed = GPU_XVE_SPEED_5P0; + else if (strncmp(kbuff, "Gen3", check_len) == 0) + link_speed = GPU_XVE_SPEED_8P0; + else + nvgpu_err(g, "%s: Unknown PCIe speed: %s", + __func__, kbuff); + + if (!link_speed) + return -EINVAL; + + /* Brief pause... To help rate limit this. */ + nvgpu_msleep(250); + + /* + * And actually set the speed. Yay. 
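+ * On success the whole write is reported as consumed (len); any error
+ * from set_speed() is propagated back to the writer.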
+ */ + ret = g->ops.xve.set_speed(g, link_speed); + if (ret) + return ret; + + return len; +} + +static int xve_link_speed_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 speed; + int err; + + err = g->ops.xve.get_speed(g, &speed); + if (err) + return err; + + seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); + + return 0; +} + +static int xve_link_speed_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_link_speed_show, inode->i_private); +} + +static const struct file_operations xve_link_speed_fops = { + .open = xve_link_speed_open, + .read = seq_read, + .write = xve_link_speed_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xve_available_speeds_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 available_speeds; + + g->ops.xve.available_speeds(g, &available_speeds); + + seq_puts(s, "Available PCIe bus speeds:\n"); + if (available_speeds & GPU_XVE_SPEED_2P5) + seq_puts(s, " Gen1\n"); + if (available_speeds & GPU_XVE_SPEED_5P0) + seq_puts(s, " Gen2\n"); + if (available_speeds & GPU_XVE_SPEED_8P0) + seq_puts(s, " Gen3\n"); + + return 0; +} + +static int xve_available_speeds_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_available_speeds_show, inode->i_private); +} + +static const struct file_operations xve_available_speeds_fops = { + .open = xve_available_speeds_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xve_link_control_status_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 link_status; + + link_status = g->ops.xve.get_link_control_status(g); + seq_printf(s, "0x%08x\n", link_status); + + return 0; +} + +static int xve_link_control_status_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_link_control_status_show, inode->i_private); +} + +static const struct file_operations xve_link_control_status_fops = { + .open = xve_link_control_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int nvgpu_xve_debugfs_init(struct gk20a *g) +{ + int err = -ENODEV; + + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + + l->debugfs_xve = debugfs_create_dir("xve", gpu_root); + if (IS_ERR_OR_NULL(l->debugfs_xve)) + goto fail; + + /* + * These are just debug nodes. If they fail to get made it's not worth + * worrying the higher level SW. + */ + debugfs_create_file("link_speed", S_IRUGO, + l->debugfs_xve, g, + &xve_link_speed_fops); + debugfs_create_file("available_speeds", S_IRUGO, + l->debugfs_xve, g, + &xve_available_speeds_fops); + debugfs_create_file("link_control_status", S_IRUGO, + l->debugfs_xve, g, + &xve_link_control_status_fops); + + err = 0; +fail: + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.h b/drivers/gpu/nvgpu/os/linux/debug_xve.h new file mode 100644 index 00000000..f3b1ac54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_XVE_H__ +#define __NVGPU_DEBUG_XVE_H__ + +struct gk20a; +int nvgpu_xve_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_SVE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/dma.c b/drivers/gpu/nvgpu/os/linux/dma.c new file mode 100644 index 00000000..f513dcd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dma.c @@ -0,0 +1,694 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" + +#ifdef __DMA_ATTRS_LONGS +#define NVGPU_DEFINE_DMA_ATTRS(x) \ + struct dma_attrs x = { \ + .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ + } +#define NVGPU_DMA_ATTR(attrs) &attrs +#else +#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 +#define NVGPU_DMA_ATTR(attrs) attrs +#endif + +/* + * Enough to hold all the possible flags in string form. When a new flag is + * added it must be added here as well!! + */ +#define NVGPU_DMA_STR_SIZE \ + sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") + +/* + * The returned string is kmalloc()ed here but must be freed by the caller. + */ +static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) +{ + char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); + int bytes_available = NVGPU_DMA_STR_SIZE; + + /* + * Return the empty buffer if there's no flags. Makes it easier on the + * calling code to just print it instead of any if (NULL) type logic. + */ + if (!flags) + return buf; + +#define APPEND_FLAG(flag, str_flag) \ + do { \ + if (flags & flag) { \ + strncat(buf, str_flag, bytes_available); \ + bytes_available -= strlen(str_flag); \ + } \ + } while (0) + + APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); + APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); +#undef APPEND_FLAG + + return buf; +} + +/** + * __dma_dbg - Debug print for DMA allocs and frees. + * + * @g - The GPU. + * @size - The requested size of the alloc (size_t). + * @flags - The flags (unsigned long). + * @type - A string describing the type (i.e: sysmem or vidmem). + * @what - A string with 'alloc' or 'free'. + * + * @flags is the DMA flags. If there are none or it doesn't make sense to print + * flags just pass 0. + * + * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. + */ +static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, + const char *type, const char *what) +{ + char *flags_str = NULL; + + /* + * Don't bother making the flags_str if debugging is + * not enabled. This saves a malloc and a free. 
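+ *
+ * nvgpu_dma_flags_to_str() kzalloc()s the string, so it is freed again
+ * once the log line has been emitted.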
+ */ + if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) + return; + + flags_str = nvgpu_dma_flags_to_str(g, flags); + + __nvgpu_log_dbg(g, gpu_dbg_dma, + __func__, __LINE__, + "DMA %s: [%s] size=%-7zu " + "aligned=%-7zu total=%-10llukB %s", + what, type, + size, PAGE_ALIGN(size), + g->dma_memory_used >> 10, + flags_str); + + if (flags_str) + nvgpu_kfree(g, flags_str); +} + +#define dma_dbg_alloc(g, size, flags, type) \ + __dma_dbg(g, size, flags, type, "alloc") +#define dma_dbg_free(g, size, flags, type) \ + __dma_dbg(g, size, flags, type, "free") + +/* + * For after the DMA alloc is done. + */ +#define __dma_dbg_done(g, size, type, what) \ + nvgpu_log(g, gpu_dbg_dma, \ + "DMA %s: [%s] size=%-7zu Done!", \ + what, type, size); \ + +#define dma_dbg_alloc_done(g, size, type) \ + __dma_dbg_done(g, size, type, "alloc") +#define dma_dbg_free_done(g, size, type) \ + __dma_dbg_done(g, size, type, "free") + +#if defined(CONFIG_GK20A_VIDMEM) +static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, + size_t size) +{ + u64 addr = 0; + + if (at) + addr = nvgpu_alloc_fixed(allocator, at, size, 0); + else + addr = nvgpu_alloc(allocator, size); + + return addr; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, + unsigned long flags) +#define ATTR_ARG(x) *x +#else +static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, + unsigned long flags) +#define ATTR_ARG(x) x +#endif +{ + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); + if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) + dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); +#undef ATTR_ARG +} + +int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags(g, 0, size, mem); +} + +int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, + struct nvgpu_mem *mem) +{ + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + * + * Incoming flags are ignored here, since bits other than the + * no-kernel-mapping flag are ignored by the vidmem mapping + * functions anyway. + */ + int err = nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); +} + +int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); +} + +int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + int err; + dma_addr_t iova; + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + void *alloc_ret; + + if (nvgpu_mem_is_valid(mem)) { + nvgpu_warn(g, "memory leak !!"); + WARN_ON(1); + } + + /* + * WAR for IO coherent chips: the DMA API does not seem to generate + * mappings that work correctly. Unclear why - Bug ID: 2040115. + * + * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING + * and then make a vmap() ourselves. 
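+ * The vmap() happens further down, once the pages and the sgt are set
+ * up; the matching vunmap() is done in nvgpu_dma_free_sys().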
+ */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + flags |= NVGPU_DMA_NO_KERNEL_MAPPING; + + /* + * Before the debug print so we see this in the total. But during + * cleanup in the fail path this has to be subtracted. + */ + g->dma_memory_used += PAGE_ALIGN(size); + + dma_dbg_alloc(g, size, flags, "sysmem"); + + /* + * Save the old size but for actual allocation purposes the size is + * going to be page aligned. + */ + mem->size = size; + size = PAGE_ALIGN(size); + + nvgpu_dma_flags_to_attrs(&dma_attrs, flags); + + alloc_ret = dma_alloc_attrs(d, size, &iova, + GFP_KERNEL|__GFP_ZERO, + NVGPU_DMA_ATTR(dma_attrs)); + if (!alloc_ret) + return -ENOMEM; + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + mem->priv.pages = alloc_ret; + err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, + mem->priv.pages, + iova, size); + } else { + mem->cpu_va = alloc_ret; + err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, + iova, size, flags); + } + if (err) + goto fail_free_dma; + + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { + mem->cpu_va = vmap(mem->priv.pages, + size >> PAGE_SHIFT, + 0, PAGE_KERNEL); + if (!mem->cpu_va) { + err = -ENOMEM; + goto fail_free_sgt; + } + } + + mem->aligned_size = size; + mem->aperture = APERTURE_SYSMEM; + mem->priv.flags = flags; + + dma_dbg_alloc_done(g, mem->size, "sysmem"); + + return 0; + +fail_free_sgt: + nvgpu_free_sgtable(g, &mem->priv.sgt); +fail_free_dma: + dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); + mem->cpu_va = NULL; + mem->priv.sgt = NULL; + mem->size = 0; + g->dma_memory_used -= mem->aligned_size; + return err; +} + +int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); +} + +int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem, u64 at) +{ +#if defined(CONFIG_GK20A_VIDMEM) + u64 addr; + int err; + struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? + &g->mm.vidmem.allocator : + &g->mm.vidmem.bootstrap_allocator; + int before_pending; + + if (nvgpu_mem_is_valid(mem)) { + nvgpu_warn(g, "memory leak !!"); + WARN_ON(1); + } + + dma_dbg_alloc(g, size, flags, "vidmem"); + + mem->size = size; + size = PAGE_ALIGN(size); + + if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) + return -ENOSYS; + + /* + * Our own allocator doesn't have any flags yet, and we can't + * kernel-map these, so require explicit flags. + */ + WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); + addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + if (!addr) { + /* + * If memory is known to be freed soon, let the user know that + * it may be available after a while. 
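+ * -EAGAIN: a retry may succeed once the pending clears complete.
+ * -ENOMEM: vidmem is genuinely exhausted.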
+ */ + if (before_pending) + return -EAGAIN; + else + return -ENOMEM; + } + + if (at) + mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; + + mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!mem->priv.sgt) { + err = -ENOMEM; + goto fail_physfree; + } + + err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); + if (err) + goto fail_kfree; + + nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); + sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); + + mem->aligned_size = size; + mem->aperture = APERTURE_VIDMEM; + mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; + mem->allocator = vidmem_alloc; + mem->priv.flags = flags; + + nvgpu_init_list_node(&mem->clear_list_entry); + + dma_dbg_alloc_done(g, mem->size, "vidmem"); + + return 0; + +fail_kfree: + nvgpu_kfree(g, mem->priv.sgt); +fail_physfree: + nvgpu_free(&g->mm.vidmem.allocator, addr); + mem->size = 0; + return err; +#else + return -ENOSYS; +#endif +} + +int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags(vm, 0, size, mem); +} + +int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_dma_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + int err = nvgpu_dma_alloc_map_flags_vid(vm, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); +} + +int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); +} + +int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} + +int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags_vid(vm, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} + +static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + + g->dma_memory_used -= mem->aligned_size; + + dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); + + if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && + !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && + (mem->cpu_va || mem->priv.pages)) { + /* + * Free side of WAR for bug 2040115. 
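+ * The vmap() made at alloc time has to be torn down before the DMA
+ * API frees the underlying pages.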
+ */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + vunmap(mem->cpu_va); + + if (mem->priv.flags) { + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + + nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); + + if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + dma_free_attrs(d, mem->aligned_size, mem->priv.pages, + sg_dma_address(mem->priv.sgt->sgl), + NVGPU_DMA_ATTR(dma_attrs)); + } else { + dma_free_attrs(d, mem->aligned_size, mem->cpu_va, + sg_dma_address(mem->priv.sgt->sgl), + NVGPU_DMA_ATTR(dma_attrs)); + } + } else { + dma_free_coherent(d, mem->aligned_size, mem->cpu_va, + sg_dma_address(mem->priv.sgt->sgl)); + } + mem->cpu_va = NULL; + mem->priv.pages = NULL; + } + + /* + * When this flag is set we expect that pages is still populated but not + * by the DMA API. + */ + if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) + nvgpu_kfree(g, mem->priv.pages); + + if (mem->priv.sgt) + nvgpu_free_sgtable(g, &mem->priv.sgt); + + dma_dbg_free_done(g, mem->size, "sysmem"); + + mem->size = 0; + mem->aligned_size = 0; + mem->aperture = APERTURE_INVALID; +} + +static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) +{ +#if defined(CONFIG_GK20A_VIDMEM) + size_t mem_size = mem->size; + + dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); + + /* Sanity check - only this supported when allocating. */ + WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { + int err = nvgpu_vidmem_clear_list_enqueue(g, mem); + + /* + * If there's an error here then that means we can't clear the + * vidmem. That's too bad; however, we still own the nvgpu_mem + * buf so we have to free that. + * + * We don't need to worry about the vidmem allocator itself + * since when that gets cleaned up in the driver shutdown path + * all the outstanding allocs are force freed. 
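+ *
+ * The non-user-mem branch below instead clears the buffer and returns
+ * it to its allocator synchronously.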
+ */ + if (err) + nvgpu_kfree(g, mem); + } else { + nvgpu_memset(g, mem, 0, 0, mem->aligned_size); + nvgpu_free(mem->allocator, + (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); + nvgpu_free_sgtable(g, &mem->priv.sgt); + + mem->size = 0; + mem->aligned_size = 0; + mem->aperture = APERTURE_INVALID; + } + + dma_dbg_free_done(g, mem_size, "vidmem"); +#endif +} + +void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + switch (mem->aperture) { + case APERTURE_SYSMEM: + return nvgpu_dma_free_sys(g, mem); + case APERTURE_VIDMEM: + return nvgpu_dma_free_vid(g, mem); + default: + break; /* like free() on "null" memory */ + } +} + +void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) +{ + if (mem->gpu_va) + nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); + mem->gpu_va = 0; + + nvgpu_dma_free(vm->mm->g, mem); +} + +int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, + void *cpuva, u64 iova, size_t size, unsigned long flags) +{ + int err = 0; + struct sg_table *tbl; + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + + tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!tbl) { + err = -ENOMEM; + goto fail; + } + + nvgpu_dma_flags_to_attrs(&dma_attrs, flags); + err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, + size, NVGPU_DMA_ATTR(dma_attrs)); + if (err) + goto fail; + + sg_dma_address(tbl->sgl) = iova; + *sgt = tbl; + + return 0; + +fail: + if (tbl) + nvgpu_kfree(g, tbl); + + return err; +} + +int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, + void *cpuva, u64 iova, size_t size) +{ + return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); +} + +int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, + struct page **pages, u64 iova, size_t size) +{ + int err = 0; + struct sg_table *tbl; + + tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!tbl) { + err = -ENOMEM; + goto fail; + } + + err = sg_alloc_table_from_pages(tbl, pages, + DIV_ROUND_UP(size, PAGE_SIZE), + 0, size, GFP_KERNEL); + if (err) + goto fail; + + sg_dma_address(tbl->sgl) = iova; + *sgt = tbl; + + return 0; + +fail: + if (tbl) + nvgpu_kfree(g, tbl); + + return err; +} + +void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) +{ + sg_free_table(*sgt); + nvgpu_kfree(g, *sgt); + *sgt = NULL; +} + +bool nvgpu_iommuable(struct gk20a *g) +{ +#ifdef CONFIG_TEGRA_GK20A + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + /* + * Check against the nvgpu device to see if it's been marked as + * IOMMU'able. + */ + if (!device_is_iommuable(l->dev)) + return false; +#endif + + return true; +} diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c new file mode 100644 index 00000000..129739f0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c @@ -0,0 +1,218 @@ +/* +* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "dmabuf.h" +#include "os_linux.h" + +static void gk20a_mm_delete_priv(void *_priv) +{ + struct gk20a_buffer_state *s, *s_tmp; + struct gk20a_dmabuf_priv *priv = _priv; + struct gk20a *g; + + if (!priv) + return; + + g = priv->g; + + if (priv->comptags.allocated && priv->comptags.lines) { + BUG_ON(!priv->comptag_allocator); + gk20a_comptaglines_free(priv->comptag_allocator, + priv->comptags.offset, + priv->comptags.lines); + } + + /* Free buffer states */ + nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, + gk20a_buffer_state, list) { + gk20a_fence_put(s->fence); + nvgpu_list_del(&s->list); + nvgpu_kfree(g, s); + } + + nvgpu_kfree(g, priv); +} + +enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, + struct dma_buf *dmabuf) +{ + struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); + bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); + + if (buf_owner == NULL) { + /* Not nvgpu-allocated, assume system memory */ + return APERTURE_SYSMEM; + } else if (WARN_ON(buf_owner == g && unified_memory)) { + /* Looks like our video memory, but this gpu doesn't support + * it. Warn about a bug and bail out */ + nvgpu_warn(g, + "dmabuf is our vidmem but we don't have local vidmem"); + return APERTURE_INVALID; + } else if (buf_owner != g) { + /* Someone else's vidmem */ + return APERTURE_INVALID; + } else { + /* Yay, buf_owner == g */ + return APERTURE_VIDMEM; + } +} + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment **attachment) +{ + struct gk20a_dmabuf_priv *priv; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (WARN_ON(!priv)) + return ERR_PTR(-EINVAL); + + nvgpu_mutex_acquire(&priv->lock); + + if (priv->pin_count == 0) { + priv->attach = dma_buf_attach(dmabuf, dev); + if (IS_ERR(priv->attach)) { + nvgpu_mutex_release(&priv->lock); + return (struct sg_table *)priv->attach; + } + + priv->sgt = dma_buf_map_attachment(priv->attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(priv->sgt)) { + dma_buf_detach(dmabuf, priv->attach); + nvgpu_mutex_release(&priv->lock); + return priv->sgt; + } + } + + priv->pin_count++; + nvgpu_mutex_release(&priv->lock); + *attachment = priv->attach; + return priv->sgt; +} + +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment, + struct sg_table *sgt) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); + dma_addr_t dma_addr; + + if (IS_ERR(priv) || !priv) + return; + + nvgpu_mutex_acquire(&priv->lock); + WARN_ON(priv->sgt != sgt); + WARN_ON(priv->attach != attachment); + priv->pin_count--; + WARN_ON(priv->pin_count < 0); + dma_addr = sg_dma_address(priv->sgt->sgl); + if (priv->pin_count == 0) { + dma_buf_unmap_attachment(priv->attach, priv->sgt, + DMA_BIDIRECTIONAL); + dma_buf_detach(dmabuf, priv->attach); + } + nvgpu_mutex_release(&priv->lock); +} + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) +{ + struct gk20a *g = gk20a_get_platform(dev)->g; + struct gk20a_dmabuf_priv *priv; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (likely(priv)) + return 0; + + nvgpu_mutex_acquire(&g->mm.priv_lock); + priv = dma_buf_get_drvdata(dmabuf, dev); + if (priv) + goto priv_exist_or_err; + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + priv = ERR_PTR(-ENOMEM); + goto priv_exist_or_err; + } + + nvgpu_mutex_init(&priv->lock); + 
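+
+ /*
+ * priv must be fully constructed before dma_buf_set_drvdata()
+ * publishes it; racing callers serialize on g->mm.priv_lock, which
+ * is held across this whole lookup-or-create sequence.
+ */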
nvgpu_init_list_node(&priv->states); + priv->g = g; + dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); + +priv_exist_or_err: + nvgpu_mutex_release(&g->mm.priv_lock); + if (IS_ERR(priv)) + return -ENOMEM; + + return 0; +} + +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, + u64 offset, struct gk20a_buffer_state **state) +{ + int err = 0; + struct gk20a_dmabuf_priv *priv; + struct gk20a_buffer_state *s; + struct device *dev = dev_from_gk20a(g); + + if (WARN_ON(offset >= (u64)dmabuf->size)) + return -EINVAL; + + err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); + if (err) + return err; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (WARN_ON(!priv)) + return -ENOSYS; + + nvgpu_mutex_acquire(&priv->lock); + + nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) + if (s->offset == offset) + goto out; + + /* State not found, create state. */ + s = nvgpu_kzalloc(g, sizeof(*s)); + if (!s) { + err = -ENOMEM; + goto out; + } + + s->offset = offset; + nvgpu_init_list_node(&s->list); + nvgpu_mutex_init(&s->lock); + nvgpu_list_add_tail(&s->list, &priv->states); + +out: + nvgpu_mutex_release(&priv->lock); + if (!err) + *state = s; + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.h b/drivers/gpu/nvgpu/os/linux/dmabuf.h new file mode 100644 index 00000000..8399eaaf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.h @@ -0,0 +1,62 @@ +/* +* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __COMMON_LINUX_DMABUF_H__ +#define __COMMON_LINUX_DMABUF_H__ + +#include +#include +#include +#include + +struct sg_table; +struct dma_buf; +struct dma_buf_attachment; +struct device; + +struct gk20a; +struct gk20a_buffer_state; + +struct gk20a_dmabuf_priv { + struct nvgpu_mutex lock; + + struct gk20a *g; + + struct gk20a_comptag_allocator *comptag_allocator; + struct gk20a_comptags comptags; + + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + int pin_count; + + struct nvgpu_list_node states; + + u64 buffer_id; +}; + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment **attachment); +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment, + struct sg_table *sgt); + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); + +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, + u64 offset, struct gk20a_buffer_state **state); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c new file mode 100644 index 00000000..8f33c5d2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "module.h" +#include "os_linux.h" +#include "sysfs.h" +#include "ioctl.h" +#include "gk20a/regops_gk20a.h" + +#define EMC3D_DEFAULT_RATIO 750 + +void nvgpu_kernel_restart(void *cmd) +{ + kernel_restart(cmd); +} + +static void nvgpu_init_vars(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + + nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); + nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); + + init_rwsem(&l->busy_lock); + nvgpu_rwsem_init(&g->deterministic_busy); + + nvgpu_spinlock_init(&g->mc_enable_lock); + + nvgpu_mutex_init(&platform->railgate_lock); + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + nvgpu_mutex_init(&g->poweron_lock); + nvgpu_mutex_init(&g->poweroff_lock); + nvgpu_mutex_init(&g->ctxsw_disable_lock); + + l->regs_saved = l->regs; + l->bar1_saved = l->bar1; + + g->emc3d_ratio = EMC3D_DEFAULT_RATIO; + + /* Set DMA parameters to allow larger sgt lists */ + dev->dma_parms = &l->dma_parms; + dma_set_max_seg_size(dev, UINT_MAX); + + /* + * A default of 16GB is the largest supported DMA size that is + * acceptable to all currently supported Tegra SoCs. 
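+ * DMA_BIT_MASK(34) below encodes exactly that limit: 2^34 bytes = 16GB.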
+ */ + if (!platform->dma_mask) + platform->dma_mask = DMA_BIT_MASK(34); + + dma_set_mask(dev, platform->dma_mask); + dma_set_coherent_mask(dev, platform->dma_mask); + + nvgpu_init_list_node(&g->profiler_objects); + + nvgpu_init_list_node(&g->boardobj_head); + nvgpu_init_list_node(&g->boardobjgrp_head); +} + +static void nvgpu_init_gr_vars(struct gk20a *g) +{ + gk20a_init_gr(g); + + nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); + g->gr.max_comptag_mem = totalram_pages + >> (10 - (PAGE_SHIFT - 10)); +} + +static void nvgpu_init_timeout(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->timeouts_disabled_by_user = false; + nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); + + if (nvgpu_platform_is_silicon(g)) { + g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; + } else if (nvgpu_platform_is_fpga(g)) { + g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; + } else { + g->gr_idle_timeout_default = (u32)ULONG_MAX; + } + g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; + g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; +} + +static void nvgpu_init_timeslice(struct gk20a *g) +{ + g->runlist_interleave = true; + + g->timeslice_low_priority_us = 1300; + g->timeslice_medium_priority_us = 2600; + g->timeslice_high_priority_us = 5200; + + g->min_timeslice_us = 1000; + g->max_timeslice_us = 50000; +} + +static void nvgpu_init_pm_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + /* + * Set up initial power settings. For non-slicon platforms, disable + * power features and for silicon platforms, read from platform data + */ + g->slcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; + g->blcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; + g->elcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; + g->elpg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; + g->aelpg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; + g->mscg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; + g->can_elpg = + nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; + + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, + nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, + nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, + nvgpu_platform_is_silicon(g) ? 
platform->can_blcg : false); + + g->aggressive_sync_destroy = platform->aggressive_sync_destroy; + g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; + g->has_syncpoints = platform->has_syncpoints; +#ifdef CONFIG_NVGPU_SUPPORT_CDE + g->has_cde = platform->has_cde; +#endif + g->ptimer_src_freq = platform->ptimer_src_freq; + g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); + g->can_railgate = platform->can_railgate_init; + g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; + /* if default delay is not set, set default delay to 500msec */ + if (platform->railgate_delay_init) + g->railgate_delay = platform->railgate_delay_init; + else + g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; + __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); + + /* set default values to aelpg parameters */ + g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; + g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; + g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; + g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; + g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; + + __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); +} + +static void nvgpu_init_vbios_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); + g->vbios_min_version = platform->vbios_min_version; +} + +static void nvgpu_init_ltc_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->ltc_streamid = platform->ltc_streamid; +} + +static void nvgpu_init_mm_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->mm.disable_bigpage = platform->disable_bigpage; + __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, + platform->honors_aperture); + __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, + platform->unified_memory); + __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, + platform->unify_address_spaces); + + nvgpu_mutex_init(&g->mm.tlb_lock); + nvgpu_mutex_init(&g->mm.priv_lock); +} + +int nvgpu_probe(struct gk20a *g, + const char *debugfs_symlink, + const char *interface_name, + struct class *class) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int err = 0; + + nvgpu_init_vars(g); + nvgpu_init_gr_vars(g); + nvgpu_init_timeout(g); + nvgpu_init_timeslice(g); + nvgpu_init_pm_vars(g); + nvgpu_init_vbios_vars(g); + nvgpu_init_ltc_vars(g); + err = nvgpu_init_soc_vars(g); + if (err) { + nvgpu_err(g, "init soc vars failed"); + return err; + } + + /* Initialize the platform interface. 
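+ * An -EPROBE_DEFER return here is not a real failure: it is logged at
+ * info level only, and the driver core retries the probe later once
+ * the resources the platform code needs become available.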
*/ + err = platform->probe(dev); + if (err) { + if (err == -EPROBE_DEFER) + nvgpu_info(g, "platform probe failed"); + else + nvgpu_err(g, "platform probe failed"); + return err; + } + + nvgpu_init_mm_vars(g); + + /* platform probe can defer do user init only if probe succeeds */ + err = gk20a_user_init(dev, interface_name, class); + if (err) + return err; + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + nvgpu_err(g, "late probe failed"); + return err; + } + } + + nvgpu_create_sysfs(dev); + gk20a_debug_init(g, debugfs_symlink); + + g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); + if (!g->dbg_regops_tmp_buf) { + nvgpu_err(g, "couldn't allocate regops tmp buf"); + return -ENOMEM; + } + g->dbg_regops_tmp_buf_ops = + SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); + + g->remove_support = gk20a_remove_support; + + nvgpu_ref_init(&g->refcount); + + return 0; +} + +/** + * cyclic_delta - Returns delta of cyclic integers a and b. + * + * @a - First integer + * @b - Second integer + * + * Note: if a is ahead of b, delta is positive. + */ +static int cyclic_delta(int a, int b) +{ + return a - b; +} + +/** + * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete + * + * @g - The GPU to wait on. + * + * Waits until all interrupt handlers that have been scheduled to run have + * completed. + */ +void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); + int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); + + /* wait until all stalling irqs are handled */ + NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, + cyclic_delta(stall_irq_threshold, + atomic_read(&l->sw_irq_stall_last_handled)) + <= 0, 0); + + /* wait until all non-stalling irqs are handled */ + NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, + cyclic_delta(nonstall_irq_threshold, + atomic_read(&l->sw_irq_nonstall_last_handled)) + <= 0, 0); +} + +static void nvgpu_free_gk20a(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + kfree(l); +} + +void nvgpu_init_gk20a(struct gk20a *g) +{ + g->free = nvgpu_free_gk20a; +} diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.h b/drivers/gpu/nvgpu/os/linux/driver_common.h new file mode 100644 index 00000000..6f42f775 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef NVGPU_LINUX_DRIVER_COMMON +#define NVGPU_LINUX_DRIVER_COMMON + +void nvgpu_init_gk20a(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/dt.c b/drivers/gpu/nvgpu/os/linux/dt.c new file mode 100644 index 00000000..88e391e3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dt.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os_linux.h" + +int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, + u32 index, u32 *value) +{ + struct device *dev = dev_from_gk20a(g); + struct device_node *np = dev->of_node; + + return of_property_read_u32_index(np, name, index, value); +} diff --git a/drivers/gpu/nvgpu/os/linux/firmware.c b/drivers/gpu/nvgpu/os/linux/firmware.c new file mode 100644 index 00000000..9a4dc653 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/firmware.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +static const struct firmware *do_request_firmware(struct device *dev, + const char *prefix, const char *fw_name, int flags) +{ + const struct firmware *fw; + char *fw_path = NULL; + int path_len, err; + + if (prefix) { + path_len = strlen(prefix) + strlen(fw_name); + path_len += 2; /* for the path separator and zero terminator*/ + + fw_path = nvgpu_kzalloc(get_gk20a(dev), + sizeof(*fw_path) * path_len); + if (!fw_path) + return NULL; + + sprintf(fw_path, "%s/%s", prefix, fw_name); + fw_name = fw_path; + } + + if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) + err = request_firmware_direct(&fw, fw_name, dev); + else + err = request_firmware(&fw, fw_name, dev); + + nvgpu_kfree(get_gk20a(dev), fw_path); + if (err) + return NULL; + return fw; +} + +/* This is a simple wrapper around request_firmware that takes 'fw_name' and + * applies an IP specific relative path prefix to it. The caller is + * responsible for calling nvgpu_release_firmware later. */ +struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, + const char *fw_name, + int flags) +{ + struct device *dev = dev_from_gk20a(g); + struct nvgpu_firmware *fw; + const struct firmware *linux_fw; + + /* current->fs is NULL when calling from SYS_EXIT. + Add a check here to prevent crash in request_firmware */ + if (!current->fs || !fw_name) + return NULL; + + fw = nvgpu_kzalloc(g, sizeof(*fw)); + if (!fw) + return NULL; + + linux_fw = do_request_firmware(dev, g->name, fw_name, flags); + +#ifdef CONFIG_TEGRA_GK20A + /* TO BE REMOVED - Support loading from legacy SOC specific path. 
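+ * Lookup order is the per-GPU prefix (g->name) first, then the
+ * platform soc_name directory as a legacy fallback; the fallback is
+ * skipped when NVGPU_REQUEST_FIRMWARE_NO_SOC is set.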
*/ + if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { + struct gk20a_platform *platform = gk20a_get_platform(dev); + linux_fw = do_request_firmware(dev, + platform->soc_name, fw_name, flags); + } +#endif + + if (!linux_fw) + goto err; + + fw->data = nvgpu_kmalloc(g, linux_fw->size); + if (!fw->data) + goto err_release; + + memcpy(fw->data, linux_fw->data, linux_fw->size); + fw->size = linux_fw->size; + + release_firmware(linux_fw); + + return fw; + +err_release: + release_firmware(linux_fw); +err: + nvgpu_kfree(g, fw); + return NULL; +} + +void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) +{ + if(!fw) + return; + + nvgpu_kfree(g, fw->data); + nvgpu_kfree(g, fw); +} diff --git a/drivers/gpu/nvgpu/os/linux/fuse.c b/drivers/gpu/nvgpu/os/linux/fuse.c new file mode 100644 index 00000000..27851f92 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/fuse.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) +{ + return tegra_sku_info.gpu_speedo_id; +} + +/* + * Use tegra_fuse_control_read/write() APIs for fuse offsets upto 0x100 + * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100 + */ +void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) +{ + tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); +} + +void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) +{ + tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); +} + +void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) +{ + tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); +} + +void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) +{ + tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); +} + +int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) +{ + return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); +} + +int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) +{ + return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); +} diff --git a/drivers/gpu/nvgpu/os/linux/intr.c b/drivers/gpu/nvgpu/os/linux/intr.c new file mode 100644 index 00000000..7ffc7e87 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mc_gk20a.h" + +#include +#include +#include "os_linux.h" + +irqreturn_t nvgpu_intr_stall(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 mc_intr_0; + + trace_mc_gk20a_intr_stall(g->name); + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + mc_intr_0 = g->ops.mc.intr_stall(g); + if (unlikely(!mc_intr_0)) + return IRQ_NONE; + + g->ops.mc.intr_stall_pause(g); + + atomic_inc(&l->hw_irq_stall_count); + + trace_mc_gk20a_intr_stall_done(g->name); + + return IRQ_WAKE_THREAD; +} + +irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int hw_irq_count; + + nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); + + trace_mc_gk20a_intr_thread_stall(g->name); + + hw_irq_count = atomic_read(&l->hw_irq_stall_count); + g->ops.mc.isr_stall(g); + g->ops.mc.intr_stall_resume(g); + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); + + nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); + + trace_mc_gk20a_intr_thread_stall_done(g->name); + + return IRQ_HANDLED; +} + +irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) +{ + u32 non_stall_intr_val; + u32 hw_irq_count; + int ops_old, ops_new, ops = 0; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + non_stall_intr_val = g->ops.mc.intr_nonstall(g); + if (unlikely(!non_stall_intr_val)) + return IRQ_NONE; + + g->ops.mc.intr_nonstall_pause(g); + + ops = g->ops.mc.isr_nonstall(g); + if (ops) { + do { + ops_old = atomic_read(&l->nonstall_ops); + ops_new = ops_old | ops; + } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, + ops_old, ops_new)); + + queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); + } + + hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); + + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); + + g->ops.mc.intr_nonstall_resume(g); + + nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); + + return IRQ_HANDLED; +} + +void nvgpu_intr_nonstall_cb(struct work_struct *work) +{ + struct nvgpu_os_linux *l = + container_of(work, struct nvgpu_os_linux, nonstall_fn_work); + struct gk20a *g = &l->g; + + do { + u32 ops; + + ops = atomic_xchg(&l->nonstall_ops, 0); + mc_gk20a_handle_intr_nonstall(g, ops); + } while (atomic_read(&l->nonstall_ops) != 0); +} diff --git a/drivers/gpu/nvgpu/os/linux/intr.h b/drivers/gpu/nvgpu/os/linux/intr.h new file mode 100644 index 00000000..d43cdccb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __NVGPU_LINUX_INTR_H__ +#define __NVGPU_LINUX_INTR_H__ +struct gk20a; + +irqreturn_t nvgpu_intr_stall(struct gk20a *g); +irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); +irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); +void nvgpu_intr_nonstall_cb(struct work_struct *work); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/io.c b/drivers/gpu/nvgpu/os/linux/io.c new file mode 100644 index 00000000..c06512a5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/io.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include + +#include "os_linux.h" +#include "gk20a/gk20a.h" + +void nvgpu_writel(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + writel_relaxed(v, l->regs + r); + nvgpu_wmb(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +u32 nvgpu_readl(struct gk20a *g, u32 r) +{ + u32 v = __nvgpu_readl(g, r); + + if (v == 0xffffffff) + __nvgpu_check_gpu_state(g); + + return v; +} + +u32 __nvgpu_readl(struct gk20a *g, u32 r) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 v = 0xffffffff; + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + v = readl(l->regs + r); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } + + return v; +} + +void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + nvgpu_wmb(); + do { + writel_relaxed(v, l->regs + r); + } while (readl(l->regs + r) != v); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->bar1)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + nvgpu_wmb(); + writel_relaxed(v, l->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } +} + +u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 v = 0xffffffff; + + if (unlikely(!l->bar1)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + v = readl(l->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } + + return v; +} + +bool nvgpu_io_exists(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return l->regs != NULL; +} + +bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return r < resource_size(l->regs); +} diff --git a/drivers/gpu/nvgpu/os/linux/io_usermode.c b/drivers/gpu/nvgpu/os/linux/io_usermode.c new file mode 100644 index 00000000..ce7c9e75 --- /dev/null +++ 
b/drivers/gpu/nvgpu/os/linux/io_usermode.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include + +#include "os_linux.h" +#include "gk20a/gk20a.h" + +#include + +void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); + + writel_relaxed(v, reg); + nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.c b/drivers/gpu/nvgpu/os/linux/ioctl.c new file mode 100644 index 00000000..359e5103 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.c @@ -0,0 +1,296 @@ +/* + * NVGPU IOCTLs + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" + +#include "ioctl_channel.h" +#include "ioctl_ctrl.h" +#include "ioctl_as.h" +#include "ioctl_tsg.h" +#include "ioctl_dbg.h" +#include "module.h" +#include "os_linux.h" +#include "ctxsw_trace.h" +#include "platform_gk20a.h" + +#define GK20A_NUM_CDEVS 7 + +const struct file_operations gk20a_channel_ops = { + .owner = THIS_MODULE, + .release = gk20a_channel_release, + .open = gk20a_channel_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_channel_ioctl, +#endif + .unlocked_ioctl = gk20a_channel_ioctl, +}; + +static const struct file_operations gk20a_ctrl_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctrl_dev_release, + .open = gk20a_ctrl_dev_open, + .unlocked_ioctl = gk20a_ctrl_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctrl_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_dbg_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_dbg_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, + .poll = gk20a_dbg_gpu_dev_poll, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_as_ops = { + .owner = THIS_MODULE, + .release = gk20a_as_dev_release, + .open = gk20a_as_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_as_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_as_dev_ioctl, +}; + +/* + * Note: We use a different 'open' to trigger handling of the profiler session. + * Most of the code is shared between them... Though, at some point if the + * code does get too tangled trying to handle each in the same path we can + * separate them cleanly. 
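+ * For now the two nodes differ only in their .open callback; the
+ * release and ioctl handlers are shared with the debugger device.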
+ */ +static const struct file_operations gk20a_prof_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_prof_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_tsg_ops = { + .owner = THIS_MODULE, + .release = nvgpu_ioctl_tsg_dev_release, + .open = nvgpu_ioctl_tsg_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, +#endif + .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, +}; + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static const struct file_operations gk20a_ctxsw_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctxsw_dev_release, + .open = gk20a_ctxsw_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctxsw_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, + .poll = gk20a_ctxsw_dev_poll, + .read = gk20a_ctxsw_dev_read, + .mmap = gk20a_ctxsw_dev_mmap, +}; +#endif + +static const struct file_operations gk20a_sched_ops = { + .owner = THIS_MODULE, + .release = gk20a_sched_dev_release, + .open = gk20a_sched_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_sched_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_sched_dev_ioctl, + .poll = gk20a_sched_dev_poll, + .read = gk20a_sched_dev_read, +}; + +static int gk20a_create_device( + struct device *dev, int devno, + const char *interface_name, const char *cdev_name, + struct cdev *cdev, struct device **out, + const struct file_operations *ops, + struct class *class) +{ + struct device *subdev; + int err; + struct gk20a *g = gk20a_from_dev(dev); + + nvgpu_log_fn(g, " "); + + cdev_init(cdev, ops); + cdev->owner = THIS_MODULE; + + err = cdev_add(cdev, devno, 1); + if (err) { + dev_err(dev, "failed to add %s cdev\n", cdev_name); + return err; + } + + subdev = device_create(class, NULL, devno, NULL, + interface_name, cdev_name); + + if (IS_ERR(subdev)) { + err = PTR_ERR(dev); + cdev_del(cdev); + dev_err(dev, "failed to create %s device for %s\n", + cdev_name, dev_name(dev)); + return err; + } + + *out = subdev; + return 0; +} + +void gk20a_user_deinit(struct device *dev, struct class *class) +{ + struct gk20a *g = gk20a_from_dev(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (l->channel.node) { + device_destroy(class, l->channel.cdev.dev); + cdev_del(&l->channel.cdev); + } + + if (l->as_dev.node) { + device_destroy(class, l->as_dev.cdev.dev); + cdev_del(&l->as_dev.cdev); + } + + if (l->ctrl.node) { + device_destroy(class, l->ctrl.cdev.dev); + cdev_del(&l->ctrl.cdev); + } + + if (l->dbg.node) { + device_destroy(class, l->dbg.cdev.dev); + cdev_del(&l->dbg.cdev); + } + + if (l->prof.node) { + device_destroy(class, l->prof.cdev.dev); + cdev_del(&l->prof.cdev); + } + + if (l->tsg.node) { + device_destroy(class, l->tsg.cdev.dev); + cdev_del(&l->tsg.cdev); + } + + if (l->ctxsw.node) { + device_destroy(class, l->ctxsw.cdev.dev); + cdev_del(&l->ctxsw.cdev); + } + + if (l->sched.node) { + device_destroy(class, l->sched.cdev.dev); + cdev_del(&l->sched.cdev); + } + + if (l->cdev_region) + unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); +} + +int gk20a_user_init(struct device *dev, const char *interface_name, + struct class *class) +{ + int err; + dev_t devno; + struct gk20a *g = gk20a_from_dev(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev)); + if (err) { + dev_err(dev, "failed to allocate devno\n"); + goto fail; + } + l->cdev_region = devno; 
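+
+ /*
+ * Each node below takes one minor number from the chrdev region
+ * allocated above. interface_name is used as a printf format with
+ * the cdev name as its argument; with a format such as
+ * "nvhost%s-gpu" (an illustrative value), the "-as" node below comes
+ * out as /dev/nvhost-as-gpu.
+ */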
+ + err = gk20a_create_device(dev, devno++, interface_name, "", + &l->channel.cdev, &l->channel.node, + &gk20a_channel_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-as", + &l->as_dev.cdev, &l->as_dev.node, + &gk20a_as_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", + &l->ctrl.cdev, &l->ctrl.node, + &gk20a_ctrl_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-dbg", + &l->dbg.cdev, &l->dbg.node, + &gk20a_dbg_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-prof", + &l->prof.cdev, &l->prof.node, + &gk20a_prof_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-tsg", + &l->tsg.cdev, &l->tsg.node, + &gk20a_tsg_ops, + class); + if (err) + goto fail; + +#if defined(CONFIG_GK20A_CTXSW_TRACE) + err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", + &l->ctxsw.cdev, &l->ctxsw.node, + &gk20a_ctxsw_ops, + class); + if (err) + goto fail; +#endif + + err = gk20a_create_device(dev, devno++, interface_name, "-sched", + &l->sched.cdev, &l->sched.node, + &gk20a_sched_ops, + class); + if (err) + goto fail; + + return 0; +fail: + gk20a_user_deinit(dev, &nvgpu_class); + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.h b/drivers/gpu/nvgpu/os/linux/ioctl.h new file mode 100644 index 00000000..7bf16711 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_IOCTL_H__ +#define __NVGPU_IOCTL_H__ + +struct device; +struct class; + +int gk20a_user_init(struct device *dev, const char *interface_name, + struct class *class); +void gk20a_user_deinit(struct device *dev, struct class *class); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c new file mode 100644 index 00000000..47f612cc --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c @@ -0,0 +1,423 @@ +/* + * GK20A Address Spaces + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "ioctl_as.h" +#include "os_linux.h" + +static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) + core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) + core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; + + return core_flags; +} + +static int gk20a_as_ioctl_bind_channel( + struct gk20a_as_share *as_share, + struct nvgpu_as_bind_channel_args *args) +{ + int err = 0; + struct channel_gk20a *ch; + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + if (gk20a_channel_as_bound(ch)) { + err = -EINVAL; + goto out; + } + + /* this will set channel_gk20a->vm */ + err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); + +out: + gk20a_channel_put(ch); + return err; +} + +static int gk20a_as_ioctl_alloc_space( + struct gk20a_as_share *as_share, + struct nvgpu_as_alloc_space_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, + &args->o_a.offset, + gk20a_as_translate_as_alloc_space_flags(g, + args->flags)); +} + +static int gk20a_as_ioctl_free_space( + struct gk20a_as_share *as_share, + struct nvgpu_as_free_space_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + return nvgpu_vm_area_free(as_share->vm, args->offset); +} + +static int gk20a_as_ioctl_map_buffer_ex( + struct gk20a_as_share *as_share, + struct nvgpu_as_map_buffer_ex_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + /* unsupported, direct kind control must be used */ + if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { + struct gk20a *g = as_share->vm->mm->g; + nvgpu_log_info(g, "Direct kind control must be requested"); + return -EINVAL; + } + + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, + &args->offset, args->flags, + args->compr_kind, + args->incompr_kind, + args->buffer_offset, + args->mapping_size, + NULL); +} + +static int gk20a_as_ioctl_unmap_buffer( + struct gk20a_as_share *as_share, + struct nvgpu_as_unmap_buffer_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + nvgpu_vm_unmap(as_share->vm, args->offset, NULL); + + return 0; +} + +static int gk20a_as_ioctl_map_buffer_batch( + struct gk20a_as_share *as_share, + struct nvgpu_as_map_buffer_batch_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + u32 i; + int err = 0; + + struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = + (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) + args->unmaps; + struct nvgpu_as_map_buffer_ex_args __user *user_map_args = + (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) + args->maps; + + struct vm_gk20a_mapping_batch batch; + + nvgpu_log_fn(g, " "); + + if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || + args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) + return -EINVAL; + + nvgpu_vm_mapping_batch_start(&batch); + + for (i = 0; i < args->num_unmaps; ++i) { + struct nvgpu_as_unmap_buffer_args unmap_args; + + if (copy_from_user(&unmap_args, &user_unmap_args[i], + sizeof(unmap_args))) { + err = -EFAULT; + break; + } + + 
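+ /*
+ * Unmaps that already succeeded are not rolled back if a later
+ * entry fails; on error args->num_unmaps reports how far this
+ * loop got.
+ */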
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); + } + + if (err) { + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); + + args->num_unmaps = i; + args->num_maps = 0; + return err; + } + + for (i = 0; i < args->num_maps; ++i) { + s16 compressible_kind; + s16 incompressible_kind; + + struct nvgpu_as_map_buffer_ex_args map_args; + memset(&map_args, 0, sizeof(map_args)); + + if (copy_from_user(&map_args, &user_map_args[i], + sizeof(map_args))) { + err = -EFAULT; + break; + } + + if (map_args.flags & + NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + compressible_kind = map_args.compr_kind; + incompressible_kind = map_args.incompr_kind; + } else { + /* direct kind control must be used */ + err = -EINVAL; + break; + } + + err = nvgpu_vm_map_buffer( + as_share->vm, map_args.dmabuf_fd, + &map_args.offset, map_args.flags, + compressible_kind, incompressible_kind, + map_args.buffer_offset, + map_args.mapping_size, + &batch); + if (err) + break; + } + + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); + + if (err) + args->num_maps = i; + /* note: args->num_unmaps will be unmodified, which is ok + * since all unmaps are done */ + + return err; +} + +static int gk20a_as_ioctl_get_va_regions( + struct gk20a_as_share *as_share, + struct nvgpu_as_get_va_regions_args *args) +{ + unsigned int i; + unsigned int write_entries; + struct nvgpu_as_va_region __user *user_region_ptr; + struct vm_gk20a *vm = as_share->vm; + struct gk20a *g = gk20a_from_vm(vm); + unsigned int page_sizes = gmmu_page_size_kernel; + + nvgpu_log_fn(g, " "); + + if (!vm->big_pages) + page_sizes--; + + write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); + if (write_entries > page_sizes) + write_entries = page_sizes; + + user_region_ptr = + (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; + + for (i = 0; i < write_entries; ++i) { + struct nvgpu_as_va_region region; + struct nvgpu_allocator *vma = vm->vma[i]; + + memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); + + region.page_size = vm->gmmu_page_sizes[i]; + region.offset = nvgpu_alloc_base(vma); + /* No __aeabi_uldivmod() on some platforms... 
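+ * so derive the page count with a shift instead; page_size is a
+ * power of two, which makes ilog2() exact here.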
*/ + region.pages = (nvgpu_alloc_end(vma) - + nvgpu_alloc_base(vma)) >> ilog2(region.page_size); + + if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) + return -EFAULT; + } + + args->buf_size = + page_sizes * sizeof(struct nvgpu_as_va_region); + + return 0; +} + +static int nvgpu_as_ioctl_get_sync_ro_map( + struct gk20a_as_share *as_share, + struct nvgpu_as_get_sync_ro_map_args *args) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct vm_gk20a *vm = as_share->vm; + struct gk20a *g = gk20a_from_vm(vm); + u64 base_gpuva; + u32 sync_size; + int err = 0; + + if (!g->ops.fifo.get_sync_ro_map) + return -EINVAL; + + if (!gk20a_platform_has_syncpoints(g)) + return -EINVAL; + + err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); + if (err) + return err; + + args->base_gpuva = base_gpuva; + args->sync_size = sync_size; + + return err; +#else + return -EINVAL; +#endif +} + +int gk20a_as_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a_as_share *as_share; + struct gk20a *g; + int err; + + l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); + g = &l->g; + + nvgpu_log_fn(g, " "); + + err = gk20a_as_alloc_share(g, 0, 0, &as_share); + if (err) { + nvgpu_log_fn(g, "failed to alloc share"); + return err; + } + + filp->private_data = as_share; + return 0; +} + +int gk20a_as_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_as_share *as_share = filp->private_data; + + if (!as_share) + return 0; + + return gk20a_as_release_share(as_share); +} + +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct gk20a_as_share *as_share = filp->private_data; + struct gk20a *g = gk20a_from_as(as_share->as); + + u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + err = gk20a_busy(g); + if (err) + return err; + + switch (cmd) { + case NVGPU_AS_IOCTL_BIND_CHANNEL: + trace_gk20a_as_ioctl_bind_channel(g->name); + err = gk20a_as_ioctl_bind_channel(as_share, + (struct nvgpu_as_bind_channel_args *)buf); + + break; + case NVGPU32_AS_IOCTL_ALLOC_SPACE: + { + struct nvgpu32_as_alloc_space_args *args32 = + (struct nvgpu32_as_alloc_space_args *)buf; + struct nvgpu_as_alloc_space_args args; + + args.pages = args32->pages; + args.page_size = args32->page_size; + args.flags = args32->flags; + args.o_a.offset = args32->o_a.offset; + trace_gk20a_as_ioctl_alloc_space(g->name); + err = gk20a_as_ioctl_alloc_space(as_share, &args); + args32->o_a.offset = args.o_a.offset; + break; + } + case NVGPU_AS_IOCTL_ALLOC_SPACE: + trace_gk20a_as_ioctl_alloc_space(g->name); + err = gk20a_as_ioctl_alloc_space(as_share, + (struct nvgpu_as_alloc_space_args *)buf); + break; + case NVGPU_AS_IOCTL_FREE_SPACE: + trace_gk20a_as_ioctl_free_space(g->name); + err = gk20a_as_ioctl_free_space(as_share, + (struct nvgpu_as_free_space_args *)buf); + break; + case NVGPU_AS_IOCTL_MAP_BUFFER_EX: + trace_gk20a_as_ioctl_map_buffer(g->name); + err = gk20a_as_ioctl_map_buffer_ex(as_share, + (struct nvgpu_as_map_buffer_ex_args *)buf); + break; + case NVGPU_AS_IOCTL_UNMAP_BUFFER: + trace_gk20a_as_ioctl_unmap_buffer(g->name); + err = 
gk20a_as_ioctl_unmap_buffer(as_share, + (struct nvgpu_as_unmap_buffer_args *)buf); + break; + case NVGPU_AS_IOCTL_GET_VA_REGIONS: + trace_gk20a_as_ioctl_get_va_regions(g->name); + err = gk20a_as_ioctl_get_va_regions(as_share, + (struct nvgpu_as_get_va_regions_args *)buf); + break; + case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: + err = gk20a_as_ioctl_map_buffer_batch(as_share, + (struct nvgpu_as_map_buffer_batch_args *)buf); + break; + case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: + err = nvgpu_as_ioctl_get_sync_ro_map(as_share, + (struct nvgpu_as_get_sync_ro_map_args *)buf); + break; + default: + err = -ENOTTY; + break; + } + + gk20a_idle(g); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) + err = -EFAULT; + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.h b/drivers/gpu/nvgpu/os/linux/ioctl_as.h new file mode 100644 index 00000000..b3de3782 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.h @@ -0,0 +1,30 @@ +/* + * GK20A Address Spaces + * + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_COMMON_LINUX_AS_H__ +#define __NVGPU_COMMON_LINUX_AS_H__ + +struct inode; +struct file; + +/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and + * num_maps */ +#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 + +/* struct file_operations driver interface */ +int gk20a_as_dev_open(struct inode *inode, struct file *filp); +int gk20a_as_dev_release(struct inode *inode, struct file *filp); +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c new file mode 100644 index 00000000..b04bb9de --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -0,0 +1,1388 @@ +/* + * GK20A Graphics channel + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/fence_gk20a.h" + +#include "platform_gk20a.h" +#include "ioctl_channel.h" +#include "channel.h" +#include "os_linux.h" +#include "ctxsw_trace.h" + +/* the minimal size of client buffer */ +#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ + (sizeof(struct gk20a_cs_snapshot_fifo) + \ + sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) + +static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + return "WFI"; + default: + return "?"; + } +} + +static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + return "WFI"; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + return "CTA"; + default: + return "?"; + } +} + +static void gk20a_channel_trace_sched_param( + void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, + const char *graphics_preempt_mode, + const char *compute_preempt_mode), + struct channel_gk20a *ch) +{ + struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); + + if (!tsg) + return; + + (trace)(ch->chid, ch->tsgid, ch->pid, + tsg_gk20a_from_ch(ch)->timeslice_us, + ch->timeout_ms_max, + gk20a_fifo_interleave_level_name(tsg->interleave_level), + gr_gk20a_graphics_preempt_mode_name( + tsg->gr_ctx.graphics_preempt_mode), + gr_gk20a_compute_preempt_mode_name( + tsg->gr_ctx.compute_preempt_mode)); +} + +/* + * Although channels do have pointers back to the gk20a struct that they were + * created under in cases where the driver is killed that pointer can be bad. + * The channel memory can be freed before the release() function for a given + * channel is called. This happens when the driver dies and userspace doesn't + * get a chance to call release() until after the entire gk20a driver data is + * unloaded and freed. + */ +struct channel_priv { + struct gk20a *g; + struct channel_gk20a *c; +}; + +#if defined(CONFIG_GK20A_CYCLE_STATS) + +void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + /* disable existing cyclestats buffer */ + nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); + if (priv->cyclestate_buffer_handler) { + dma_buf_vunmap(priv->cyclestate_buffer_handler, + ch->cyclestate.cyclestate_buffer); + dma_buf_put(priv->cyclestate_buffer_handler); + priv->cyclestate_buffer_handler = NULL; + ch->cyclestate.cyclestate_buffer = NULL; + ch->cyclestate.cyclestate_buffer_size = 0; + } + nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); +} + +static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, + struct nvgpu_cycle_stats_args *args) +{ + struct dma_buf *dmabuf; + void *virtual_address; + struct nvgpu_channel_linux *priv = ch->os_priv; + + /* is it allowed to handle calls for current GPU? 
*/ + if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) + return -ENOSYS; + + if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) { + + /* set up new cyclestats buffer */ + dmabuf = dma_buf_get(args->dmabuf_fd); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + virtual_address = dma_buf_vmap(dmabuf); + if (!virtual_address) + return -ENOMEM; + + priv->cyclestate_buffer_handler = dmabuf; + ch->cyclestate.cyclestate_buffer = virtual_address; + ch->cyclestate.cyclestate_buffer_size = dmabuf->size; + return 0; + + } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) { + gk20a_channel_free_cycle_stats_buffer(ch); + return 0; + + } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) { + /* no request from GL */ + return 0; + + } else { + pr_err("channel already has cyclestats buffer\n"); + return -EINVAL; + } +} + +static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) +{ + int ret; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (ch->cs_client) + ret = gr_gk20a_css_flush(ch, ch->cs_client); + else + ret = -EBADF; + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; +} + +static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, + u32 dmabuf_fd, + u32 perfmon_id_count, + u32 *perfmon_id_start) +{ + int ret = 0; + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot_client_linux *client_linux; + struct gk20a_cs_snapshot_client *client; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (ch->cs_client) { + nvgpu_mutex_release(&ch->cs_client_mutex); + return -EEXIST; + } + + client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); + if (!client_linux) { + ret = -ENOMEM; + goto err; + } + + client_linux->dmabuf_fd = dmabuf_fd; + client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); + if (IS_ERR(client_linux->dma_handler)) { + ret = PTR_ERR(client_linux->dma_handler); + client_linux->dma_handler = NULL; + goto err_free; + } + + client = &client_linux->cs_client; + client->snapshot_size = client_linux->dma_handler->size; + if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { + ret = -ENOMEM; + goto err_put; + } + + client->snapshot = (struct gk20a_cs_snapshot_fifo *) + dma_buf_vmap(client_linux->dma_handler); + if (!client->snapshot) { + ret = -ENOMEM; + goto err_put; + } + + ch->cs_client = client; + + ret = gr_gk20a_css_attach(ch, + perfmon_id_count, + perfmon_id_start, + ch->cs_client); + + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; + +err_put: + dma_buf_put(client_linux->dma_handler); +err_free: + nvgpu_kfree(g, client_linux); +err: + nvgpu_mutex_release(&ch->cs_client_mutex); + return ret; +} + +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) +{ + int ret; + struct gk20a_cs_snapshot_client_linux *client_linux; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (!ch->cs_client) { + nvgpu_mutex_release(&ch->cs_client_mutex); + return 0; + } + + client_linux = container_of(ch->cs_client, + struct gk20a_cs_snapshot_client_linux, + cs_client); + + ret = gr_gk20a_css_detach(ch, ch->cs_client); + + if (client_linux->dma_handler) { + if (ch->cs_client->snapshot) + dma_buf_vunmap(client_linux->dma_handler, + ch->cs_client->snapshot); + dma_buf_put(client_linux->dma_handler); + } + + ch->cs_client = NULL; + nvgpu_kfree(ch->g, client_linux); + + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; +} + +static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, + struct nvgpu_cycle_stats_snapshot_args *args) +{ + int ret; + + /* is it allowed to handle calls for
current GPU? */ + if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) + return -ENOSYS; + + if (!args->dmabuf_fd) + return -EINVAL; + + /* handle the command (most frequent cases first) */ + switch (args->cmd) { + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: + ret = gk20a_flush_cycle_stats_snapshot(ch); + args->extra = 0; + break; + + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: + ret = gk20a_attach_cycle_stats_snapshot(ch, + args->dmabuf_fd, + args->extra, + &args->extra); + break; + + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: + ret = gk20a_channel_free_cycle_stats_snapshot(ch); + args->extra = 0; + break; + + default: + pr_err("cyclestats: unknown command %u\n", args->cmd); + ret = -EINVAL; + break; + } + + return ret; +} +#endif + +static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, + struct nvgpu_channel_wdt_args *args) +{ + u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | + NVGPU_IOCTL_CHANNEL_ENABLE_WDT); + + if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) + ch->timeout.enabled = false; + else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) + ch->timeout.enabled = true; + else + return -EINVAL; + + if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) + ch->timeout.limit_ms = args->timeout_ms; + + ch->timeout.debug_dump = (args->wdt_status & + NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; + + return 0; +} + +static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); + dma_buf_put(priv->error_notifier.dmabuf); + priv->error_notifier.dmabuf = NULL; + priv->error_notifier.notification = NULL; + priv->error_notifier.vaddr = NULL; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +static int gk20a_init_error_notifier(struct channel_gk20a *ch, + struct nvgpu_set_error_notifier *args) +{ + struct dma_buf *dmabuf; + void *va; + u64 end = args->offset + sizeof(struct nvgpu_notification); + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (!args->mem) { + pr_err("gk20a_init_error_notifier: invalid memory handle\n"); + return -EINVAL; + } + + dmabuf = dma_buf_get(args->mem); + + gk20a_channel_free_error_notifiers(ch); + + if (IS_ERR(dmabuf)) { + pr_err("Invalid handle: %d\n", args->mem); + return -EINVAL; + } + + if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { + dma_buf_put(dmabuf); + nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + /* map handle */ + va = dma_buf_vmap(dmabuf); + if (!va) { + dma_buf_put(dmabuf); + pr_err("Cannot map notifier handle\n"); + return -ENOMEM; + } + + priv->error_notifier.notification = va + args->offset; + priv->error_notifier.vaddr = va; + memset(priv->error_notifier.notification, 0, + sizeof(struct nvgpu_notification)); + + /* set channel notifiers pointer */ + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + priv->error_notifier.dmabuf = dmabuf; + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return 0; +} + +/* + * This returns the channel with a reference. The caller must + * gk20a_channel_put() the ref back after use. + * + * NULL is returned if the channel was not found. 
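As the comment above says, gk20a_get_channel_from_file() (defined just below) hands the caller a referenced channel, so every successful lookup must be balanced with gk20a_channel_put(). A sketch of the intended calling pattern — the wrapper function is hypothetical; only the lookup/put pair is the real API from this file:

/* Sketch only: mydev_op_on_channel_fd() is illustrative, not nvgpu code. */
static int mydev_op_on_channel_fd(int fd)
{
	struct channel_gk20a *ch = gk20a_get_channel_from_file(fd);

	if (!ch)
		return -EINVAL;	/* bad fd, foreign file, or dying channel */

	/* ... ch is pinned against teardown while we hold the ref ... */

	gk20a_channel_put(ch);	/* balance the ref taken by the lookup */
	return 0;
}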
+ */ +struct channel_gk20a *gk20a_get_channel_from_file(int fd) +{ + struct channel_gk20a *ch; + struct channel_priv *priv; + struct file *f = fget(fd); + + if (!f) + return NULL; + + if (f->f_op != &gk20a_channel_ops) { + fput(f); + return NULL; + } + + priv = (struct channel_priv *)f->private_data; + ch = gk20a_channel_get(priv->c); + fput(f); + return ch; +} + +int gk20a_channel_release(struct inode *inode, struct file *filp) +{ + struct channel_priv *priv = filp->private_data; + struct channel_gk20a *ch; + struct gk20a *g; + + int err; + + /* We could still end up here even if the channel_open failed, e.g. + * if we ran out of hw channel IDs. + */ + if (!priv) + return 0; + + ch = priv->c; + g = priv->g; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to release a channel!"); + goto channel_release; + } + + trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); + + gk20a_channel_close(ch); + gk20a_channel_free_error_notifiers(ch); + + gk20a_idle(g); + +channel_release: + gk20a_put(g); + nvgpu_kfree(g, filp->private_data); + filp->private_data = NULL; + return 0; +} + +/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ +static int __gk20a_channel_open(struct gk20a *g, + struct file *filp, s32 runlist_id) +{ + int err; + struct channel_gk20a *ch; + struct channel_priv *priv; + + nvgpu_log_fn(g, " "); + + g = gk20a_get(g); + if (!g) + return -ENODEV; + + trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on, %d", err); + goto fail_busy; + } + /* All the user space channel should be non privilege */ + ch = gk20a_open_new_channel(g, runlist_id, false, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + gk20a_idle(g); + if (!ch) { + nvgpu_err(g, + "failed to get f"); + err = -ENOMEM; + goto fail_busy; + } + + gk20a_channel_trace_sched_param( + trace_gk20a_channel_sched_defaults, ch); + + priv->g = g; + priv->c = ch; + + filp->private_data = priv; + return 0; + +fail_busy: + nvgpu_kfree(g, priv); +free_ref: + gk20a_put(g); + return err; +} + +int gk20a_channel_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, channel.cdev); + struct gk20a *g = &l->g; + int ret; + + nvgpu_log_fn(g, "start"); + ret = __gk20a_channel_open(g, filp, -1); + + nvgpu_log_fn(g, "end"); + return ret; +} + +int gk20a_channel_open_ioctl(struct gk20a *g, + struct nvgpu_channel_open_args *args) +{ + int err; + int fd; + struct file *file; + char name[64]; + s32 runlist_id = args->in.runlist_id; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvhost-%s-fd%d", + dev_name(dev_from_gk20a(g)), fd); + + file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = __gk20a_channel_open(g, file, runlist_id); + if (err) + goto clean_up_file; + + fd_install(fd, file); + args->out.channel_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) + flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR; + + if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) + flags 
|= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC; + + if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) + flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE; + + return flags; +} + +static void nvgpu_get_gpfifo_ex_args( + struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, + struct nvgpu_gpfifo_args *gpfifo_args) +{ + gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries; + gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs; + gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( + alloc_gpfifo_ex_args->flags); +} + +static void nvgpu_get_gpfifo_args( + struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, + struct nvgpu_gpfifo_args *gpfifo_args) +{ + /* + * Kernel can insert one extra gpfifo entry before user + * submitted gpfifos and another one after, for internal usage. + * Triple the requested size. + */ + gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3; + gpfifo_args->num_inflight_jobs = 0; + gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( + alloc_gpfifo_args->flags); +} + +static void nvgpu_get_fence_args( + struct nvgpu_fence *fence_args_in, + struct nvgpu_channel_fence *fence_args_out) +{ + fence_args_out->id = fence_args_in->id; + fence_args_out->value = fence_args_in->value; +} + +static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, + ulong id, u32 offset, + u32 payload, u32 timeout) +{ + struct dma_buf *dmabuf; + void *data; + u32 *semaphore; + int ret = 0; + + /* do not wait if channel has timed out */ + if (ch->has_timedout) + return -ETIMEDOUT; + + dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); + return -EINVAL; + } + + data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); + if (!data) { + nvgpu_err(ch->g, "failed to map notifier memory"); + ret = -EINVAL; + goto cleanup_put; + } + + semaphore = data + (offset & ~PAGE_MASK); + + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &ch->semaphore_wq, + *semaphore == payload || ch->has_timedout, + timeout); + + dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); +cleanup_put: + dma_buf_put(dmabuf); + return ret; +} + +static int gk20a_channel_wait(struct channel_gk20a *ch, + struct nvgpu_wait_args *args) +{ + struct dma_buf *dmabuf; + struct gk20a *g = ch->g; + struct notification *notif; + struct timespec tv; + u64 jiffies; + ulong id; + u32 offset; + int remain, ret = 0; + u64 end; + + nvgpu_log_fn(g, " "); + + if (ch->has_timedout) + return -ETIMEDOUT; + + switch (args->type) { + case NVGPU_WAIT_TYPE_NOTIFIER: + id = args->condition.notifier.dmabuf_fd; + offset = args->condition.notifier.offset; + end = offset + sizeof(struct notification); + + dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", + id); + return -EINVAL; + } + + if (end > dmabuf->size || end < sizeof(struct notification)) { + dma_buf_put(dmabuf); + nvgpu_err(g, "invalid notifier offset"); + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + notif = dma_buf_vmap(dmabuf); + if (!notif) { + nvgpu_err(g, "failed to map notifier memory"); + return -ENOMEM; + } + + notif = (struct notification *)((uintptr_t)notif + offset); + + /* user should set status pending before + * calling this ioctl */ + remain = NVGPU_COND_WAIT_INTERRUPTIBLE( + &ch->notifier_wq, + notif->status == 0 || ch->has_timedout, + args->timeout); + + if (remain == 0 && notif->status != 0) { + ret = -ETIMEDOUT; + goto notif_clean_up; + } else if (remain < 0) { + ret = -EINTR; + goto notif_clean_up; + 
} + + /* TBD: fill in correct information */ + jiffies = get_jiffies_64(); + jiffies_to_timespec(jiffies, &tv); + notif->timestamp.nanoseconds[0] = tv.tv_nsec; + notif->timestamp.nanoseconds[1] = tv.tv_sec; + notif->info32 = 0xDEADBEEF; /* should be object name */ + notif->info16 = ch->chid; /* should be method offset */ + +notif_clean_up: + dma_buf_vunmap(dmabuf, notif); + return ret; + + case NVGPU_WAIT_TYPE_SEMAPHORE: + ret = gk20a_channel_wait_semaphore(ch, + args->condition.semaphore.dmabuf_fd, + args->condition.semaphore.offset, + args->condition.semaphore.payload, + args->timeout); + + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, + struct nvgpu_zcull_bind_args *args) +{ + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; + + nvgpu_log_fn(gr->g, " "); + + return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, + args->gpu_va, args->mode); +} + +static int gk20a_ioctl_channel_submit_gpfifo( + struct channel_gk20a *ch, + struct nvgpu_submit_gpfifo_args *args) +{ + struct nvgpu_channel_fence fence; + struct gk20a_fence *fence_out; + struct fifo_profile_gk20a *profile = NULL; + u32 submit_flags = 0; + int fd = -1; + struct gk20a *g = ch->g; + + int ret = 0; + nvgpu_log_fn(g, " "); + + profile = gk20a_fifo_profile_acquire(ch->g); + gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); + + if (ch->has_timedout) + return -ETIMEDOUT; + + nvgpu_get_fence_args(&args->fence, &fence); + submit_flags = + nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); + + /* Try and allocate an fd here*/ + if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + + ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, + submit_flags, &fence, + &fence_out, profile); + + if (ret) { + if (fd != -1) + put_unused_fd(fd); + goto clean_up; + } + + /* Convert fence_out to something we can pass back to user space. 
*/ + if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { + if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); + else + args->fence.id = fd; + } else { + args->fence.id = fence_out->syncpt_id; + args->fence.value = fence_out->syncpt_value; + } + } + gk20a_fence_put(fence_out); + + gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); + if (profile) + gk20a_fifo_profile_release(ch->g, profile); + +clean_up: + return ret; +} + +/* + * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* + * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* + */ +u32 nvgpu_get_common_runlist_level(u32 level) +{ + switch (level) { + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; + default: + pr_err("%s: incorrect runlist level\n", __func__); + } + + return level; +} + +static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) + flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; + + if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) + flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; + + return flags; +} + +static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, + u32 class_num, u32 user_flags) +{ + return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, + nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); +} + +/* + * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) +{ + u32 flags = 0; + + if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) + flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; + if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) + flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; + + return flags; +} + +/* + * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) +{ + u32 flags = 0; + + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; + + return flags; +} + +/* + * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; + } + + return graphics_preempt_mode; +} + +/* + * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + */ +u32 
nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; + case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: + return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; + } + + return compute_preempt_mode; +} + +/* + * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + */ +static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: + return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; + case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: + return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } + + return graphics_preempt_mode; +} + +/* + * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + */ +static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: + return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; + case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: + return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; + case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: + return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } + + return compute_preempt_mode; +} + +static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, + u32 graphics_preempt_mode, u32 compute_preempt_mode) +{ + int err; + + if (ch->g->ops.gr.set_preemption_mode) { + err = gk20a_busy(ch->g); + if (err) { + nvgpu_err(ch->g, "failed to power on, %d", err); + return err; + } + err = ch->g->ops.gr.set_preemption_mode(ch, + nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), + nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); + gk20a_idle(ch->g); + } else { + err = -EINVAL; + } + + return err; +} + +static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, + struct nvgpu_get_user_syncpoint_args *args) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a *g = ch->g; + int err; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { + nvgpu_err(g, "user syncpoints not supported"); + return -EINVAL; + } + + if (!gk20a_platform_has_syncpoints(g)) { + nvgpu_err(g, "syncpoints not supported"); + return -EINVAL; + } + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_err(g, "sufficient syncpoints not available"); + return -EINVAL; + } + + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync) { + nvgpu_mutex_release(&ch->sync_lock); + } else { + ch->user_sync = gk20a_channel_sync_create(ch, true); + if (!ch->user_sync) { + nvgpu_mutex_release(&ch->sync_lock); + return -ENOMEM; + } + nvgpu_mutex_release(&ch->sync_lock); + + if (g->ops.fifo.resetup_ramfc) { + err = g->ops.fifo.resetup_ramfc(ch); + if (err) + return err; + } + } + + args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); + args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, + args->syncpoint_id); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) + args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); + else + args->gpu_va = 0; + + return 0; +#else + return -EINVAL; +#endif +} + +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct channel_priv *priv = filp->private_data; + struct channel_gk20a *ch = priv->c; + struct device 
*dev = dev_from_gk20a(ch->g); + u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; + int err = 0; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) + return -EINVAL; + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + /* take a ref or return timeout if channel refs can't be taken */ + ch = gk20a_channel_get(ch); + if (!ch) + return -ETIMEDOUT; + + /* protect our sanity for threaded userspace - most of the channel is + * not thread safe */ + nvgpu_mutex_acquire(&ch->ioctl_lock); + + /* this ioctl call keeps a ref to the file which keeps a ref to the + * channel */ + + switch (cmd) { + case NVGPU_IOCTL_CHANNEL_OPEN: + err = gk20a_channel_open_ioctl(ch->g, + (struct nvgpu_channel_open_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: + break; + case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: + { + struct nvgpu_alloc_obj_ctx_args *args = + (struct nvgpu_alloc_obj_ctx_args *)buf; + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: + { + struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = + (struct nvgpu_alloc_gpfifo_ex_args *)buf; + struct nvgpu_gpfifo_args gpfifo_args; + + nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args); + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { + err = -EINVAL; + gk20a_idle(ch->g); + break; + } + err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: + { + struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = + (struct nvgpu_alloc_gpfifo_args *)buf; + struct nvgpu_gpfifo_args gpfifo_args; + + nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args); + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: + err = gk20a_ioctl_channel_submit_gpfifo(ch, + (struct nvgpu_submit_gpfifo_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_WAIT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + /* waiting is thread-safe, not dropping this mutex could + * deadlock in certain conditions */ + nvgpu_mutex_release(&ch->ioctl_lock); + + err = gk20a_channel_wait(ch, + (struct nvgpu_wait_args *)buf); + + nvgpu_mutex_acquire(&ch->ioctl_lock); + + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_zcull_bind(ch, + (struct nvgpu_zcull_bind_args *)buf); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = 
gk20a_init_error_notifier(ch, + (struct nvgpu_set_error_notifier *)buf); + gk20a_idle(ch->g); + break; +#ifdef CONFIG_GK20A_CYCLE_STATS + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_cycle_stats(ch, + (struct nvgpu_cycle_stats_args *)buf); + gk20a_idle(ch->g); + break; +#endif + case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: + { + u32 timeout = + (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; + nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->chid); + ch->timeout_ms_max = timeout; + gk20a_channel_trace_sched_param( + trace_gk20a_channel_set_timeout, ch); + break; + } + case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: + { + u32 timeout = + (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; + bool timeout_debug_dump = !((u32) + ((struct nvgpu_set_timeout_ex_args *)buf)->flags & + (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); + nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->chid); + ch->timeout_ms_max = timeout; + ch->timeout_debug_dump = timeout_debug_dump; + gk20a_channel_trace_sched_param( + trace_gk20a_channel_set_timeout, ch); + break; + } + case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: + ((struct nvgpu_get_param_args *)buf)->value = + ch->has_timedout; + break; + case NVGPU_IOCTL_CHANNEL_ENABLE: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + if (ch->g->ops.fifo.enable_channel) + ch->g->ops.fifo.enable_channel(ch); + else + err = -ENOSYS; + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_DISABLE: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + if (ch->g->ops.fifo.disable_channel) + ch->g->ops.fifo.disable_channel(ch); + else + err = -ENOSYS; + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_PREEMPT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_fifo_preempt(ch->g, ch); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: + if (!capable(CAP_SYS_NICE)) { + err = -EPERM; + break; + } + if (!ch->g->ops.fifo.reschedule_runlist) { + err = -ENOSYS; + break; + } + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.fifo.reschedule_runlist(ch, + NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & + ((struct nvgpu_reschedule_runlist_args *)buf)->flags); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_FORCE_RESET: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.fifo.force_reset_ch(ch, + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); + gk20a_idle(ch->g); + break; +#ifdef CONFIG_GK20A_CYCLE_STATS + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_cycle_stats_snapshot(ch, + (struct nvgpu_cycle_stats_snapshot_args *)buf); + gk20a_idle(ch->g); + break; +#endif + case NVGPU_IOCTL_CHANNEL_WDT: + err = gk20a_channel_set_wdt_status(ch, + (struct nvgpu_channel_wdt_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: 
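Almost every case in this switch wraps its hardware work in the same power bracket: gk20a_busy() takes a runtime power reference (powering the GPU up if needed) and can fail, and gk20a_idle() drops that reference. The shape of the bracket, as a standalone sketch (mydev_powered_op() is hypothetical; gk20a_busy()/gk20a_idle() are the real nvgpu calls used throughout this file, and the sketch assumes the nvgpu-internal "gk20a/gk20a.h" types):

static int mydev_powered_op(struct gk20a *g)
{
	int err;

	err = gk20a_busy(g);	/* take a power ref; may power the GPU on */
	if (err)
		return err;	/* busy failed: no ref held, do not idle */

	/* ... GPU is guaranteed powered inside this bracket ... */

	gk20a_idle(g);		/* drop the ref; GPU may rail-gate again */
	return 0;
}

Note the asymmetry on failure: when gk20a_busy() returns an error no reference was taken, so the error paths in the cases above break out without calling gk20a_idle().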
+ err = nvgpu_ioctl_channel_set_preemption_mode(ch, + ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, + ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); + break; + case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: + if (ch->g->ops.gr.set_boosted_ctx) { + bool boost = + ((struct nvgpu_boosted_ctx_args *)buf)->boost; + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.gr.set_boosted_ctx(ch, boost); + gk20a_idle(ch->g); + } else { + err = -EINVAL; + } + break; + case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = nvgpu_ioctl_channel_get_user_syncpoint(ch, + (struct nvgpu_get_user_syncpoint_args *)buf); + gk20a_idle(ch->g); + break; + default: + dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + nvgpu_mutex_release(&ch->ioctl_lock); + + gk20a_channel_put(ch); + + nvgpu_log_fn(g, "end"); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.h b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h new file mode 100644 index 00000000..48cff1ea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_IOCTL_CHANNEL_H__ +#define __NVGPU_IOCTL_CHANNEL_H__ + +#include + +#include "gk20a/css_gr_gk20a.h" + +struct inode; +struct file; +struct gk20a; +struct nvgpu_channel_open_args; + +struct gk20a_cs_snapshot_client_linux { + struct gk20a_cs_snapshot_client cs_client; + + u32 dmabuf_fd; + struct dma_buf *dma_handler; +}; + +int gk20a_channel_open(struct inode *inode, struct file *filp); +int gk20a_channel_release(struct inode *inode, struct file *filp); +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +int gk20a_channel_open_ioctl(struct gk20a *g, + struct nvgpu_channel_open_args *args); + +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); +void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); + +extern const struct file_operations gk20a_channel_ops; + +u32 nvgpu_get_common_runlist_level(u32 level); + +u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); +u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); +u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); +u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c new file mode 100644 index 00000000..501b5f93 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
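A note on the event bookkeeping later in this file: __pending_event() keeps free-running u32 head/tail counters for the notification queue and relies on modulo-2^32 unsigned subtraction, so the fill level stays correct even after the counters wrap. A small userspace-style illustration of that arithmetic (the values are made up and this is not nvgpu API, just the same unsigned-counter idiom):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size = 8;		/* queue capacity, in entries */
	uint32_t head = 0xfffffffeu;	/* consumer counter, about to wrap */
	uint32_t tail = 0x00000003u;	/* producer counter, already wrapped */

	/* modulo-2^32 subtraction still yields the fill level: 5 */
	printf("pending = %u\n", tail - head);

	/* if the producer outran the queue, drop the oldest entries by
	 * pulling head forward, mirroring the clamp in __pending_event() */
	if ((tail - head) >= size)
		head = tail - size;

	printf("head = %u\n", head);	/* unchanged here, since 5 < 8 */
	return 0;
}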
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_DEBUG_FS +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "clk/clk.h" +#include "pstate/pstate.h" +#include "lpwr/lpwr.h" +#include "volt/volt.h" + +#ifdef CONFIG_DEBUG_FS +#include "os_linux.h" +#endif + +static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, + struct file *filp) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_clk_session *session = dev->session; + + + clk_arb_dbg(session->g, " "); + + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + return 0; +} + +static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) +{ + unsigned int poll_mask = 0; + + if (nvgpu_poll_mask & NVGPU_POLLIN) + poll_mask |= POLLIN; + if (nvgpu_poll_mask & NVGPU_POLLPRI) + poll_mask |= POLLPRI; + if (nvgpu_poll_mask & NVGPU_POLLOUT) + poll_mask |= POLLOUT; + if (nvgpu_poll_mask & NVGPU_POLLRDNORM) + poll_mask |= POLLRDNORM; + if (nvgpu_poll_mask & NVGPU_POLLHUP) + poll_mask |= POLLHUP; + + return poll_mask; +} + +static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + + clk_arb_dbg(dev->session->g, " "); + + poll_wait(filp, &dev->readout_wq.wq, wait); + return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); +} + +void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) +{ + nvgpu_cond_broadcast_interruptible(&dev->readout_wq); +} + +static int nvgpu_clk_arb_release_event_dev(struct inode *inode, + struct file *filp) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_clk_session *session = dev->session; + struct nvgpu_clk_arb *arb; + + arb = session->g->clk_arb; + + clk_arb_dbg(session->g, " "); + + if (arb) { + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_del(&dev->link); + nvgpu_spinlock_release(&arb->users_lock); + nvgpu_clk_notification_queue_free(arb->g, &dev->queue); + } + + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + + return 0; +} + +static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) +{ + u32 nvgpu_gpu_event; + + switch (nvgpu_event) { + case NVGPU_EVENT_VF_UPDATE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; + break; + case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; + break; + case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; + break; + case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; + break; + case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: + nvgpu_gpu_event = 
NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; + break; + case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; + break; + case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; + break; + case NVGPU_EVENT_ALARM_GPU_LOST: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; + break; + default: + /* Control shouldn't come here */ + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; + break; + } + return nvgpu_gpu_event; +} + +static inline u32 __pending_event(struct nvgpu_clk_dev *dev, + struct nvgpu_gpu_event_info *info) { + + u32 tail, head; + u32 events = 0; + struct nvgpu_clk_notification *p_notif; + + tail = nvgpu_atomic_read(&dev->queue.tail); + head = nvgpu_atomic_read(&dev->queue.head); + + head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; + + if (_WRAPGTEQ(tail, head) && info) { + head++; + p_notif = &dev->queue.notifications[head % dev->queue.size]; + events |= nvgpu_convert_gpu_event(p_notif->notification); + info->event_id = ffs(events) - 1; + info->timestamp = p_notif->timestamp; + nvgpu_atomic_set(&dev->queue.head, head); + } + + return events; +} + +static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, + size_t size, loff_t *off) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_gpu_event_info info; + ssize_t err; + + clk_arb_dbg(dev->session->g, + "filp=%p, buf=%p, size=%zu", filp, buf, size); + + if ((size - *off) < sizeof(info)) + return 0; + + memset(&info, 0, sizeof(info)); + /* Get the oldest event from the queue */ + while (!__pending_event(dev, &info)) { + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, + __pending_event(dev, &info), 0); + if (err) + return err; + if (info.timestamp) + break; + } + + if (copy_to_user(buf + *off, &info, sizeof(info))) + return -EFAULT; + + return sizeof(info); +} + +static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, + struct nvgpu_gpu_set_event_filter_args *args) +{ + struct gk20a *g = dev->session->g; + u32 mask; + + nvgpu_log(g, gpu_dbg_fn, " "); + + if (args->flags) + return -EINVAL; + + if (args->size != 1) + return -EINVAL; + + if (copy_from_user(&mask, (void __user *) args->buffer, + args->size * sizeof(u32))) + return -EFAULT; + + /* update alarm mask */ + nvgpu_atomic_set(&dev->enabled_mask, mask); + + return 0; +} + +static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct gk20a *g = dev->session->g; + u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) + || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) + return -EINVAL; + + BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_EVENT_IOCTL_SET_FILTER: + err = nvgpu_clk_arb_set_event_filter(dev, + (struct nvgpu_gpu_set_event_filter_args *)buf); + break; + default: + nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); + + return err; +} + +static const struct 
file_operations completion_dev_ops = { + .owner = THIS_MODULE, + .release = nvgpu_clk_arb_release_completion_dev, + .poll = nvgpu_clk_arb_poll_dev, +}; + +static const struct file_operations event_dev_ops = { + .owner = THIS_MODULE, + .release = nvgpu_clk_arb_release_event_dev, + .poll = nvgpu_clk_arb_poll_dev, + .read = nvgpu_clk_arb_read_event_dev, +#ifdef CONFIG_COMPAT + .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, +#endif + .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, +}; + +static int nvgpu_clk_arb_install_fd(struct gk20a *g, + struct nvgpu_clk_session *session, + const struct file_operations *fops, + struct nvgpu_clk_dev **_dev) +{ + struct file *file; + int fd; + int err; + int status; + char name[64]; + struct nvgpu_clk_dev *dev; + + clk_arb_dbg(g, " "); + + dev = nvgpu_kzalloc(g, sizeof(*dev)); + if (!dev) + return -ENOMEM; + + status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, + DEFAULT_EVENT_NUMBER); + if (status < 0) { + err = status; + goto fail; + } + + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) { + err = fd; + goto fail; + } + + snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); + file = anon_inode_getfile(name, fops, dev, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto fail_fd; + } + + fd_install(fd, file); + + nvgpu_cond_init(&dev->readout_wq); + + nvgpu_atomic_set(&dev->poll_mask, 0); + + dev->session = session; + nvgpu_ref_init(&dev->refcount); + + nvgpu_ref_get(&session->refcount); + + *_dev = dev; + + return fd; + +fail_fd: + put_unused_fd(fd); +fail: + nvgpu_kfree(g, dev); + + return err; +} + +int nvgpu_clk_arb_install_event_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_dev *dev; + int fd; + + clk_arb_dbg(g, " "); + + fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); + if (fd < 0) + return fd; + + /* TODO: alarm mask needs to be set to default value to prevent + * failures of legacy tests. 
This will be removed when sanity is + * updated + */ + if (alarm_mask) + nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); + else + nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); + + dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); + + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_add_tail(&dev->link, &arb->users); + nvgpu_spinlock_release(&arb->users_lock); + + *event_fd = fd; + + return 0; +} + +int nvgpu_clk_arb_install_request_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int *request_fd) +{ + struct nvgpu_clk_dev *dev; + int fd; + + clk_arb_dbg(g, " "); + + fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); + if (fd < 0) + return fd; + + *request_fd = fd; + + return 0; +} + +int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int request_fd) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_dev *dev; + struct fd fd; + int err = 0; + + clk_arb_dbg(g, " "); + + fd = fdget(request_fd); + if (!fd.file) + return -EINVAL; + + if (fd.file->f_op != &completion_dev_ops) { + err = -EINVAL; + goto fdput_fd; + } + + dev = (struct nvgpu_clk_dev *) fd.file->private_data; + + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_spinlock_acquire(&session->session_lock); + nvgpu_list_add(&dev->node, &session->targets); + nvgpu_spinlock_release(&session->session_lock); + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + +fdput_fd: + fdput(fd); + return err; +} + +int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, + int request_fd, u32 api_domain, u16 target_mhz) +{ + struct nvgpu_clk_dev *dev; + struct fd fd; + int err = 0; + + clk_arb_dbg(session->g, + "domain=0x%08x target_mhz=%u", api_domain, target_mhz); + + fd = fdget(request_fd); + if (!fd.file) + return -EINVAL; + + if (fd.file->f_op != &completion_dev_ops) { + err = -EINVAL; + goto fdput_fd; + } + + dev = fd.file->private_data; + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + dev->mclk_target_mhz = target_mhz; + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + dev->gpc2clk_target_mhz = target_mhz * 2ULL; + break; + + default: + err = -EINVAL; + } + +fdput_fd: + fdput(fd); + return err; +} + +u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) +{ + u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); + u32 api_domains = 0; + + if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) + api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); + + if (clk_domains & CTRL_CLK_DOMAIN_MCLK) + api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); + + return api_domains; +} + +#ifdef CONFIG_DEBUG_FS +static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_arb_debug *debug; + + u64 num; + s64 tmp, avg, std, max, min; + + debug = NV_ACCESS_ONCE(arb->debug); + /* Make copy of structure and ensure no reordering */ + nvgpu_smp_rmb(); + if (!debug) + return -EINVAL; + + std = debug->switch_std; + avg = debug->switch_avg; + max = debug->switch_max; + min = debug->switch_min; + num = debug->switch_num; + + tmp = std; + do_div(tmp, num); + seq_printf(s, "Number of transitions: %lld\n", + num); + seq_printf(s, "max / min : %lld / %lld usec\n", + max, min); + seq_printf(s, "avg / std : %lld / %ld usec\n", + avg, int_sqrt(tmp)); + + return 0; +} + +static int 
nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); +} + +static const struct file_operations nvgpu_clk_arb_stats_fops = { + .open = nvgpu_clk_arb_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +int nvgpu_clk_arb_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + struct dentry *d; + + nvgpu_log(g, gpu_dbg_info, "g=%p", g); + + d = debugfs_create_file( + "arb_stats", + S_IRUGO, + gpu_root, + g, + &nvgpu_clk_arb_stats_fops); + if (!d) + return -ENOMEM; + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c new file mode 100644 index 00000000..73a8131d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -0,0 +1,1962 @@ +/* + * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ioctl_ctrl.h" +#include "ioctl_dbg.h" +#include "ioctl_as.h" +#include "ioctl_tsg.h" +#include "ioctl_channel.h" +#include "gk20a/gk20a.h" +#include "gk20a/fence_gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "dmabuf.h" +#include "channel.h" + +#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? 
\ + (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) +#define MHZ_TO_HZ(a) ((u64)a * MHZ) + +struct gk20a_ctrl_priv { + struct device *dev; + struct gk20a *g; + struct nvgpu_clk_session *clk_session; +}; + +static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) + core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; + + return core_flags; +} + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + struct gk20a_ctrl_priv *priv; + int err = 0; + + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, ctrl.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + nvgpu_log_fn(g, " "); + + priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + filp->private_data = priv; + priv->dev = dev_from_gk20a(g); + /* + * We don't close the arbiter fds after driver teardown to support + * GPU_LOST events, so we store g here, instead of dereferencing the + * dev structure on teardown + */ + priv->g = g; + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + goto free_ref; + gk20a_idle(g); + } + + err = nvgpu_clk_arb_init_session(g, &priv->clk_session); +free_ref: + if (err) + gk20a_put(g); + return err; +} +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_ctrl_priv *priv = filp->private_data; + struct gk20a *g = priv->g; + + nvgpu_log_fn(g, " "); + + if (priv->clk_session) + nvgpu_clk_arb_release_session(g, priv->clk_session); + + gk20a_put(g); + nvgpu_kfree(g, priv); + + return 0; +} + +struct nvgpu_flags_mapping { + u64 ioctl_flag; + int enabled_flag; +}; + +static struct nvgpu_flags_mapping flags_mapping[] = { + {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, + NVGPU_HAS_SYNCPOINTS}, + {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, + NVGPU_SUPPORT_PARTIAL_MAPPINGS}, + {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, + NVGPU_SUPPORT_SPARSE_ALLOCS}, + {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, + NVGPU_SUPPORT_SYNC_FENCE_FDS}, + {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, + NVGPU_SUPPORT_CYCLE_STATS}, + {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, + NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, + {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, + NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, + {NVGPU_GPU_FLAGS_SUPPORT_TSG, + NVGPU_SUPPORT_TSG}, + {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, + NVGPU_SUPPORT_CLOCK_CONTROLS}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, + NVGPU_SUPPORT_GET_VOLTAGE}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, + NVGPU_SUPPORT_GET_CURRENT}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, + NVGPU_SUPPORT_GET_POWER}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, + NVGPU_SUPPORT_GET_TEMPERATURE}, + {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, + NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, + {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, + NVGPU_SUPPORT_DEVICE_EVENTS}, + {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, + NVGPU_SUPPORT_FECS_CTXSW_TRACE}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, + NVGPU_SUPPORT_DETERMINISTIC_OPTS}, + {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, + NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, + {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, + NVGPU_SUPPORT_USER_SYNCPOINT}, + {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, + NVGPU_SUPPORT_IO_COHERENCE}, +
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, + NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, + {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, + NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, + NVGPU_ECC_ENABLED_SM_LRF}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, + NVGPU_ECC_ENABLED_SM_SHM}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, + NVGPU_ECC_ENABLED_TEX}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, + NVGPU_ECC_ENABLED_LTC}, + {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, + NVGPU_SUPPORT_TSG_SUBCONTEXTS}, + {NVGPU_GPU_FLAGS_SUPPORT_SCG, + NVGPU_SUPPORT_SCG}, + {NVGPU_GPU_FLAGS_SUPPORT_VPR, + NVGPU_SUPPORT_VPR}, +}; + +static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) +{ + unsigned int i; + u64 ioctl_flags = 0; + + for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { + if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) + ioctl_flags |= flags_mapping[i].ioctl_flag; + } + + return ioctl_flags; +} + +static void nvgpu_set_preemption_mode_flags(struct gk20a *g, + struct nvgpu_gpu_characteristics *gpu) +{ + struct nvgpu_preemption_modes_rec preemption_mode_rec; + + g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); + + gpu->graphics_preemption_mode_flags = + nvgpu_get_ioctl_graphics_preempt_mode_flags( + preemption_mode_rec.graphics_preemption_mode_flags); + gpu->compute_preemption_mode_flags = + nvgpu_get_ioctl_compute_preempt_mode_flags( + preemption_mode_rec.compute_preemption_mode_flags); + + gpu->default_graphics_preempt_mode = + nvgpu_get_ioctl_graphics_preempt_mode( + preemption_mode_rec.default_graphics_preempt_mode); + gpu->default_compute_preempt_mode = + nvgpu_get_ioctl_compute_preempt_mode( + preemption_mode_rec.default_compute_preempt_mode); +} + +static long +gk20a_ctrl_ioctl_gpu_characteristics( + struct gk20a *g, + struct nvgpu_gpu_get_characteristics *request) +{ + struct nvgpu_gpu_characteristics gpu; + long err = 0; + + if (gk20a_busy(g)) { + nvgpu_err(g, "failed to power on gpu"); + return -EINVAL; + } + + memset(&gpu, 0, sizeof(gpu)); + + gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); + gpu.on_board_video_memory_size = 0; /* integrated GPU */ + + gpu.num_gpc = g->gr.gpc_count; + gpu.max_gpc_count = g->gr.max_gpc_count; + + gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; + + gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ + + gpu.compression_page_size = g->ops.fb.compression_page_size(g); + + gpu.gpc_mask = (1 << g->gr.gpc_count)-1; + + gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); + + gpu.arch = g->params.gpu_arch; + gpu.impl = g->params.gpu_impl; + gpu.rev = g->params.gpu_rev; + gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; + gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? 
+ NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; + gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); + gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); + gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); + gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); + gpu.inline_to_memory_class = + g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); + gpu.dma_copy_class = + g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + + gpu.vbios_version = g->bios.vbios_version; + gpu.vbios_oem_version = g->bios.vbios_oem_version; + + gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); + gpu.pde_coverage_bit_count = + g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; + gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); + + gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; + gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; + gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; + + gpu.max_css_buffer_size = g->gr.max_css_buffer_size; + + gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; + gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; + gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; + gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; + gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; + gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; + gpu.gpu_va_bit_count = 40; + + strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); + gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); + gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); + gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); + gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); + gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; + gpu.gr_gobs_per_comptagline_per_slice = + g->gr.gobs_per_comptagline_per_slice; + gpu.num_ltc = g->ltc_count; + gpu.lts_per_ltc = g->gr.slices_per_ltc; + gpu.cbc_cache_line_size = g->gr.cacheline_size; + gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; + + if (g->ops.clk.get_maxrate) + gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); + + gpu.local_video_memory_size = g->mm.vidmem.size; + + gpu.pci_vendor_id = g->pci_vendor_id; + gpu.pci_device_id = g->pci_device_id; + gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; + gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; + gpu.pci_class = g->pci_class; + gpu.pci_revision = g->pci_revision; + + nvgpu_set_preemption_mode_flags(g, &gpu); + + if (request->gpu_characteristics_buf_size > 0) { + size_t write_size = sizeof(gpu); + + if (write_size > request->gpu_characteristics_buf_size) + write_size = request->gpu_characteristics_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + request->gpu_characteristics_buf_addr, + &gpu, write_size); + } + + if (err == 0) + request->gpu_characteristics_buf_size = sizeof(gpu); + + gk20a_idle(g); + + return err; +} + +static int gk20a_ctrl_prepare_compressible_read( + struct gk20a *g, + struct nvgpu_gpu_prepare_compressible_read_args *args) +{ + int ret = -ENOSYS; + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct nvgpu_channel_fence fence; + struct gk20a_fence *fence_out = NULL; + int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( + args->submit_flags); + int fd = -1; + + fence.id = args->fence.syncpt_id; + fence.value = args->fence.syncpt_value; + + /* Try and allocate an fd here*/ + if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) + && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { + fd = 
get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + + ret = gk20a_prepare_compressible_read(l, args->handle, + args->request_compbits, args->offset, + args->compbits_hoffset, args->compbits_voffset, + args->scatterbuffer_offset, + args->width, args->height, args->block_height_log2, + submit_flags, &fence, &args->valid_compbits, + &args->zbc_color, &fence_out); + + if (ret) { + if (fd != -1) + put_unused_fd(fd); + return ret; + } + + /* Convert fence_out to something we can pass back to user space. */ + if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { + if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { + if (fence_out) { + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); + else + args->fence.fd = fd; + } else { + args->fence.fd = -1; + put_unused_fd(fd); + } + } else { + if (fence_out) { + args->fence.syncpt_id = fence_out->syncpt_id; + args->fence.syncpt_value = + fence_out->syncpt_value; + } else { + args->fence.syncpt_id = -1; + args->fence.syncpt_value = 0; + } + } + } + gk20a_fence_put(fence_out); +#endif + + return ret; +} + +static int gk20a_ctrl_mark_compressible_write( + struct gk20a *g, + struct nvgpu_gpu_mark_compressible_write_args *args) +{ + int ret = -ENOSYS; + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + ret = gk20a_mark_compressible_write(g, args->handle, + args->valid_compbits, args->offset, args->zbc_color); +#endif + + return ret; +} + +static int gk20a_ctrl_alloc_as( + struct gk20a *g, + struct nvgpu_alloc_as_args *args) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_as_share *as_share; + int err; + int fd; + struct file *file; + char name[64]; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); + + file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = gk20a_as_alloc_share(g, args->big_page_size, + gk20a_as_translate_as_alloc_flags(g, + args->flags), + &as_share); + if (err) + goto clean_up_file; + + fd_install(fd, file); + file->private_data = as_share; + + args->as_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static int gk20a_ctrl_open_tsg(struct gk20a *g, + struct nvgpu_gpu_open_tsg_args *args) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int err; + int fd; + struct file *file; + char name[64]; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); + + file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = nvgpu_ioctl_tsg_open(g, file); + if (err) + goto clean_up_file; + + fd_install(fd, file); + args->tsg_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, + struct nvgpu_gpu_get_tpc_masks_args *args) +{ + struct gr_gk20a *gr = &g->gr; + int err = 0; + const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; + + if (args->mask_buf_size > 0) { + size_t write_size = gpc_tpc_mask_size; + + if (write_size > args->mask_buf_size) + write_size = args->mask_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + args->mask_buf_addr, + gr->gpc_tpc_mask, write_size); + } + + if (err == 0) + args->mask_buf_size = gpc_tpc_mask_size; + + return err; +} + +static int 
gk20a_ctrl_get_fbp_l2_masks( + struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) +{ + struct gr_gk20a *gr = &g->gr; + int err = 0; + const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; + + if (args->mask_buf_size > 0) { + size_t write_size = fbp_l2_mask_size; + + if (write_size > args->mask_buf_size) + write_size = args->mask_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + args->mask_buf_addr, + gr->fbp_rop_l2_en_mask, write_size); + } + + if (err == 0) + args->mask_buf_size = fbp_l2_mask_size; + + return err; +} + +static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, + struct nvgpu_gpu_l2_fb_args *args) +{ + int err = 0; + + if ((!args->l2_flush && !args->fb_flush) || + (!args->l2_flush && args->l2_invalidate)) + return -EINVAL; + + if (args->l2_flush) + g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); + + if (args->fb_flush) + g->ops.mm.fb_flush(g); + + return err; +} + +/* Invalidate i-cache for kepler & maxwell */ +static int nvgpu_gpu_ioctl_inval_icache( + struct gk20a *g, + struct nvgpu_gpu_inval_icache_args *args) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.inval_icache(g, ch); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + return err; +} + +static int nvgpu_gpu_ioctl_set_mmu_debug_mode( + struct gk20a *g, + struct nvgpu_gpu_mmu_debug_mode_args *args) +{ + if (gk20a_busy(g)) { + nvgpu_err(g, "failed to power on gpu"); + return -EINVAL; + } + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.fb.set_debug_mode(g, args->state == 1); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + return 0; +} + +static int nvgpu_gpu_ioctl_set_debug_mode( + struct gk20a *g, + struct nvgpu_gpu_sm_debug_mode_args *args) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (g->ops.gr.set_sm_debug_mode) + err = g->ops.gr.set_sm_debug_mode(g, ch, + args->sms, !!args->enable); + else + err = -ENOSYS; + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + return err; +} + +static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.trigger_suspend(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, + struct nvgpu_gpu_wait_pause_args *args) +{ + int err; + struct warpstate *ioctl_w_state; + struct nvgpu_warpstate *w_state = NULL; + u32 sm_count, ioctl_size, size, sm_id; + + sm_count = g->gr.gpc_count * g->gr.tpc_count; + + ioctl_size = sm_count * sizeof(struct warpstate); + ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); + if (!ioctl_w_state) + return -ENOMEM; + + size = sm_count * sizeof(struct nvgpu_warpstate); + w_state = nvgpu_kzalloc(g, size); + if (!w_state) { + err = -ENOMEM; + goto out_free; + } + + err = gk20a_busy(g); + if (err) + goto out_free; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.gr.wait_for_pause(g, w_state); + + for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + ioctl_w_state[sm_id].valid_warps[0] = + w_state[sm_id].valid_warps[0]; + ioctl_w_state[sm_id].valid_warps[1] = + 
w_state[sm_id].valid_warps[1]; + ioctl_w_state[sm_id].trapped_warps[0] = + w_state[sm_id].trapped_warps[0]; + ioctl_w_state[sm_id].trapped_warps[1] = + w_state[sm_id].trapped_warps[1]; + ioctl_w_state[sm_id].paused_warps[0] = + w_state[sm_id].paused_warps[0]; + ioctl_w_state[sm_id].paused_warps[1] = + w_state[sm_id].paused_warps[1]; + } + /* Copy the converted ioctl-layout state to user space - pointed by "args->pwarpstate" */ + if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, + ioctl_w_state, ioctl_size)) { + nvgpu_log_fn(g, "copy_to_user failed!"); + err = -EFAULT; + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + +out_free: + nvgpu_kfree(g, w_state); + nvgpu_kfree(g, ioctl_w_state); + + return err; +} + +static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.resume_from_pause(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.gr.clear_sm_errors(g); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_has_any_exception( + struct gk20a *g, + struct nvgpu_gpu_tpc_exception_en_status_args *args) +{ + u32 tpc_exception_en; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + args->tpc_exception_en_sm_mask = tpc_exception_en; + + return 0; +} + +static int gk20a_ctrl_get_num_vsms(struct gk20a *g, + struct nvgpu_gpu_num_vsms *args) +{ + struct gr_gk20a *gr = &g->gr; + args->num_vsms = gr->no_of_sm; + return 0; +} + +static int gk20a_ctrl_vsm_mapping(struct gk20a *g, + struct nvgpu_gpu_vsms_mapping *args) +{ + int err = 0; + struct gr_gk20a *gr = &g->gr; + size_t write_size = gr->no_of_sm * + sizeof(struct nvgpu_gpu_vsms_mapping_entry); + struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; + u32 i; + + vsms_buf = nvgpu_kzalloc(g, write_size); + if (vsms_buf == NULL) + return -ENOMEM; + + for (i = 0; i < gr->no_of_sm; i++) { + vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; + if (g->ops.gr.get_nonpes_aware_tpc) + vsms_buf[i].tpc_index = + g->ops.gr.get_nonpes_aware_tpc(g, + gr->sm_to_cluster[i].gpc_index, + gr->sm_to_cluster[i].tpc_index); + else + vsms_buf[i].tpc_index = + gr->sm_to_cluster[i].tpc_index; + } + + err = copy_to_user((void __user *)(uintptr_t) + args->vsms_map_buf_addr, + vsms_buf, write_size); + nvgpu_kfree(g, vsms_buf); + + return err; +} + +static int nvgpu_gpu_get_cpu_time_correlation_info( + struct gk20a *g, + struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) +{ + struct nvgpu_cpu_time_correlation_sample *samples; + int err; + u32 i; + + if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || + args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) + return -EINVAL; + + samples = nvgpu_kzalloc(g, args->count * + sizeof(struct nvgpu_cpu_time_correlation_sample)); + if (!samples) { + return -ENOMEM; + } + + err = g->ops.ptimer.get_timestamps_zipper(g, + args->source_id, args->count, samples); + if (!err) { + for (i = 0; i < args->count; i++) { + args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; + args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; + } + } + + nvgpu_kfree(g, samples); + + return err; +} + +static int nvgpu_gpu_get_gpu_time( + struct gk20a *g, + struct nvgpu_gpu_get_gpu_time_args
*args) +{ + u64 time; + int err; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.ptimer.read_ptimer(g, &time); + if (!err) + args->gpu_timestamp = time; + + gk20a_idle(g); + return err; +} + +static int nvgpu_gpu_get_engine_info( + struct gk20a *g, + struct nvgpu_gpu_get_engine_info_args *args) +{ + int err = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; + u32 report_index = 0; + u32 engine_id_idx; + const u32 max_buffer_engines = args->engine_info_buf_size / + sizeof(struct nvgpu_gpu_get_engine_info_item); + struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = + (void __user *)(uintptr_t)args->engine_info_buf_addr; + + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; + ++engine_id_idx) { + u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + const struct fifo_engine_info_gk20a *src_info = + &g->fifo.engine_info[active_engine_id]; + struct nvgpu_gpu_get_engine_info_item dst_info; + + memset(&dst_info, 0, sizeof(dst_info)); + + engine_enum = src_info->engine_enum; + + switch (engine_enum) { + case ENGINE_GR_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; + break; + + case ENGINE_GRCE_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; + break; + + case ENGINE_ASYNC_CE_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; + break; + + default: + nvgpu_err(g, "Unmapped engine enum %u", + engine_enum); + continue; + } + + dst_info.engine_instance = src_info->inst_id; + dst_info.runlist_id = src_info->runlist_id; + + if (report_index < max_buffer_engines) { + err = copy_to_user(&dst_item_list[report_index], + &dst_info, sizeof(dst_info)); + if (err) + goto clean_up; + } + + ++report_index; + } + + args->engine_info_buf_size = + report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); + +clean_up: + return err; +} + +static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, + struct nvgpu_gpu_alloc_vidmem_args *args) +{ + u32 align = args->in.alignment ? 
args->in.alignment : SZ_4K; + int fd; + + nvgpu_log_fn(g, " "); + + /* not yet supported */ + if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) + return -EINVAL; + + /* not yet supported */ + if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) + return -EINVAL; + + if (args->in.size & (SZ_4K - 1)) + return -EINVAL; + + if (!args->in.size) + return -EINVAL; + + if (align & (align - 1)) + return -EINVAL; + + if (align > roundup_pow_of_two(args->in.size)) { + /* log this special case, buddy allocator detail */ + nvgpu_warn(g, + "alignment larger than buffer size rounded up to power of 2 is not supported"); + return -EINVAL; + } + + fd = nvgpu_vidmem_export_linux(g, args->in.size); + if (fd < 0) + return fd; + + args->out.dmabuf_fd = fd; + + nvgpu_log_fn(g, "done, fd=%d", fd); + + return 0; +} + +static int nvgpu_gpu_get_memory_state(struct gk20a *g, + struct nvgpu_gpu_get_memory_state_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || + args->reserved[2] || args->reserved[3]) + return -EINVAL; + + err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); + + nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); + + return err; +} + +static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) +{ + u32 domain = 0; + + if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) + domain = NVGPU_CLK_DOMAIN_MCLK; + else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) + domain = NVGPU_CLK_DOMAIN_GPCCLK; + else + domain = NVGPU_CLK_DOMAIN_MAX + 1; + + return domain; +} + +static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_vf_points_args *args) +{ + struct nvgpu_gpu_clk_vf_point clk_point; + struct nvgpu_gpu_clk_vf_point __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + u32 clk_domains = 0; + int err; + u16 last_mhz; + u16 *fpoints; + u32 i; + u32 max_points = 0; + u32 num_points = 0; + u16 min_mhz; + u16 max_mhz; + + nvgpu_log_fn(g, " "); + + if (!session || args->flags) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + args->num_entries = 0; + + if (!nvgpu_clk_arb_is_valid_domain(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain))) + return -EINVAL; + + err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &max_points, NULL); + if (err) + return err; + + if (!args->max_entries) { + args->max_entries = max_points; + return 0; + } + + if (args->max_entries < max_points) + return -EINVAL; + + err = nvgpu_clk_arb_get_arbiter_clk_range(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &min_mhz, &max_mhz); + if (err) + return err; + + fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); + if (!fpoints) + return -ENOMEM; + + err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &max_points, fpoints); + if (err) + goto fail; + + entry = (struct nvgpu_gpu_clk_vf_point __user *) + (uintptr_t)args->clk_vf_point_entries; + + last_mhz = 0; + num_points = 0; + for (i = 0; (i < max_points) && !err; i++) { + + /* filter out duplicate frequencies */ + if (fpoints[i] == last_mhz) + continue; + + /* filter out out-of-range frequencies */ + if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) + continue; + + last_mhz = fpoints[i]; + clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); + + err = copy_to_user((void __user *)entry, &clk_point, + sizeof(clk_point)); + + num_points++; + entry++; + } + + args->num_entries = num_points; + +fail: + 
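/* fpoints was only needed to build the user-visible VF points; free it on both the success and the failure path. */ +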
nvgpu_kfree(g, fpoints); + return err; +} + +static int nvgpu_gpu_clk_get_range(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_range_args *args) +{ + struct nvgpu_gpu_clk_range clk_range; + struct nvgpu_gpu_clk_range __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + + u32 clk_domains = 0; + u32 num_domains; + u32 num_entries; + u32 i; + int bit; + int err; + u16 min_mhz, max_mhz; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + num_domains = hweight_long(clk_domains); + + if (!args->flags) { + if (!args->num_entries) { + args->num_entries = num_domains; + return 0; + } + + if (args->num_entries < num_domains) + return -EINVAL; + + args->num_entries = 0; + num_entries = num_domains; + + } else { + if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) + return -EINVAL; + + num_entries = args->num_entries; + if (num_entries > num_domains) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_range __user *) + (uintptr_t)args->clk_range_entries; + + for (i = 0; i < num_entries; i++, entry++) { + + if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { + if (copy_from_user(&clk_range, (void __user *)entry, + sizeof(clk_range))) + return -EFAULT; + } else { + bit = ffs(clk_domains) - 1; + clk_range.clk_domain = bit; + clk_domains &= ~BIT(bit); + } + + clk_range.flags = 0; + err = nvgpu_clk_arb_get_arbiter_clk_range(g, + nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), + &min_mhz, &max_mhz); + clk_range.min_hz = MHZ_TO_HZ(min_mhz); + clk_range.max_hz = MHZ_TO_HZ(max_mhz); + + if (err) + return err; + + err = copy_to_user(entry, &clk_range, sizeof(clk_range)); + if (err) + return -EFAULT; + } + + args->num_entries = num_entries; + + return 0; +} + +static int nvgpu_gpu_clk_set_info(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_set_info_args *args) +{ + struct nvgpu_gpu_clk_info clk_info; + struct nvgpu_gpu_clk_info __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + + int fd; + u32 clk_domains = 0; + u16 freq_mhz; + int i; + int ret; + + nvgpu_log_fn(g, " "); + + if (!session || args->flags) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + if (!clk_domains) + return -EINVAL; + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + for (i = 0; i < args->num_entries; i++, entry++) { + + if (copy_from_user(&clk_info, entry, sizeof(clk_info))) + return -EFAULT; + + if (!nvgpu_clk_arb_is_valid_domain(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); + if (ret < 0) + return ret; + + for (i = 0; i < args->num_entries; i++, entry++) { + + if (copy_from_user(&clk_info, (void __user *)entry, + sizeof(clk_info))) + return -EFAULT; + freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); + + nvgpu_clk_arb_set_session_target_mhz(session, fd, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); + } + + ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); + if (ret < 0) + return ret; + + args->completion_fd = fd; + + return ret; +} + +static int nvgpu_gpu_clk_get_info(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_get_info_args *args) +{ + struct nvgpu_gpu_clk_info clk_info; + struct nvgpu_gpu_clk_info __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + 
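/* Each arbiter clock domain may be queried for up to three clock types (target, actual, effective), hence the num_domains * 3 bound on num_entries below. */ +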
u32 clk_domains = 0; + u32 num_domains; + u32 num_entries; + u32 i; + u16 freq_mhz; + int err; + int bit; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + num_domains = hweight_long(clk_domains); + + if (!args->flags) { + if (!args->num_entries) { + args->num_entries = num_domains; + return 0; + } + + if (args->num_entries < num_domains) + return -EINVAL; + + args->num_entries = 0; + num_entries = num_domains; + + } else { + if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) + return -EINVAL; + + num_entries = args->num_entries; + if (num_entries > num_domains * 3) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + for (i = 0; i < num_entries; i++, entry++) { + + if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { + if (copy_from_user(&clk_info, (void __user *)entry, + sizeof(clk_info))) + return -EFAULT; + } else { + bit = ffs(clk_domains) - 1; + clk_info.clk_domain = bit; + clk_domains &= ~BIT(bit); + clk_info.clk_type = args->clk_type; + } + + switch (clk_info.clk_type) { + case NVGPU_GPU_CLK_TYPE_TARGET: + err = nvgpu_clk_arb_get_session_target_mhz(session, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + case NVGPU_GPU_CLK_TYPE_ACTUAL: + err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + case NVGPU_GPU_CLK_TYPE_EFFECTIVE: + err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + default: + freq_mhz = 0; + err = -EINVAL; + break; + } + if (err) + return err; + + clk_info.flags = 0; + clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); + + err = copy_to_user((void __user *)entry, &clk_info, + sizeof(clk_info)); + if (err) + return -EFAULT; + } + + args->num_entries = num_entries; + + return 0; +} + +static int nvgpu_gpu_get_event_fd(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_get_event_fd_args *args) +{ + struct nvgpu_clk_session *session = priv->clk_session; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, + args->flags); +} + +static int nvgpu_gpu_get_voltage(struct gk20a *g, + struct nvgpu_gpu_get_voltage_args *args) +{ + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + if (args->reserved) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + switch (args->which) { + case NVGPU_GPU_VOLTAGE_CORE: + err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); + break; + case NVGPU_GPU_VOLTAGE_SRAM: + err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); + break; + case NVGPU_GPU_VOLTAGE_BUS: + err = pmgr_pwr_devices_get_voltage(g, &args->voltage); + break; + default: + err = -EINVAL; + } + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_current(struct gk20a *g, + struct nvgpu_gpu_get_current_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = pmgr_pwr_devices_get_current(g, &args->currnt); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_power(struct gk20a *g, + struct nvgpu_gpu_get_power_args *args) +{ + int err; + + 
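/* As with the voltage and current queries above, the GPU must be powered up around the PMGR power query, hence the gk20a_busy()/gk20a_idle() bracketing. */ +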
nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = pmgr_pwr_devices_get_power(g, &args->power); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_temperature(struct gk20a *g, + struct nvgpu_gpu_get_temperature_args *args) +{ + int err; + u32 temp_f24_8; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) + return -EINVAL; + + if (!g->ops.therm.get_internal_sensor_curr_temp) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); + + gk20a_idle(g); + + args->temp_f24_8 = (s32)temp_f24_8; + + return err; +} + +static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, + struct nvgpu_gpu_set_therm_alert_limit_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!g->ops.therm.configure_therm_alert) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, + u32 flags) +{ + int err = 0; + bool allow; + bool disallow; + + allow = flags & + NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; + + disallow = flags & + NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; + + /* Can't be both at the same time */ + if (allow && disallow) + return -EINVAL; + + /* Nothing to do */ + if (!allow && !disallow) + return 0; + + /* + * Moving into explicit idle or back from it? A call that doesn't + * change the status is a no-op. 
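+ * Allowing railgating drops the power reference the channel has been + * holding (gk20a_idle); disallowing takes it back with gk20a_busy().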
+ */ + if (!ch->deterministic_railgate_allowed && + allow) { + gk20a_idle(ch->g); + } else if (ch->deterministic_railgate_allowed && + !allow) { + err = gk20a_busy(ch->g); + if (err) { + nvgpu_warn(ch->g, + "cannot busy to restore deterministic ch"); + return err; + } + } + ch->deterministic_railgate_allowed = allow; + + return err; +} + +static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) +{ + if (!ch->deterministic) + return -EINVAL; + + return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); +} + +static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, + struct nvgpu_gpu_set_deterministic_opts_args *args) +{ + int __user *user_channels; + u32 i = 0; + int err = 0; + + nvgpu_log_fn(g, " "); + + user_channels = (int __user *)(uintptr_t)args->channels; + + /* Upper limit; prevent holding deterministic_busy for long */ + if (args->num_channels > g->fifo.num_channels) { + err = -EINVAL; + goto out; + } + + /* Trivial sanity check first */ + if (!access_ok(VERIFY_READ, user_channels, + args->num_channels * sizeof(int))) { + err = -EFAULT; + goto out; + } + + nvgpu_rwsem_down_read(&g->deterministic_busy); + + /* note: we exit at the first failure */ + for (; i < args->num_channels; i++) { + int ch_fd = 0; + struct channel_gk20a *ch; + + if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { + /* User raced with above access_ok */ + err = -EFAULT; + break; + } + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) { + err = -EINVAL; + break; + } + + err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); + + gk20a_channel_put(ch); + + if (err) + break; + } + + nvgpu_rwsem_up_read(&g->deterministic_busy); + +out: + args->num_channels = i; + return err; +} + +static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, + struct nvgpu_gpu_read_single_sm_error_state_args *args) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr_sm_error_state *sm_error_state; + struct nvgpu_gpu_sm_error_state_record sm_error_state_record; + u32 sm_id; + int err = 0; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + sm_error_state = gr->sm_error_states + sm_id; + sm_error_state_record.global_esr = + sm_error_state->hww_global_esr; + sm_error_state_record.warp_esr = + sm_error_state->hww_warp_esr; + sm_error_state_record.warp_esr_pc = + sm_error_state->hww_warp_esr_pc; + sm_error_state_record.global_esr_report_mask = + sm_error_state->hww_global_esr_report_mask; + sm_error_state_record.warp_esr_report_mask = + sm_error_state->hww_warp_esr_report_mask; + + if (args->record_size > 0) { + size_t write_size = sizeof(sm_error_state_record); + + if (write_size > args->record_size) + write_size = args->record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_to_user((void __user *)(uintptr_t) + args->record_mem, + &sm_error_state_record, + write_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) { + nvgpu_err(g, "copy_to_user failed!"); + return err; + } + + args->record_size = write_size; + } + + return 0; +} + +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct gk20a_ctrl_priv *priv = filp->private_data; + struct gk20a *g = priv->g; + struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; + struct nvgpu_gpu_zcull_get_info_args *get_info_args; + struct nvgpu_gpu_zbc_set_table_args *set_table_args; + struct nvgpu_gpu_zbc_query_table_args *query_table_args; + u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; + struct gr_zcull_info *zcull_info; + struct
zbc_entry *zbc_val; + struct zbc_query_params *zbc_tbl; + int i, err = 0; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + switch (cmd) { + case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: + get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; + + get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); + + break; + case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: + get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; + + memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); + + zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); + if (zcull_info == NULL) + return -ENOMEM; + + err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); + if (err) { + nvgpu_kfree(g, zcull_info); + break; + } + + get_info_args->width_align_pixels = zcull_info->width_align_pixels; + get_info_args->height_align_pixels = zcull_info->height_align_pixels; + get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; + get_info_args->aliquot_total = zcull_info->aliquot_total; + get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; + get_info_args->region_header_size = zcull_info->region_header_size; + get_info_args->subregion_header_size = zcull_info->subregion_header_size; + get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; + get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; + get_info_args->subregion_count = zcull_info->subregion_count; + + nvgpu_kfree(g, zcull_info); + break; + case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: + set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; + + zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); + if (zbc_val == NULL) + return -ENOMEM; + + zbc_val->format = set_table_args->format; + zbc_val->type = set_table_args->type; + + switch (zbc_val->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val->color_ds[i] = set_table_args->color_ds[i]; + zbc_val->color_l2[i] = set_table_args->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + case T19X_ZBC: + zbc_val->depth = set_table_args->depth; + break; + default: + err = -EINVAL; + } + + if (!err) { + err = gk20a_busy(g); + if (!err) { + err = g->ops.gr.zbc_set_table(g, &g->gr, + zbc_val); + gk20a_idle(g); + } + } + + if (zbc_val) + nvgpu_kfree(g, zbc_val); + break; + case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: + query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; + + zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); + if (zbc_tbl == NULL) + return -ENOMEM; + + zbc_tbl->type = query_table_args->type; + zbc_tbl->index_size = query_table_args->index_size; + + err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); + + if (!err) { + switch (zbc_tbl->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; + query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + case T19X_ZBC: + query_table_args->depth = zbc_tbl->depth; + break; + case 
GK20A_ZBC_TYPE_INVALID: + query_table_args->index_size = zbc_tbl->index_size; + break; + default: + err = -EINVAL; + } + if (!err) { + query_table_args->format = zbc_tbl->format; + query_table_args->ref_cnt = zbc_tbl->ref_cnt; + } + } + + if (zbc_tbl) + nvgpu_kfree(g, zbc_tbl); + break; + + case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: + err = gk20a_ctrl_ioctl_gpu_characteristics( + g, (struct nvgpu_gpu_get_characteristics *)buf); + break; + case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: + err = gk20a_ctrl_prepare_compressible_read(g, + (struct nvgpu_gpu_prepare_compressible_read_args *)buf); + break; + case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: + err = gk20a_ctrl_mark_compressible_write(g, + (struct nvgpu_gpu_mark_compressible_write_args *)buf); + break; + case NVGPU_GPU_IOCTL_ALLOC_AS: + err = gk20a_ctrl_alloc_as(g, + (struct nvgpu_alloc_as_args *)buf); + break; + case NVGPU_GPU_IOCTL_OPEN_TSG: + err = gk20a_ctrl_open_tsg(g, + (struct nvgpu_gpu_open_tsg_args *)buf); + break; + case NVGPU_GPU_IOCTL_GET_TPC_MASKS: + err = gk20a_ctrl_get_tpc_masks(g, + (struct nvgpu_gpu_get_tpc_masks_args *)buf); + break; + case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: + err = gk20a_ctrl_get_fbp_l2_masks(g, + (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); + break; + case NVGPU_GPU_IOCTL_OPEN_CHANNEL: + /* this arg type here, but ..gpu_open_channel_args in nvgpu.h + * for consistency - they are the same */ + err = gk20a_channel_open_ioctl(g, + (struct nvgpu_channel_open_args *)buf); + break; + case NVGPU_GPU_IOCTL_FLUSH_L2: + err = nvgpu_gpu_ioctl_l2_fb_ops(g, + (struct nvgpu_gpu_l2_fb_args *)buf); + break; + case NVGPU_GPU_IOCTL_INVAL_ICACHE: + err = gr_gk20a_elpg_protected_call(g, + nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf)); + break; + + case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: + err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, + (struct nvgpu_gpu_mmu_debug_mode_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: + err = gr_gk20a_elpg_protected_call(g, + nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); + break; + + case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: + err = nvgpu_gpu_ioctl_trigger_suspend(g); + break; + + case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: + err = nvgpu_gpu_ioctl_wait_for_pause(g, + (struct nvgpu_gpu_wait_pause_args *)buf); + break; + + case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: + err = nvgpu_gpu_ioctl_resume_from_pause(g); + break; + + case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: + err = nvgpu_gpu_ioctl_clear_sm_errors(g); + break; + + case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: + err = nvgpu_gpu_ioctl_has_any_exception(g, + (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); + break; + + case NVGPU_GPU_IOCTL_NUM_VSMS: + err = gk20a_ctrl_get_num_vsms(g, + (struct nvgpu_gpu_num_vsms *)buf); + break; + case NVGPU_GPU_IOCTL_VSMS_MAPPING: + err = gk20a_ctrl_vsm_mapping(g, + (struct nvgpu_gpu_vsms_mapping *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: + err = nvgpu_gpu_get_cpu_time_correlation_info(g, + (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_GPU_TIME: + err = nvgpu_gpu_get_gpu_time(g, + (struct nvgpu_gpu_get_gpu_time_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: + err = nvgpu_gpu_get_engine_info(g, + (struct nvgpu_gpu_get_engine_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: + err = nvgpu_gpu_alloc_vidmem(g, + (struct nvgpu_gpu_alloc_vidmem_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: + err = 
nvgpu_gpu_get_memory_state(g, + (struct nvgpu_gpu_get_memory_state_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_RANGE: + err = nvgpu_gpu_clk_get_range(g, priv, + (struct nvgpu_gpu_clk_range_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: + err = nvgpu_gpu_clk_get_vf_points(g, priv, + (struct nvgpu_gpu_clk_vf_points_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_SET_INFO: + err = nvgpu_gpu_clk_set_info(g, priv, + (struct nvgpu_gpu_clk_set_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_INFO: + err = nvgpu_gpu_clk_get_info(g, priv, + (struct nvgpu_gpu_clk_get_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_EVENT_FD: + err = nvgpu_gpu_get_event_fd(g, priv, + (struct nvgpu_gpu_get_event_fd_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_VOLTAGE: + err = nvgpu_gpu_get_voltage(g, + (struct nvgpu_gpu_get_voltage_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_CURRENT: + err = nvgpu_gpu_get_current(g, + (struct nvgpu_gpu_get_current_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_POWER: + err = nvgpu_gpu_get_power(g, + (struct nvgpu_gpu_get_power_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_TEMPERATURE: + err = nvgpu_gpu_get_temperature(g, + (struct nvgpu_gpu_get_temperature_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: + err = nvgpu_gpu_set_therm_alert_limit(g, + (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: + err = nvgpu_gpu_set_deterministic_opts(g, + (struct nvgpu_gpu_set_deterministic_opts_args *)buf); + break; + + case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: + err = nvgpu_gpu_read_single_sm_error_state(g, + (struct nvgpu_gpu_read_single_sm_error_state_args *)buf); + break; + + default: + nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h new file mode 100644 index 00000000..8b4a5e59 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __NVGPU_IOCTL_CTRL_H__ +#define __NVGPU_IOCTL_CTRL_H__ + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c new file mode 100644 index 00000000..31e7e2cb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -0,0 +1,2003 @@ +/* + * Tegra GK20A GPU Debugger/Profiler Driver + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "gk20a/regops_gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" +#include "os_linux.h" +#include "platform_gk20a.h" +#include "ioctl_dbg.h" + +/* turn seriously unwieldy names -> something shorter */ +#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x + +/* silly allocator - just increment id */ +static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); +static int generate_unique_id(void) +{ + return nvgpu_atomic_add_return(1, &unique_id); +} + +static int alloc_profiler(struct gk20a *g, + struct dbg_profiler_object_data **_prof) +{ + struct dbg_profiler_object_data *prof; + *_prof = NULL; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + prof = nvgpu_kzalloc(g, sizeof(*prof)); + if (!prof) + return -ENOMEM; + + prof->prof_handle = generate_unique_id(); + *_prof = prof; + return 0; +} + +static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) +{ + struct dbg_session_gk20a_linux *dbg_s_linux; + *_dbg_s_linux = NULL; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); + if (!dbg_s_linux) + return -ENOMEM; + + dbg_s_linux->dbg_s.id = generate_unique_id(); + *_dbg_s_linux = dbg_s_linux; + return 0; +} + +static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, + struct gr_gk20a *gr); + +static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); + +static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_exec_reg_ops_args *args); + +static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_powergate_args *args); + +static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); + +static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); + +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); + +static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); + +static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); + +static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_profiler_reserve_args *args); + +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args); + +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); + +static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, + int
timeout_mode); + +static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, + u32 profiler_handle); + +static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); + +static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); + +static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, + u32 profiler_handle); + +static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); + +static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, + struct file *filp, bool is_profiler); + +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) { + nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", + dbg_s->id); + nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", + dbg_s->dbg_events.num_pending_events); + mask = (POLLPRI | POLLIN); + } + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); + + return mask; +} + +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) +{ + struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = dbg_s->g; + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); + + /* unbind channels */ + dbg_unbind_all_channels_gk20a(dbg_s); + + /* Powergate/timeout enable is called here because a dbg_session that + * issued the powergate/timeout disable ioctl may be killed without + * ever issuing the corresponding enable ioctl. + */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); + nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); + + /* If this session owned the perf buffer, release it */ + if (g->perfbuf.owner == dbg_s) + gk20a_perfbuf_release_locked(g, g->perfbuf.offset); + + /* Per-context profiler objects were released when we called + * dbg_unbind_all_channels. We could still have global ones. + */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->session_id == dbg_s->id) { + if (prof_obj->has_reservation) + g->ops.dbg_session_ops.
+ release_profiler_reservation(dbg_s, prof_obj); + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + } + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + + nvgpu_mutex_destroy(&dbg_s->ch_list_lock); + nvgpu_mutex_destroy(&dbg_s->ioctl_lock); + + nvgpu_kfree(g, dbg_session_linux); + gk20a_put(g); + + return 0; +} + +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, prof.cdev); + struct gk20a *g = &l->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); +} + +static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_timeout_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} + +static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + u32 sm_id; + struct channel_gk20a *ch; + struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; + struct nvgpu_gr_sm_error_state sm_error_state; + int err = 0; + + /* Not currently supported in the virtual case */ + if (g->is_virtual) + return -ENOSYS; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + if (args->sm_error_state_record_size > 0) { + size_t read_size = sizeof(sm_error_state_record); + + if (read_size > args->sm_error_state_record_size) + read_size = args->sm_error_state_record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_from_user(&sm_error_state_record, + (void __user *)(uintptr_t) + args->sm_error_state_record_mem, + read_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) + return -EFAULT; + } + + err = gk20a_busy(g); + if (err) + return err; + + sm_error_state.hww_global_esr = + sm_error_state_record.hww_global_esr; + sm_error_state.hww_warp_esr = + sm_error_state_record.hww_warp_esr; + sm_error_state.hww_warp_esr_pc = + sm_error_state_record.hww_warp_esr_pc; + sm_error_state.hww_global_esr_report_mask = + sm_error_state_record.hww_global_esr_report_mask; + sm_error_state.hww_warp_esr_report_mask = + sm_error_state_record.hww_warp_esr_report_mask; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.update_sm_error_state(g, ch, + sm_id, &sm_error_state)); + + gk20a_idle(g); + + return err; +} + + +static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr_sm_error_state *sm_error_state; + struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; + u32 sm_id; + int err = 0; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + sm_error_state = gr->sm_error_states + sm_id; + sm_error_state_record.hww_global_esr = + sm_error_state->hww_global_esr; + sm_error_state_record.hww_warp_esr = + sm_error_state->hww_warp_esr; + sm_error_state_record.hww_warp_esr_pc = +
sm_error_state->hww_warp_esr_pc; + sm_error_state_record.hww_global_esr_report_mask = + sm_error_state->hww_global_esr_report_mask; + sm_error_state_record.hww_warp_esr_report_mask = + sm_error_state->hww_warp_esr_report_mask; + + if (args->sm_error_state_record_size > 0) { + size_t write_size = sizeof(sm_error_state_record); + + if (write_size > args->sm_error_state_record_size) + write_size = args->sm_error_state_record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_to_user((void __user *)(uintptr_t) + args->sm_error_state_record_mem, + &sm_error_state_record, + write_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) { + nvgpu_err(g, "copy_to_user failed!"); + return err; + } + + args->sm_error_state_record_size = write_size; + } + + return 0; +} + + +static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->broadcast_stop_trigger = (args->broadcast != 0); + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); + + return 0; +} + +static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, + int timeout_mode) +{ + struct gk20a *g = dbg_s->g; + int err = 0; + + nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", + timeout_mode); + + switch (timeout_mode) { + case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: + if (dbg_s->is_timeout_disabled == true) + nvgpu_atomic_dec(&g->timeouts_disabled_refcount); + dbg_s->is_timeout_disabled = false; + break; + + case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: + if (dbg_s->is_timeout_disabled == false) + nvgpu_atomic_inc(&g->timeouts_disabled_refcount); + dbg_s->is_timeout_disabled = true; + break; + + default: + nvgpu_err(g, + "unrecognized dbg gpu timeout mode : 0x%x", + timeout_mode); + err = -EINVAL; + break; + } + + if (!err) + nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " + "timeouts disabled refcount %d", + dbg_s->is_timeout_disabled ?
"true" : "false", + nvgpu_atomic_read(&g->timeouts_disabled_refcount)); + return err; +} + +static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, + struct file *filp, bool is_profiler) +{ + struct nvgpu_os_linux *l; + struct dbg_session_gk20a_linux *dbg_session_linux; + struct dbg_session_gk20a *dbg_s; + struct gk20a *g; + + struct device *dev; + + int err; + + if (!is_profiler) + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, dbg.cdev); + else + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, prof.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + dev = dev_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); + + err = alloc_session(g, &dbg_session_linux); + if (err) + goto free_ref; + + dbg_s = &dbg_session_linux->dbg_s; + + filp->private_data = dbg_session_linux; + dbg_session_linux->dev = dev; + dbg_s->g = g; + dbg_s->is_profiler = is_profiler; + dbg_s->is_pg_disabled = false; + dbg_s->is_timeout_disabled = false; + + nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); + nvgpu_init_list_node(&dbg_s->ch_list); + err = nvgpu_mutex_init(&dbg_s->ch_list_lock); + if (err) + goto err_free_session; + err = nvgpu_mutex_init(&dbg_s->ioctl_lock); + if (err) + goto err_destroy_lock; + dbg_s->dbg_events.events_enabled = false; + dbg_s->dbg_events.num_pending_events = 0; + + return 0; + +err_destroy_lock: + nvgpu_mutex_destroy(&dbg_s->ch_list_lock); +err_free_session: + nvgpu_kfree(g, dbg_session_linux); +free_ref: + gk20a_put(g); + return err; +} + +void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) +{ + nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); +} + +static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, + struct dbg_session_channel_data *ch_data) +{ + struct gk20a *g = dbg_s->g; + int chid; + struct dbg_session_data *session_data; + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + struct dbg_session_channel_data_linux *ch_data_linux; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + chid = ch_data->chid; + + /* If there's a profiler ctx reservation record associated with this + * session/channel pair, release it. + */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if ((prof_obj->session_id == dbg_s->id) && + (prof_obj->ch->chid == chid)) { + if (prof_obj->has_reservation) { + g->ops.dbg_session_ops. + release_profiler_reservation(dbg_s, prof_obj); + } + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + } + } + + nvgpu_list_del(&ch_data->ch_entry); + + session_data = ch_data->session_data; + nvgpu_list_del(&session_data->dbg_s_entry); + nvgpu_kfree(dbg_s->g, session_data); + + ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, + ch_data); + + fput(ch_data_linux->ch_f); + nvgpu_kfree(dbg_s->g, ch_data_linux); + + return 0; +} + +static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_bind_channel_args *args) +{ + struct file *f; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + struct dbg_session_channel_data_linux *ch_data_linux; + struct dbg_session_data *session_data; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", + g->name, args->channel_fd); + + /* + * Although gk20a_get_channel_from_file gives us a channel ref, need to + * hold a ref to the file during the session lifetime. See comment in + * struct dbg_session_channel_data. 
+ */ + f = fget(args->channel_fd); + if (!f) + return -ENODEV; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) { + nvgpu_log_fn(g, "no channel found for fd"); + err = -EINVAL; + goto out_fput; + } + + nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); + + ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); + if (!ch_data_linux) { + err = -ENOMEM; + goto out_chput; + } + ch_data_linux->ch_f = f; + ch_data_linux->ch_data.channel_fd = args->channel_fd; + ch_data_linux->ch_data.chid = ch->chid; + ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; + nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); + + session_data = nvgpu_kzalloc(g, sizeof(*session_data)); + if (!session_data) { + err = -ENOMEM; + goto out_kfree; + } + session_data->dbg_s = dbg_s; + nvgpu_init_list_node(&session_data->dbg_s_entry); + ch_data_linux->ch_data.session_data = session_data; + + nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); + + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + + nvgpu_mutex_release(&ch->dbg_s_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + + return 0; + +out_kfree: + nvgpu_kfree(g, ch_data_linux); +out_chput: + gk20a_channel_put(ch); + nvgpu_mutex_release(&ch->dbg_s_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); +out_fput: + fput(f); + return err; +} + +static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) +{ + struct dbg_session_channel_data *ch_data, *tmp; + struct gk20a *g = dbg_s->g; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) + ch_data->unbind_single_channel(dbg_s, ch_data); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return 0; +} + +/* + * Convert common regops op values of the form of NVGPU_DBG_REG_OP_* + * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* + */ +static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) +{ + switch (regops_op) { + case REGOP(READ_32): + return REGOP_LINUX(READ_32); + case REGOP(WRITE_32): + return REGOP_LINUX(WRITE_32); + case REGOP(READ_64): + return REGOP_LINUX(READ_64); + case REGOP(WRITE_64): + return REGOP_LINUX(WRITE_64); + case REGOP(READ_08): + return REGOP_LINUX(READ_08); + case REGOP(WRITE_08): + return REGOP_LINUX(WRITE_08); + } + + return regops_op; +} + +/* + * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* + * into common regops op values of the form of NVGPU_DBG_REG_OP_* + */ +static u32 nvgpu_get_regops_op_values_common(u32 regops_op) +{ + switch (regops_op) { + case REGOP_LINUX(READ_32): + return REGOP(READ_32); + case REGOP_LINUX(WRITE_32): + return REGOP(WRITE_32); + case REGOP_LINUX(READ_64): + return REGOP(READ_64); + case REGOP_LINUX(WRITE_64): + return REGOP(WRITE_64); + case REGOP_LINUX(READ_08): + return REGOP(READ_08); + case REGOP_LINUX(WRITE_08): + return REGOP(WRITE_08); + } + + return regops_op; +} + +/* + * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* + * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* + */ +static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) +{ + switch (regops_type) { + case 
REGOP(TYPE_GLOBAL): + return REGOP_LINUX(TYPE_GLOBAL); + case REGOP(TYPE_GR_CTX): + return REGOP_LINUX(TYPE_GR_CTX); + case REGOP(TYPE_GR_CTX_TPC): + return REGOP_LINUX(TYPE_GR_CTX_TPC); + case REGOP(TYPE_GR_CTX_SM): + return REGOP_LINUX(TYPE_GR_CTX_SM); + case REGOP(TYPE_GR_CTX_CROP): + return REGOP_LINUX(TYPE_GR_CTX_CROP); + case REGOP(TYPE_GR_CTX_ZROP): + return REGOP_LINUX(TYPE_GR_CTX_ZROP); + case REGOP(TYPE_GR_CTX_QUAD): + return REGOP_LINUX(TYPE_GR_CTX_QUAD); + } + + return regops_type; +} + +/* + * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* + * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* + */ +static u32 nvgpu_get_regops_type_values_common(u32 regops_type) +{ + switch (regops_type) { + case REGOP_LINUX(TYPE_GLOBAL): + return REGOP(TYPE_GLOBAL); + case REGOP_LINUX(TYPE_GR_CTX): + return REGOP(TYPE_GR_CTX); + case REGOP_LINUX(TYPE_GR_CTX_TPC): + return REGOP(TYPE_GR_CTX_TPC); + case REGOP_LINUX(TYPE_GR_CTX_SM): + return REGOP(TYPE_GR_CTX_SM); + case REGOP_LINUX(TYPE_GR_CTX_CROP): + return REGOP(TYPE_GR_CTX_CROP); + case REGOP_LINUX(TYPE_GR_CTX_ZROP): + return REGOP(TYPE_GR_CTX_ZROP); + case REGOP_LINUX(TYPE_GR_CTX_QUAD): + return REGOP(TYPE_GR_CTX_QUAD); + } + + return regops_type; +} + +/* + * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_* + * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* + */ +static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) +{ + switch (regops_status) { + case REGOP(STATUS_SUCCESS): + return REGOP_LINUX(STATUS_SUCCESS); + case REGOP(STATUS_INVALID_OP): + return REGOP_LINUX(STATUS_INVALID_OP); + case REGOP(STATUS_INVALID_TYPE): + return REGOP_LINUX(STATUS_INVALID_TYPE); + case REGOP(STATUS_INVALID_OFFSET): + return REGOP_LINUX(STATUS_INVALID_OFFSET); + case REGOP(STATUS_UNSUPPORTED_OP): + return REGOP_LINUX(STATUS_UNSUPPORTED_OP); + case REGOP(STATUS_INVALID_MASK ): + return REGOP_LINUX(STATUS_INVALID_MASK); + } + + return regops_status; +} + +/* + * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* + * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_* + */ +static u32 nvgpu_get_regops_status_values_common(u32 regops_status) +{ + switch (regops_status) { + case REGOP_LINUX(STATUS_SUCCESS): + return REGOP(STATUS_SUCCESS); + case REGOP_LINUX(STATUS_INVALID_OP): + return REGOP(STATUS_INVALID_OP); + case REGOP_LINUX(STATUS_INVALID_TYPE): + return REGOP(STATUS_INVALID_TYPE); + case REGOP_LINUX(STATUS_INVALID_OFFSET): + return REGOP(STATUS_INVALID_OFFSET); + case REGOP_LINUX(STATUS_UNSUPPORTED_OP): + return REGOP(STATUS_UNSUPPORTED_OP); + case REGOP_LINUX(STATUS_INVALID_MASK ): + return REGOP(STATUS_INVALID_MASK); + } + + return regops_status; +} + +static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, + struct nvgpu_dbg_reg_op *out, u32 num_ops) +{ + u32 i; + + if(in == NULL || out == NULL) + return -ENOMEM; + + for (i = 0; i < num_ops; i++) { + out[i].op = nvgpu_get_regops_op_values_common(in[i].op); + out[i].type = nvgpu_get_regops_type_values_common(in[i].type); + out[i].status = nvgpu_get_regops_status_values_common(in[i].status); + out[i].quad = in[i].quad; + out[i].group_mask = in[i].group_mask; + out[i].sub_group_mask = in[i].sub_group_mask; + out[i].offset = in[i].offset; + out[i].value_lo = in[i].value_lo; + out[i].value_hi = in[i].value_hi; + out[i].and_n_mask_lo = in[i].and_n_mask_lo; + out[i].and_n_mask_hi = in[i].and_n_mask_hi; + } + + return 0; +} + 
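Each direction of the translation above is a plain value map, so converting a whole fragment is just a field-by-field copy plus these lookups. Below is a minimal sketch (not part of the patch) of how the two batch converters cooperate on one op; the 0x100 offset is hypothetical and nvgpu_get_regops_data_linux() is the mirror helper defined just after this point, reusing this file's REGOP()/REGOP_LINUX() shorthand:

	/*
	 * Illustration only: round-trip a single op through the
	 * UAPI <-> common translation layer.
	 */
	static void regops_round_trip_sketch(void)
	{
		struct nvgpu_dbg_gpu_reg_op uapi_op = {
			.op = REGOP_LINUX(READ_32),
			.type = REGOP_LINUX(TYPE_GLOBAL),
			.offset = 0x100,	/* hypothetical register offset */
		};
		struct nvgpu_dbg_reg_op common_op;

		/* userspace encoding -> common encoding, as done before exec_reg_ops() */
		nvgpu_get_regops_data_common(&uapi_op, &common_op, 1);

		/* common encoding -> userspace encoding, as done before copy_to_user() */
		nvgpu_get_regops_data_linux(&common_op, &uapi_op, 1);

		/* the two maps are inverses, so op/type/status survive the round trip */
	}

Keeping the mapping explicit, rather than relying on the UAPI and common numeric values happening to match, is what lets the common regops header evolve independently of the Linux ioctl ABI.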
+static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, + struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) +{ + u32 i; + + if(in == NULL || out == NULL) + return -ENOMEM; + + for (i = 0; i < num_ops; i++) { + out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); + out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); + out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); + out[i].quad = in[i].quad; + out[i].group_mask = in[i].group_mask; + out[i].sub_group_mask = in[i].sub_group_mask; + out[i].offset = in[i].offset; + out[i].value_lo = in[i].value_lo; + out[i].value_hi = in[i].value_hi; + out[i].and_n_mask_lo = in[i].and_n_mask_lo; + out[i].and_n_mask_hi = in[i].and_n_mask_hi; + } + + return 0; +} + +static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_exec_reg_ops_args *args) +{ + int err = 0, powergate_err = 0; + bool is_pg_disabled = false; + + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + + nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); + + if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { + nvgpu_err(g, "regops limit exceeded"); + return -EINVAL; + } + + if (args->num_ops == 0) { + /* Nothing to do */ + return 0; + } + + if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { + nvgpu_err(g, "reg ops work buffer not allocated"); + return -ENODEV; + } + + if (!dbg_s->id) { + nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); + return -EINVAL; + } + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!dbg_s->is_profiler && !ch) { + nvgpu_err(g, "bind a channel before regops for a debugging session"); + return -EINVAL; + } + + /* be sure that ctx info is in place */ + if (!g->is_virtual && + !gr_context_info_available(dbg_s, &g->gr)) { + nvgpu_err(g, "gr context data not available"); + return -ENODEV; + } + + /* since exec_reg_ops sends methods to the ucode, it must take the + * global gpu lock to protect against mixing methods from debug sessions + * on other channels */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + if (!dbg_s->is_pg_disabled && !g->is_virtual) { + /* In the virtual case, the server will handle + * disabling/enabling powergating when processing reg ops + */ + powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, + true); + is_pg_disabled = true; + } + + if (!powergate_err) { + u64 ops_offset = 0; /* index offset */ + + struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; + + linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * + sizeof(struct nvgpu_dbg_gpu_reg_op)); + + if (!linux_fragment) + return -ENOMEM; + + while (ops_offset < args->num_ops && !err) { + const u64 num_ops = + min(args->num_ops - ops_offset, + (u64)(g->dbg_regops_tmp_buf_ops)); + const u64 fragment_size = + num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); + + void __user *const fragment = + (void __user *)(uintptr_t) + (args->ops + + ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); + + nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", + ops_offset, num_ops); + + nvgpu_log_fn(g, "Copying regops from userspace"); + + if (copy_from_user(linux_fragment, + fragment, fragment_size)) { + nvgpu_err(g, "copy_from_user failed!"); + err = -EFAULT; + break; + } + + err = nvgpu_get_regops_data_common(linux_fragment, + g->dbg_regops_tmp_buf, num_ops); + + if (err) + break; + + err = g->ops.dbg_session_ops.exec_reg_ops( + dbg_s, g->dbg_regops_tmp_buf, num_ops); + + err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, + 
linux_fragment, num_ops); + + if (err) + break; + + nvgpu_log_fn(g, "Copying result to userspace"); + + if (copy_to_user(fragment, linux_fragment, + fragment_size)) { + nvgpu_err(g, "copy_to_user failed!"); + err = -EFAULT; + break; + } + + ops_offset += num_ops; + } + + nvgpu_kfree(g, linux_fragment); + + /* enable powergate, if previously disabled */ + if (is_pg_disabled) { + powergate_err = + g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, + false); + } + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + if (!err && powergate_err) + err = powergate_err; + + if (err) + nvgpu_err(g, "dbg regops failed"); + + return err; +} + +static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_powergate_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + nvgpu_log_fn(g, "%s powergate mode = %d", + g->name, args->mode); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) { + err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true); + } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) { + err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); + } else { + nvgpu_err(g, "invalid powergate mode"); + err = -EINVAL; + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch_gk20a; + + nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", + g->name, args->mode); + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch_gk20a) { + nvgpu_err(g, + "no bound channel for smpc ctxsw mode update"); + err = -EINVAL; + goto clean_up; + } + + err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, + args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); + if (err) { + nvgpu_err(g, + "error (%d) during smpc ctxsw mode update", err); + goto clean_up; + } + + err = g->ops.regops.apply_smpc_war(dbg_s); + clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch_gk20a; + + nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); + + /* Must have a valid reservation to enable/disable hwpm cxtsw. + * Just print an error message for now, but eventually this should + * return an error, at the point where all client sw has been + * cleaned up. 
+ */ + if (!dbg_s->has_profiler_reservation) { + nvgpu_err(g, + "session doesn't have a valid reservation"); + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch_gk20a) { + nvgpu_err(g, + "no bound channel for pm ctxsw mode update"); + err = -EINVAL; + goto clean_up; + } + if (!dbg_s->is_pg_disabled) { + nvgpu_err(g, "powergate is not disabled"); + err = -ENOSYS; + goto clean_up; + } + err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, + args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); + if (err) + nvgpu_err(g, + "error (%d) during pm ctxsw mode update", err); + /* gk20a would require a WAR to set the core PM_ENABLE bit, not + * added here with gk20a being deprecated + */ + clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) +{ + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + int err = 0, action = args->mode; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + /* Suspend GPU context switching */ + err = gr_gk20a_disable_ctxsw(g); + if (err) { + nvgpu_err(g, "unable to stop gr ctxsw"); + /* this should probably be ctx-fatal... */ + goto clean_up; + } + + switch (action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: + gr_gk20a_suspend_context(ch); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_SMS: + gr_gk20a_resume_context(ch); + break; + } + + err = gr_gk20a_enable_ctxsw(g); + if (err) + nvgpu_err(g, "unable to restart ctxsw!"); + +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + + return err; +} + +static int nvgpu_ioctl_allocate_profiler_object( + struct dbg_session_gk20a_linux *dbg_session_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) +{ + int err = 0; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = get_gk20a(dbg_session_linux->dev); + struct dbg_profiler_object_data *prof_obj; + + nvgpu_log_fn(g, "%s", g->name); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + err = alloc_profiler(g, &prof_obj); + if (err) + goto clean_up; + + prof_obj->session_id = dbg_s->id; + + if (dbg_s->is_profiler) + prof_obj->ch = NULL; + else { + prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (prof_obj->ch == NULL) { + nvgpu_err(g, + "bind a channel for dbg session"); + nvgpu_kfree(g, prof_obj); + err = -EINVAL; + goto clean_up; + } + } + + /* Return handle to client */ + args->profiler_handle = prof_obj->prof_handle; + + nvgpu_init_list_node(&prof_obj->prof_obj_entry); + + nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int nvgpu_ioctl_free_profiler_object( + struct dbg_session_gk20a_linux *dbg_s_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) +{ + int err = 0; + struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; + struct gk20a *g = get_gk20a(dbg_s_linux->dev); + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + bool obj_found = false; + + 
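+	/* Look up the object by handle under dbg_sessions_lock below; only
+	 * the owning session is allowed to free it. */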
nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", + g->name, dbg_s->id, args->profiler_handle); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + /* Remove profiler object from the list, if a match is found */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->prof_handle == args->profiler_handle) { + if (prof_obj->session_id != dbg_s->id) { + nvgpu_err(g, + "invalid handle %x", + args->profiler_handle); + err = -EINVAL; + break; + } + if (prof_obj->has_reservation) + g->ops.dbg_session_ops. + release_profiler_reservation(dbg_s, prof_obj); + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + obj_found = true; + break; + } + } + if (!obj_found) { + nvgpu_err(g, "profiler %x not found", + args->profiler_handle); + err = -EINVAL; + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static struct dbg_profiler_object_data *find_matching_prof_obj( + struct dbg_session_gk20a *dbg_s, + u32 profiler_handle) +{ + struct gk20a *g = dbg_s->g; + struct dbg_profiler_object_data *prof_obj; + + nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->prof_handle == profiler_handle) { + if (prof_obj->session_id != dbg_s->id) { + nvgpu_err(g, + "invalid handle %x", + profiler_handle); + return NULL; + } + return prof_obj; + } + } + return NULL; +} + +/* used in scenarios where the debugger session can take just the inter-session + * lock for performance, but the profiler session must take the per-gpu lock + * since it might not have an associated channel. */ +static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) +{ + struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + + if (dbg_s->is_profiler || !ch) + nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); + else + nvgpu_mutex_acquire(&ch->dbg_s_lock); +} + +static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) +{ + struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + + if (dbg_s->is_profiler || !ch) + nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); + else + nvgpu_mutex_release(&ch->dbg_s_lock); +} + +static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->dbg_events.events_enabled = true; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + +static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->dbg_events.events_enabled = false; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + +static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) + dbg_s->dbg_events.num_pending_events--; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + + +static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_events_ctrl_args *args) +{ + int ret = 0; + struct channel_gk20a *ch; + struct gk20a *g = 
dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) { + nvgpu_err(g, "no channel bound to dbg session"); + return -EINVAL; + } + + switch (args->cmd) { + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: + gk20a_dbg_gpu_events_enable(dbg_s); + break; + + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: + gk20a_dbg_gpu_events_disable(dbg_s); + break; + + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: + gk20a_dbg_gpu_events_clear(dbg_s); + break; + + default: + nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", + args->cmd); + ret = -EINVAL; + break; + } + + return ret; +} + +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args) +{ + struct gk20a *g = dbg_s->g; + struct mm_gk20a *mm = &g->mm; + int err; + u32 virt_size; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + if (g->perfbuf.owner) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -EBUSY; + } + + mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "perfbuf"); + if (!mm->perfbuf.vm) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -ENOMEM; + } + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, + args->dmabuf_fd, + &args->offset, + 0, + 0, + 0, + 0, + args->mapping_size, + NULL); + if (err) + goto err_remove_vm; + + /* perf output buffer may not cross a 4GB boundary */ + virt_size = u64_lo32(args->mapping_size); + if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { + err = -EINVAL; + goto err_unmap; + } + + err = g->ops.dbg_session_ops.perfbuffer_enable(g, + args->offset, virt_size); + if (err) + goto err_unmap; + + g->perfbuf.owner = dbg_s; + g->perfbuf.offset = args->offset; + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return 0; + +err_unmap: + nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); +err_remove_vm: + nvgpu_vm_put(mm->perfbuf.vm); + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) +{ + struct gk20a *g = dbg_s->g; + int err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if ((g->perfbuf.owner != dbg_s) || + (g->perfbuf.offset != args->offset)) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -EINVAL; + } + + err = gk20a_perfbuf_release_locked(g, args->offset); + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} + +static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_pc_sampling_args *args) +{ + struct channel_gk20a *ch; + struct gk20a *g = dbg_s->g; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + nvgpu_log_fn(g, " "); + + return g->ops.gr.update_pc_sampling ? 
+ g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; +} + +static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + u32 sm_id; + struct channel_gk20a *ch; + int err = 0; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.clear_sm_error_state(g, ch, sm_id)); + + gk20a_idle(g); + + return err; +} + +static int +nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) +{ + struct gk20a *g = dbg_s->g; + int err = 0; + int ctx_resident_ch_fd = -1; + + err = gk20a_busy(g); + if (err) + return err; + + switch (args->action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: + err = g->ops.gr.suspend_contexts(g, dbg_s, + &ctx_resident_ch_fd); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: + err = g->ops.gr.resume_contexts(g, dbg_s, + &ctx_resident_ch_fd); + break; + } + + if (ctx_resident_ch_fd < 0) { + args->is_resident_context = 0; + } else { + args->is_resident_context = 1; + args->resident_context_fd = ctx_resident_ch_fd; + } + + gk20a_idle(g); + + return err; +} + +static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_access_fb_memory_args *args) +{ + struct gk20a *g = dbg_s->g; + struct dma_buf *dmabuf; + void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; + void *buffer; + u64 size, access_size, offset; + u64 access_limit_size = SZ_4K; + int err = 0; + + if ((args->offset & 3) || (!args->size) || (args->size & 3)) + return -EINVAL; + + dmabuf = dma_buf_get(args->dmabuf_fd); + if (IS_ERR(dmabuf)) + return -EINVAL; + + if ((args->offset > dmabuf->size) || + (args->size > dmabuf->size) || + (args->offset + args->size > dmabuf->size)) { + err = -EINVAL; + goto fail_dmabuf_put; + } + + buffer = nvgpu_big_zalloc(g, access_limit_size); + if (!buffer) { + err = -ENOMEM; + goto fail_dmabuf_put; + } + + size = args->size; + offset = 0; + + err = gk20a_busy(g); + if (err) + goto fail_free_buffer; + + while (size) { + /* Max access size of access_limit_size in one loop */ + access_size = min(access_limit_size, size); + + if (args->cmd == + NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { + err = copy_from_user(buffer, user_buffer + offset, + access_size); + if (err) + goto fail_idle; + } + + err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, + args->offset + offset, access_size, + args->cmd); + if (err) + goto fail_idle; + + if (args->cmd == + NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { + err = copy_to_user(user_buffer + offset, + buffer, access_size); + if (err) + goto fail_idle; + } + + size -= access_size; + offset += access_size; + } + +fail_idle: + gk20a_idle(g); +fail_free_buffer: + nvgpu_big_free(g, buffer); +fail_dmabuf_put: + dma_buf_put(dmabuf); + + return err; +} + +static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_profiler_reserve_args *args) +{ + if (args->acquire) + return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); + + return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); +} + +static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, 
+		struct nvgpu_dbg_gpu_timeout_args *args)
+{
+	bool status;
+	struct gk20a *g = dbg_s->g;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	status = nvgpu_is_timeouts_enabled(g);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	if (status)
+		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE;
+	else
+		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
+}
+
+/* In order to perform a context relative op the context has
+ * to be created already... which would imply that the
+ * context switch mechanism has already been put in place.
+ * So by the time we perform such an operation it should always
+ * be possible to query for the appropriate context offsets, etc.
+ *
+ * But note: while the dbg_gpu bind requires a channel fd,
+ * it doesn't require an allocated gr/compute obj at that point...
+ */
+static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
+		struct gr_gk20a *gr)
+{
+	int err;
+
+	nvgpu_mutex_acquire(&gr->ctx_mutex);
+	err = !gr->ctx_vars.golden_image_initialized;
+	nvgpu_mutex_release(&gr->ctx_mutex);
+	if (err)
+		return false;
+	return true;
+}
+
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
+	int err;
+
+	err = g->ops.dbg_session_ops.perfbuffer_disable(g);
+
+	nvgpu_vm_unmap(vm, offset, NULL);
+	nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(vm);
+
+	g->perfbuf.owner = NULL;
+	g->perfbuf.offset = 0;
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		nvgpu_err(g, "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation)
+		g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj);
+	else {
+		nvgpu_err(g, "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		nvgpu_err(g, "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		nvgpu_err(g, "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (!g->ops.dbg_session_ops.check_and_set_global_reservation(
+				dbg_s, my_prof_obj)) {
+			nvgpu_err(g,
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+		}
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		nvgpu_err(g,
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
+				dbg_profiler_object_data, prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				nvgpu_err(g,
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
+				dbg_s, my_prof_obj)) {
+			/* Another guest OS has the global reservation */
+			nvgpu_err(g,
+				"per-ctxt reserve: global reservation in effect");
+			err = -EBUSY;
+		}
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
+				dbg_profiler_object_data, prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				nvgpu_err(g,
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
+				dbg_s, my_prof_obj)) {
+			/* Another guest OS has the global reservation */
+			nvgpu_err(g,
+				"per-ctxt reserve: global reservation in effect");
+			err = -EBUSY;
+		}
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_unbind_channel_args *args)
+{
+	struct dbg_session_channel_data *ch_data;
+	struct gk20a *g = dbg_s->g;
+	bool channel_found = false;
+	struct channel_gk20a *ch;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "%s fd=%d",
+		g->name, args->channel_fd);
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+	if (!ch) {
+		nvgpu_log_fn(g, "no channel found for fd");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+	nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
+			dbg_session_channel_data, ch_entry) {
+		if (ch->chid == ch_data->chid) {
+			channel_found = true;
+			break;
+		}
+	}
+	nvgpu_mutex_release(&dbg_s->ch_list_lock);
+
+	if (!channel_found) {
+		nvgpu_log_fn(g, "channel not bound, fd=%d", args->channel_fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+	err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
+	nvgpu_mutex_release(&dbg_s->ch_list_lock);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+out:
+	gk20a_channel_put(ch);
+	return err;
+}
+
+int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_os_linux *l = container_of(inode->i_cdev,
+			struct nvgpu_os_linux, dbg.cdev);
+	struct gk20a *g = &l->g;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+	return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
+}
+
+long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data;
+	struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
+	struct gk20a *g = dbg_s->g;
+	u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+
+	if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) ||
+		(_IOC_SIZE(cmd) >
NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + /* protect from threaded user space calls */ + nvgpu_mutex_acquire(&dbg_s->ioctl_lock); + + switch (cmd) { + case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: + err = dbg_bind_channel_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_bind_channel_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_REG_OPS: + err = nvgpu_ioctl_channel_reg_ops(dbg_s, + (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_POWERGATE: + err = nvgpu_ioctl_powergate_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_powergate_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: + err = gk20a_dbg_gpu_events_ctrl(dbg_s, + (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: + err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, + (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: + err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, + (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: + err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, + (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: + err = gk20a_perfbuf_map(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: + err = gk20a_perfbuf_unmap(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: + err = gk20a_dbg_pc_sampling(dbg_s, + (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: + err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, + (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_TIMEOUT: + err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, + (struct nvgpu_dbg_gpu_timeout_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: + nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, + (struct nvgpu_dbg_gpu_timeout_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: + err = dbg_unbind_channel_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: + err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, + (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: + err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, + (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: + err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, + (struct 
nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: + err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, + (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: + err = nvgpu_ioctl_profiler_reserve(dbg_s, + (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); + break; + + default: + nvgpu_err(g, + "unrecognized dbg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + + nvgpu_mutex_release(&dbg_s->ioctl_lock); + + nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, + buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h new file mode 100644 index 00000000..bd76045b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h @@ -0,0 +1,54 @@ +/* + * Tegra GK20A GPU Debugger Driver + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef DBG_GPU_IOCTL_GK20A_H +#define DBG_GPU_IOCTL_GK20A_H +#include + +#include "gk20a/dbg_gpu_gk20a.h" + +/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number + * of regops */ +#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 + +struct dbg_session_gk20a_linux { + struct device *dev; + struct dbg_session_gk20a dbg_s; +}; + +struct dbg_session_channel_data_linux { + /* + * We have to keep a ref to the _file_, not the channel, because + * close(channel_fd) is synchronous and would deadlock if we had an + * open debug session fd holding a channel ref at that time. Holding a + * ref to the file makes close(channel_fd) just drop a kernel ref to + * the file; the channel will close when the last file ref is dropped. + */ + struct file *ch_f; + struct dbg_session_channel_data ch_data; +}; + +/* module debug driver interface */ +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); +int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); +long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); + +/* used by profiler driver interface */ +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); + +#endif \ No newline at end of file diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c new file mode 100644 index 00000000..4ef99ded --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -0,0 +1,677 @@ +/* + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/tsg_gk20a.h" +#include "gv11b/fifo_gv11b.h" +#include "platform_gk20a.h" +#include "ioctl_tsg.h" +#include "ioctl_channel.h" +#include "os_linux.h" + +struct tsg_private { + struct gk20a *g; + struct tsg_gk20a *tsg; +}; + +static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) + return -EINVAL; + + err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); + + gk20a_channel_put(ch); + return err; +} + +static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + struct channel_gk20a *ch; + struct gr_gk20a *gr = &g->gr; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto mutex_release; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto mutex_release; + } + + ch = gk20a_get_channel_from_file(arg->channel_fd); + if (!ch) { + err = -EINVAL; + goto idle; + } + + if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { + if ((arg->num_active_tpcs > gr->max_tpc_count) || + !(arg->num_active_tpcs)) { + nvgpu_err(g, "Invalid num of active TPCs"); + err = -EINVAL; + goto ch_put; + } + tsg->tpc_num_initialized = true; + tsg->num_active_tpcs = arg->num_active_tpcs; + tsg->tpc_pg_enabled = true; + } else { + tsg->tpc_pg_enabled = false; nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); + } + + if (arg->subcontext_id < g->fifo.max_subctx_count) { + ch->subctx_id = arg->subcontext_id; + } else { + err = -EINVAL; + goto ch_put; + } + + nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", + ch->chid, ch->subctx_id); + + /* Use runqueue selector 1 for all ASYNC ids */ + if (ch->subctx_id > CHANNEL_INFO_VEID0) + ch->runqueue_sel = 1; + + err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); +ch_put: + gk20a_channel_put(ch); +idle: + gk20a_idle(g); +mutex_release: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) +{ + struct channel_gk20a *ch; + int err = 0; + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) + return -EINVAL; + + if (ch->tsgid != tsg->tsgid) { + err = -EINVAL; + goto out; + } + + err = gk20a_tsg_unbind_channel(ch); + + /* + * Mark the channel timedout since channel unbound from TSG + * has no context of its own so it can't serve any job + */ + ch->has_timedout = true; + +out: + gk20a_channel_put(ch); + return err; +} + +static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, + unsigned int event_id, + struct gk20a_event_id_data **event_id_data) +{ + struct gk20a_event_id_data *local_event_id_data; + bool event_found = false; + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + 
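+	/* Linear scan of this TSG's enabled event ids; the walk is
+	 * protected by the list lock taken above. */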
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, + gk20a_event_id_data, event_id_node) { + if (local_event_id_data->event_id == event_id) { + event_found = true; + break; + } + } + nvgpu_mutex_release(&tsg->event_id_list_lock); + + if (event_found) { + *event_id_data = local_event_id_data; + return 0; + } else { + return -1; + } +} + +/* + * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific + * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs + */ +static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) +{ + switch (event_id) { + case NVGPU_EVENT_ID_BPT_INT: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; + case NVGPU_EVENT_ID_BPT_PAUSE: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; + case NVGPU_EVENT_ID_BLOCKING_SYNC: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; + case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; + case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; + case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; + } + + return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; +} + +void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, + int __event_id) +{ + struct gk20a_event_id_data *event_id_data; + u32 event_id; + int err = 0; + struct gk20a *g = tsg->g; + + event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); + if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) + return; + + err = gk20a_tsg_get_event_data_from_id(tsg, event_id, + &event_id_data); + if (err) + return; + + nvgpu_mutex_acquire(&event_id_data->lock); + + nvgpu_log_info(g, + "posting event for event_id=%d on tsg=%d\n", + event_id, tsg->tsgid); + event_id_data->event_posted = true; + + nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); + + nvgpu_mutex_release(&event_id_data->lock); +} + +static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + struct gk20a_event_id_data *event_id_data = filep->private_data; + struct gk20a *g = event_id_data->g; + u32 event_id = event_id_data->event_id; + struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); + + poll_wait(filep, &event_id_data->event_id_wq.wq, wait); + + nvgpu_mutex_acquire(&event_id_data->lock); + + if (event_id_data->event_posted) { + nvgpu_log_info(g, + "found pending event_id=%d on TSG=%d\n", + event_id, tsg->tsgid); + mask = (POLLPRI | POLLIN); + event_id_data->event_posted = false; + } + + nvgpu_mutex_release(&event_id_data->lock); + + return mask; +} + +static int gk20a_event_id_release(struct inode *inode, struct file *filp) +{ + struct gk20a_event_id_data *event_id_data = filp->private_data; + struct gk20a *g = event_id_data->g; + struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + nvgpu_list_del(&event_id_data->event_id_node); + nvgpu_mutex_release(&tsg->event_id_list_lock); + + nvgpu_mutex_destroy(&event_id_data->lock); + gk20a_put(g); + nvgpu_kfree(g, event_id_data); + filp->private_data = NULL; + + return 0; +} + +const struct file_operations gk20a_event_id_ops = { + .owner = THIS_MODULE, + .poll = gk20a_event_id_poll, + .release = gk20a_event_id_release, +}; + +static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, + int event_id, + int *fd) +{ + int err = 0; + int local_fd; + struct file *file; + char name[64]; + 
struct gk20a_event_id_data *event_id_data; + struct gk20a *g; + + g = gk20a_get(tsg->g); + if (!g) + return -ENODEV; + + err = gk20a_tsg_get_event_data_from_id(tsg, + event_id, &event_id_data); + if (err == 0) { + /* We already have event enabled */ + err = -EINVAL; + goto free_ref; + } + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + goto free_ref; + local_fd = err; + + snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", + event_id, local_fd); + + file = anon_inode_getfile(name, &gk20a_event_id_ops, + NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); + if (!event_id_data) { + err = -ENOMEM; + goto clean_up_file; + } + event_id_data->g = g; + event_id_data->id = tsg->tsgid; + event_id_data->event_id = event_id; + + nvgpu_cond_init(&event_id_data->event_id_wq); + err = nvgpu_mutex_init(&event_id_data->lock); + if (err) + goto clean_up_free; + + nvgpu_init_list_node(&event_id_data->event_id_node); + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); + nvgpu_mutex_release(&tsg->event_id_list_lock); + + fd_install(local_fd, file); + file->private_data = event_id_data; + + *fd = local_fd; + + return 0; + +clean_up_free: + nvgpu_kfree(g, event_id_data); +clean_up_file: + fput(file); +clean_up: + put_unused_fd(local_fd); +free_ref: + gk20a_put(g); + return err; +} + +static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, + struct nvgpu_event_id_ctrl_args *args) +{ + int err = 0; + int fd = -1; + + if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) + return -EINVAL; + + switch (args->cmd) { + case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: + err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); + if (!err) + args->event_fd = fd; + break; + + default: + nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", + args->cmd); + err = -EINVAL; + break; + } + + return err; +} + +int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) +{ + struct tsg_private *priv; + struct tsg_gk20a *tsg; + struct device *dev; + int err; + + g = gk20a_get(g); + if (!g) + return -ENODEV; + + dev = dev_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); + + priv = nvgpu_kmalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + + tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); + if (!tsg) { + nvgpu_kfree(g, priv); + err = -ENOMEM; + goto free_ref; + } + + priv->g = g; + priv->tsg = tsg; + filp->private_data = priv; + + gk20a_sched_ctrl_tsg_added(g, tsg); + + return 0; + +free_ref: + gk20a_put(g); + return err; +} + +int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + int ret; + + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, tsg.cdev); + g = &l->g; + + nvgpu_log_fn(g, " "); + + ret = gk20a_busy(g); + if (ret) { + nvgpu_err(g, "failed to power on, %d", ret); + return ret; + } + + ret = nvgpu_ioctl_tsg_open(&l->g, filp); + + gk20a_idle(g); + nvgpu_log_fn(g, "done"); + return ret; +} + +void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) +{ + struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); + struct gk20a *g = tsg->g; + + gk20a_sched_ctrl_tsg_removed(g, tsg); + + gk20a_tsg_release(ref); + gk20a_put(g); +} + +int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) +{ + struct tsg_private *priv = filp->private_data; + struct tsg_gk20a *tsg = priv->tsg; + + 
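+	/* Drop this file's TSG reference; nvgpu_ioctl_tsg_release() runs
+	 * once the last reference is gone. */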
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + nvgpu_kfree(tsg->g, priv); + return 0; +} + +static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + u32 level = arg->level; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto done; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto done; + } + + level = nvgpu_get_common_runlist_level(level); + err = gk20a_tsg_set_runlist_interleave(tsg, level); + + gk20a_idle(g); +done: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto done; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto done; + } + err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); + gk20a_idle(g); +done: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) +{ + arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); + return 0; +} + +long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct tsg_private *priv = filp->private_data; + struct tsg_gk20a *tsg = priv->tsg; + struct gk20a *g = tsg->g; + u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + switch (cmd) { + case NVGPU_TSG_IOCTL_BIND_CHANNEL: + { + int ch_fd = *(int *)buf; + if (ch_fd < 0) { + err = -EINVAL; + break; + } + err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); + break; + } + + case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: + { + err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, + (struct nvgpu_tsg_bind_channel_ex_args *)buf); + break; + } + + case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: + { + int ch_fd = *(int *)buf; + + if (ch_fd < 0) { + err = -EINVAL; + break; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + break; + } + err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_ENABLE: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + g->ops.fifo.enable_tsg(tsg); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_DISABLE: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + g->ops.fifo.disable_tsg(tsg); 
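+		/* balance the gk20a_busy() taken for this command */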
+ gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_PREEMPT: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + /* preempt TSG */ + err = g->ops.fifo.preempt_tsg(g, tsg->tsgid); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: + { + err = gk20a_tsg_event_id_ctrl(g, tsg, + (struct nvgpu_event_id_ctrl_args *)buf); + break; + } + + case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: + err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, + (struct nvgpu_runlist_interleave_args *)buf); + break; + + case NVGPU_IOCTL_TSG_SET_TIMESLICE: + { + err = gk20a_tsg_ioctl_set_timeslice(g, tsg, + (struct nvgpu_timeslice_args *)buf); + break; + } + case NVGPU_IOCTL_TSG_GET_TIMESLICE: + { + err = gk20a_tsg_ioctl_get_timeslice(g, tsg, + (struct nvgpu_timeslice_args *)buf); + break; + } + + default: + nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, + buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h new file mode 100644 index 00000000..67399fd4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef NVGPU_IOCTL_TSG_H +#define NVGPU_IOCTL_TSG_H + +struct inode; +struct file; +struct gk20a; +struct nvgpu_ref; + +int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); +int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); +int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); +long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/kmem.c b/drivers/gpu/nvgpu/os/linux/kmem.c new file mode 100644 index 00000000..10946a08 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem.c @@ -0,0 +1,654 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" + +#include "kmem_priv.h" + +/* + * Statically declared because this needs to be shared across all nvgpu driver + * instances. This makes sure that all kmem caches are _definitely_ uniquely + * named. 
+ */ +static atomic_t kmem_cache_id; + +void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) +{ + void *p; + + if (size > PAGE_SIZE) { + if (clear) + p = nvgpu_vzalloc(g, size); + else + p = nvgpu_vmalloc(g, size); + } else { + if (clear) + p = nvgpu_kzalloc(g, size); + else + p = nvgpu_kmalloc(g, size); + } + + return p; +} + +void nvgpu_big_free(struct gk20a *g, void *p) +{ + /* + * This will have to be fixed eventually. Allocs that use + * nvgpu_big_[mz]alloc() will need to remember the size of the alloc + * when freeing. + */ + if (is_vmalloc_addr(p)) + nvgpu_vfree(g, p); + else + nvgpu_kfree(g, p); +} + +void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kmalloc(g, size, ip); +#else + alloc = kmalloc(size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kzalloc(g, size, ip); +#else + alloc = kzalloc(size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kcalloc(g, n, size, ip); +#else + alloc = kcalloc(n, size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", + n * size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_vmalloc(g, size, ip); +#else + alloc = vmalloc(size); +#endif + + kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); + + return alloc; +} + +void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_vzalloc(g, size, ip); +#else + alloc = vzalloc(size); +#endif + + kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); + + return alloc; +} + +void __nvgpu_kfree(struct gk20a *g, void *addr) +{ + kmem_dbg(g, "kfree: addr=0x%p", addr); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_kfree(g, addr); +#else + kfree(addr); +#endif +} + +void __nvgpu_vfree(struct gk20a *g, void *addr) +{ + kmem_dbg(g, "vfree: addr=0x%p", addr); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_vfree(g, addr); +#else + vfree(addr); +#endif +} + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + nvgpu_mutex_acquire(&tracker->lock); +} + +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + nvgpu_mutex_release(&tracker->lock); +} + +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s) +{ +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + int i; + + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", + alloc->addr, alloc->size); + for (i = 0; i < alloc->stack_length; i++) + __pstat(s, " %3d [<%p>] %pS\n", i, + (void *)alloc->stack[i], + (void *)alloc->stack[i]); + __pstat(s, "\n"); +#else + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", + alloc->addr, alloc->size, alloc->ip); +#endif +} + +static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, + struct nvgpu_mem_alloc *alloc) +{ + 
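/* key the tracker's rbtree by the alloc's [addr, addr + size) range */ +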
alloc->allocs_entry.key_start = alloc->addr; + alloc->allocs_entry.key_end = alloc->addr + alloc->size; + + nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); + return 0; +} + +static struct nvgpu_mem_alloc *nvgpu_rem_alloc( + struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) +{ + struct nvgpu_mem_alloc *alloc; + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); + if (!node) + return NULL; + + alloc = nvgpu_mem_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &tracker->allocs); + + return alloc; +} + +static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + unsigned long size, unsigned long real_size, + u64 addr, unsigned long ip) +{ + int ret; + struct nvgpu_mem_alloc *alloc; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + struct stack_trace stack_trace; +#endif + + alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); + if (!alloc) + return -ENOMEM; + + alloc->owner = tracker; + alloc->size = size; + alloc->real_size = real_size; + alloc->addr = addr; + alloc->ip = (void *)(uintptr_t)ip; + +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + stack_trace.max_entries = MAX_STACK_TRACE; + stack_trace.nr_entries = 0; + stack_trace.entries = alloc->stack; + /* + * This 4 here skips the 2 function calls that happen for all traced + * allocs due to nvgpu: + * + * __nvgpu_save_kmem_alloc+0x7c/0x128 + * __nvgpu_track_kzalloc+0xcc/0xf8 + * + * And the function calls that get made by the stack trace code itself. + * If the trace savings code changes this will likely have to change + * as well. + */ + stack_trace.skip = 4; + save_stack_trace(&stack_trace); + alloc->stack_length = stack_trace.nr_entries; +#endif + + nvgpu_lock_tracker(tracker); + tracker->bytes_alloced += size; + tracker->bytes_alloced_real += real_size; + tracker->nr_allocs++; + + /* Keep track of this for building a histogram later on. */ + if (tracker->max_alloc < size) + tracker->max_alloc = size; + if (tracker->min_alloc > size) + tracker->min_alloc = size; + + ret = nvgpu_add_alloc(tracker, alloc); + if (ret) { + WARN(1, "Duplicate alloc??? 0x%llx\n", addr); + kfree(alloc); + nvgpu_unlock_tracker(tracker); + return ret; + } + nvgpu_unlock_tracker(tracker); + + return 0; +} + +static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + u64 addr) +{ + struct nvgpu_mem_alloc *alloc; + + nvgpu_lock_tracker(tracker); + alloc = nvgpu_rem_alloc(tracker, addr); + if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { + nvgpu_unlock_tracker(tracker); + return -EINVAL; + } + + memset((void *)alloc->addr, 0, alloc->size); + + tracker->nr_frees++; + tracker->bytes_freed += alloc->size; + tracker->bytes_freed_real += alloc->real_size; + nvgpu_unlock_tracker(tracker); + + return 0; +} + +static void __nvgpu_check_valloc_size(unsigned long size) +{ + WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); +} + +static void __nvgpu_check_kalloc_size(size_t size) +{ + WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); +} + +void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vmalloc(size); + + if (!alloc) + return NULL; + + __nvgpu_check_valloc_size(size); + + /* + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. 
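+ * Losing one tracking entry merely skews the debug statistics.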
+ */ + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vzalloc(size); + + if (!alloc) + return NULL; + + __nvgpu_check_valloc_size(size); + + /* + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. + */ + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kmalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kzalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, + unsigned long ip) +{ + void *alloc = kcalloc(n, size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(n * size); + + __nvgpu_save_kmem_alloc(g->kmallocs, n * size, + roundup_pow_of_two(n * size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void __nvgpu_track_vfree(struct gk20a *g, void *addr) +{ + /* + * Often it is accepted practice to pass NULL pointers into free + * functions to save code. + */ + if (!addr) + return; + + __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); + + vfree(addr); +} + +void __nvgpu_track_kfree(struct gk20a *g, void *addr) +{ + if (!addr) + return; + + __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); + + kfree(addr); +} + +static int __do_check_for_outstanding_allocs( + struct gk20a *g, + struct nvgpu_mem_alloc_tracker *tracker, + const char *type, bool silent) +{ + struct nvgpu_rbtree_node *node; + int count = 0; + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + while (node) { + struct nvgpu_mem_alloc *alloc = + nvgpu_mem_alloc_from_rbtree_node(node); + + if (!silent) + kmem_print_mem_alloc(g, alloc, NULL); + + count++; + nvgpu_rbtree_enum_next(&node, node); + } + + return count; +} + +/** + * check_for_outstanding_allocs - Count and display outstanding allocs + * + * @g - The GPU. + * @silent - If set don't print anything about the allocs. + * + * Dump (or just count) the number of allocations left outstanding. + */ +static int check_for_outstanding_allocs(struct gk20a *g, bool silent) +{ + int count = 0; + + count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", + silent); + count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", + silent); + + return count; +} + +static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, + void (*force_free_func)(const void *)) +{ + struct nvgpu_rbtree_node *node; + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + while (node) { + struct nvgpu_mem_alloc *alloc = + nvgpu_mem_alloc_from_rbtree_node(node); + + if (force_free_func) + force_free_func((void *)alloc->addr); + + nvgpu_rbtree_unlink(node, &tracker->allocs); + kfree(alloc); + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + } +} + +/** + * nvgpu_kmem_cleanup - Cleanup the kmem tracking + * + * @g - The GPU. 
+ * @force_free - If set, will also free leaked objects if possible.
+ *
+ * Clean up all of the allocs made by the nvgpu_kmem tracking code. If
+ * @force_free is non-zero then the allocations made by nvgpu are also freed.
+ * This is risky, though, as it is possible that the memory is still in use by
+ * other parts of the GPU driver not aware that this has happened.
+ *
+ * In theory it should be fine if the GPU driver has been deinitialized and
+ * there are no bugs in that code. However, if there are any bugs in that code
+ * then they could likely manifest as odd crashes an indeterminate amount of
+ * time in the future. So use @force_free at your own risk.
+ */
+static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
+{
+	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
+	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
+}
+
+void nvgpu_kmem_fini(struct gk20a *g, int flags)
+{
+	int count;
+	bool silent, force_free;
+
+	if (!flags)
+		return;
+
+	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
+	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
+
+	count = check_for_outstanding_allocs(g, silent);
+	nvgpu_kmem_cleanup(g, force_free);
+
+	/*
+	 * If we leak objects we can either BUG() out or just WARN(). In general
+	 * it doesn't make sense to BUG() here since leaking a few objects
+	 * won't crash the kernel but it can be helpful for development.
+	 *
+	 * If neither flag is set then we just silently do nothing.
+	 */
+	if (count > 0) {
+		if (flags & NVGPU_KMEM_FINI_WARN) {
+			WARN(1, "Letting %d allocs leak!!\n", count);
+		} else if (flags & NVGPU_KMEM_FINI_BUG) {
+			nvgpu_err(g, "Letting %d allocs leak!!", count);
+			BUG();
+		}
+	}
+}
+
+int nvgpu_kmem_init(struct gk20a *g)
+{
+	int err;
+
+	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
+	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
+
+	if (!g->vmallocs || !g->kmallocs) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	g->vmallocs->name = "vmalloc";
+	g->kmallocs->name = "kmalloc";
+
+	g->vmallocs->allocs = NULL;
+	g->kmallocs->allocs = NULL;
+
+	nvgpu_mutex_init(&g->vmallocs->lock);
+	nvgpu_mutex_init(&g->kmallocs->lock);
+
+	g->vmallocs->min_alloc = PAGE_SIZE;
+	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
+
+	/*
+	 * This needs to go after all the other initialization since they use
+	 * the nvgpu_kzalloc() API.
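+	 * (nvgpu_kmem_cache_create() itself allocates with nvgpu_kzalloc().)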
+ */ + g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + + if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { + err = -ENOMEM; + if (g->vmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); + if (g->kmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); + goto fail; + } + + return 0; + +fail: + if (g->vmallocs) + kfree(g->vmallocs); + if (g->kmallocs) + kfree(g->kmallocs); + return err; +} + +#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ + +int nvgpu_kmem_init(struct gk20a *g) +{ + return 0; +} + +void nvgpu_kmem_fini(struct gk20a *g, int flags) +{ +} +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + +struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) +{ + struct nvgpu_kmem_cache *cache = + nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); + + if (!cache) + return NULL; + + cache->g = g; + + snprintf(cache->name, sizeof(cache->name), + "nvgpu-cache-0x%p-%d-%d", g, (int)size, + atomic_inc_return(&kmem_cache_id)); + cache->cache = kmem_cache_create(cache->name, + size, size, 0, NULL); + if (!cache->cache) { + nvgpu_kfree(g, cache); + return NULL; + } + + return cache; +} + +void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) +{ + struct gk20a *g = cache->g; + + kmem_cache_destroy(cache->cache); + nvgpu_kfree(g, cache); +} + +void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) +{ + return kmem_cache_alloc(cache->cache, GFP_KERNEL); +} + +void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) +{ + kmem_cache_free(cache->cache, ptr); +} diff --git a/drivers/gpu/nvgpu/os/linux/kmem_priv.h b/drivers/gpu/nvgpu/os/linux/kmem_priv.h new file mode 100644 index 00000000..a41762af --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem_priv.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __KMEM_PRIV_H__ +#define __KMEM_PRIV_H__ + +#include +#include + +struct seq_file; + +#define __pstat(s, fmt, msg...) \ + do { \ + if (s) \ + seq_printf(s, fmt, ##msg); \ + else \ + pr_info(fmt, ##msg); \ + } while (0) + +#define MAX_STACK_TRACE 20 + +/* + * Linux specific version of the nvgpu_kmem_cache struct. This type is + * completely opaque to the rest of the driver. + */ +struct nvgpu_kmem_cache { + struct gk20a *g; + struct kmem_cache *cache; + + /* + * Memory to hold the kmem_cache unique name. Only necessary on our + * k3.10 kernel when not using the SLUB allocator but it's easier to + * just carry this on to newer kernels. 
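+ * 128 bytes comfortably fits the generated "nvgpu-cache-..." names.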
+ */ + char name[128]; +}; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +struct nvgpu_mem_alloc { + struct nvgpu_mem_alloc_tracker *owner; + + void *ip; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + unsigned long stack[MAX_STACK_TRACE]; + int stack_length; +#endif + + u64 addr; + + unsigned long size; + unsigned long real_size; + + struct nvgpu_rbtree_node allocs_entry; +}; + +static inline struct nvgpu_mem_alloc * +nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_mem_alloc *) + ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); +}; + +/* + * Linux specific tracking of vmalloc, kmalloc, etc. + */ +struct nvgpu_mem_alloc_tracker { + const char *name; + struct nvgpu_kmem_cache *allocs_cache; + struct nvgpu_rbtree_node *allocs; + struct nvgpu_mutex lock; + + u64 bytes_alloced; + u64 bytes_freed; + u64 bytes_alloced_real; + u64 bytes_freed_real; + u64 nr_allocs; + u64 nr_frees; + + unsigned long min_alloc; + unsigned long max_alloc; +}; + +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); + +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s); +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + +#endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/log.c b/drivers/gpu/nvgpu/os/linux/log.c new file mode 100644 index 00000000..ca29e0f3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/log.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +/* + * Define a length for log buffers. This is the buffer that the 'fmt, ...' part + * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it + * needs to not be overly sized since we have limited kernel stack space. But at + * the same time we don't want it to be restrictive either. + */ +#define LOG_BUFFER_LENGTH 160 + +/* + * Annoying quirk of Linux: this has to be a string literal since the printk() + * function and friends use the preprocessor to concatenate stuff to the start + * of this string when printing. 
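+ * For example, pr_info(LOG_FMT, ...) pastes KERN_INFO onto LOG_FMT at
+ * compile time, which only works if LOG_FMT is a string literal.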
+ */ +#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" + +static const char *log_types[] = { + "ERR", + "WRN", + "DBG", + "INFO", +}; + +int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) +{ + return !!(g->log_mask & log_mask); +} + +static inline const char *nvgpu_log_name(struct gk20a *g) +{ + return dev_name(dev_from_gk20a(g)); +} + +#ifdef CONFIG_GK20A_TRACE_PRINTK +static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, + const char *func_name, int line, + const char *log_type, const char *log) +{ + trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); +} +#endif + +static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, + const char *func_name, int line, + enum nvgpu_log_type type, const char *log) +{ + const char *name = gpu_name ? gpu_name : ""; + const char *log_type = log_types[type]; + +#ifdef CONFIG_GK20A_TRACE_PRINTK + if (trace) + return __nvgpu_trace_printk_log(trace, name, func_name, + line, log_type, log); +#endif + switch (type) { + case NVGPU_DEBUG: + /* + * We could use pr_debug() here but we control debug enablement + * separately from the Linux kernel. Perhaps this is a bug in + * nvgpu. + */ + pr_info(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_INFO: + pr_info(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_WARNING: + pr_warn(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_ERROR: + pr_err(LOG_FMT, name, func_name, line, log_type, log); + break; + } +} + +__attribute__((format (printf, 5, 6))) +void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, + enum nvgpu_log_type type, const char *fmt, ...) +{ + char log[LOG_BUFFER_LENGTH]; + va_list args; + + va_start(args, fmt); + vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); + va_end(args); + + __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", + func_name, line, type, log); +} + +__attribute__((format (printf, 5, 6))) +void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, + const char *func_name, int line, + const char *fmt, ...) +{ + char log[LOG_BUFFER_LENGTH]; + va_list args; + + if ((log_mask & g->log_mask) == 0) + return; + + va_start(args, fmt); + vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); + va_end(args); + + __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), + func_name, line, NVGPU_DEBUG, log); +} diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c new file mode 100644 index 00000000..af71cc81 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -0,0 +1,1365 @@ +/* + * GK20A Graphics + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "platform_gk20a.h" +#include "sysfs.h" +#include "vgpu/vgpu_linux.h" +#include "scale.h" +#include "pci.h" +#include "module.h" +#include "module_usermode.h" +#include "intr.h" +#include "ioctl.h" + +#include "os_linux.h" +#include "ctxsw_trace.h" +#include "driver_common.h" +#include "channel.h" + +#ifdef CONFIG_NVGPU_SUPPORT_CDE +#include "cde.h" +#endif + +#define CLASS_NAME "nvidia-gpu" +/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ + +#define GK20A_WAIT_FOR_IDLE_MS 2000 + +#define CREATE_TRACE_POINTS +#include + + +struct device_node *nvgpu_get_node(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (dev_is_pci(dev)) { + struct pci_bus *bus = to_pci_dev(dev)->bus; + + while (!pci_is_root_bus(bus)) + bus = bus->parent; + + return bus->bridge->parent->of_node; + } + + return dev->of_node; +} + +void gk20a_busy_noresume(struct gk20a *g) +{ + pm_runtime_get_noresume(dev_from_gk20a(g)); +} + +int gk20a_busy(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int ret = 0; + struct device *dev; + + if (!g) + return -ENODEV; + + atomic_inc(&g->usage_count.atomic_var); + + down_read(&l->busy_lock); + + if (!gk20a_can_busy(g)) { + ret = -ENODEV; + atomic_dec(&g->usage_count.atomic_var); + goto fail; + } + + dev = dev_from_gk20a(g); + + if (pm_runtime_enabled(dev)) { + /* Increment usage count and attempt to resume device */ + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + /* Mark suspended so runtime pm will retry later */ + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + atomic_dec(&g->usage_count.atomic_var); + goto fail; + } + } else { + nvgpu_mutex_acquire(&g->poweron_lock); + if (!g->power_on) { + ret = gk20a_gpu_is_virtual(dev) ? + vgpu_pm_finalize_poweron(dev) + : gk20a_pm_finalize_poweron(dev); + if (ret) { + atomic_dec(&g->usage_count.atomic_var); + nvgpu_mutex_release(&g->poweron_lock); + goto fail; + } + } + nvgpu_mutex_release(&g->poweron_lock); + } + +fail: + up_read(&l->busy_lock); + + return ret < 0 ? ret : 0; +} + +void gk20a_idle_nosuspend(struct gk20a *g) +{ + pm_runtime_put_noidle(dev_from_gk20a(g)); +} + +void gk20a_idle(struct gk20a *g) +{ + struct device *dev; + + atomic_dec(&g->usage_count.atomic_var); + + dev = dev_from_gk20a(g); + + if (!(dev && gk20a_can_busy(g))) + return; + + if (pm_runtime_enabled(dev)) { + pm_runtime_mark_last_busy(dev); + pm_runtime_put_sync_autosuspend(dev); + } +} + +/* + * Undoes gk20a_lockout_registers(). 
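+ * The saved register apertures are put back in place.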
+ */
+static int gk20a_restore_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->regs = l->regs_saved;
+	l->bar1 = l->bar1_saved;
+
+	nvgpu_restore_usermode_registers(g);
+
+	return 0;
+}
+
+static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
+{
+	int err = 0;
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	err = nvgpu_cde_init_ops(l);
+#endif
+
+	return err;
+}
+
+int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
+{
+	struct gk20a *g = &l->g;
+	int err;
+
+	if (l->init_done)
+		return 0;
+
+	err = nvgpu_init_channel_support_linux(l);
+	if (err) {
+		nvgpu_err(g, "failed to init linux channel support");
+		return err;
+	}
+
+	l->init_done = true;
+
+	return 0;
+}
+
+int gk20a_pm_finalize_poweron(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int err;
+
+	nvgpu_log_fn(g, " ");
+
+	if (g->power_on)
+		return 0;
+
+	trace_gk20a_finalize_poweron(dev_name(dev));
+
+	/* Increment platform power refcount */
+	if (platform->busy) {
+		err = platform->busy(dev);
+		if (err < 0) {
+			nvgpu_err(g, "failed to poweron platform dependency");
+			return err;
+		}
+	}
+
+	err = gk20a_restore_registers(g);
+	if (err)
+		return err;
+
+	/* Enable interrupt workqueue */
+	if (!l->nonstall_work_queue) {
+		l->nonstall_work_queue = alloc_workqueue("%s",
+					WQ_HIGHPRI, 1, "mc_nonstall");
+		INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb);
+	}
+
+	err = gk20a_detect_chip(g);
+	if (err)
+		return err;
+
+	if (g->sim) {
+		if (g->sim->sim_init_late)
+			g->sim->sim_init_late(g);
+	}
+
+	err = gk20a_finalize_poweron(g);
+	if (err)
+		goto done;
+
+	err = nvgpu_finalize_poweron_linux(l);
+	if (err)
+		goto done;
+
+	nvgpu_init_mm_ce_context(g);
+
+	nvgpu_vidmem_thread_unpause(&g->mm);
+
+	/* Initialise scaling: it will initialize the scaling driver only once */
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) &&
+			nvgpu_platform_is_silicon(g)) {
+		gk20a_scale_init(dev);
+		if (platform->initscale)
+			platform->initscale(dev);
+	}
+
+	trace_gk20a_finalize_poweron_done(dev_name(dev));
+
+	err = nvgpu_init_os_linux_ops(l);
+	if (err)
+		goto done;
+
+	enable_irq(g->irq_stall);
+	if (g->irq_stall != g->irq_nonstall)
+		enable_irq(g->irq_nonstall);
+	g->irqs_enabled = 1;
+
+	gk20a_scale_resume(dev_from_gk20a(g));
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	if (platform->has_cde)
+		gk20a_init_cde_support(l);
+#endif
+
+	err = gk20a_sched_ctrl_init(g);
+	if (err) {
+		nvgpu_err(g, "failed to init sched control");
+		return err;
+	}
+
+	g->sw_ready = true;
+
+done:
+	if (err)
+		g->power_on = false;
+
+	return err;
+}
+
+/*
+ * Locks out the driver from accessing GPU registers. This prevents access to
+ * these registers after the GPU has been clock or power gated. This should help
+ * find annoying bugs where register reads and writes are silently dropped
+ * after the GPU has been turned off. On older chips these reads and writes can
+ * also lock the entire CPU up.
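+ * Clearing the mapping pointers turns any such access into an immediately
+ * visible fault instead.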
+ */
+static int gk20a_lockout_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->regs = NULL;
+	l->bar1 = NULL;
+
+	nvgpu_lockout_usermode_registers(g);
+
+	return 0;
+}
+
+static int gk20a_pm_prepare_poweroff(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+#endif
+	int ret = 0;
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	bool irqs_enabled;
+
+	nvgpu_log_fn(g, " ");
+
+	nvgpu_mutex_acquire(&g->poweroff_lock);
+
+	if (!g->power_on)
+		goto done;
+
+	/* disable IRQs and wait for completion */
+	irqs_enabled = g->irqs_enabled;
+	if (irqs_enabled) {
+		disable_irq(g->irq_stall);
+		if (g->irq_stall != g->irq_nonstall)
+			disable_irq(g->irq_nonstall);
+		g->irqs_enabled = 0;
+	}
+
+	gk20a_scale_suspend(dev);
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	gk20a_cde_suspend(l);
+#endif
+
+	ret = gk20a_prepare_poweroff(g);
+	if (ret)
+		goto error;
+
+	/* Decrement platform power refcount */
+	if (platform->idle)
+		platform->idle(dev);
+
+	/* Stop CPU from accessing the GPU registers. */
+	gk20a_lockout_registers(g);
+
+	nvgpu_mutex_release(&g->poweroff_lock);
+	return 0;
+
+error:
+	/* re-enable IRQs if previously enabled */
+	if (irqs_enabled) {
+		enable_irq(g->irq_stall);
+		if (g->irq_stall != g->irq_nonstall)
+			enable_irq(g->irq_nonstall);
+		g->irqs_enabled = 1;
+	}
+
+	gk20a_scale_resume(dev);
+done:
+	nvgpu_mutex_release(&g->poweroff_lock);
+
+	return ret;
+}
+
+static struct of_device_id tegra_gk20a_of_match[] = {
+#ifdef CONFIG_TEGRA_GK20A
+	{ .compatible = "nvidia,tegra210-gm20b",
+		.data = &gm20b_tegra_platform },
+	{ .compatible = "nvidia,tegra186-gp10b",
+		.data = &gp10b_tegra_platform },
+	{ .compatible = "nvidia,gv11b",
+		.data = &gv11b_tegra_platform },
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,gv11b-vgpu",
+		.data = &gv11b_vgpu_tegra_platform},
+#endif
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
+		.data = &vgpu_tegra_platform },
+#endif
+#endif
+
+	{ },
+};
+
+#ifdef CONFIG_PM
+/**
+ * __gk20a_do_idle() - force the GPU to idle and railgate
+ *
+ * On success, this call MUST be balanced by the caller with __gk20a_do_unidle()
+ *
+ * Acquires two locks: &l->busy_lock and &platform->railgate_lock.
+ * On success, we hold these locks and return;
+ * on failure, we release these locks and return.
+ */
+int __gk20a_do_idle(struct gk20a *g, bool force_reset)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct nvgpu_timeout timeout;
+	int ref_cnt;
+	int target_ref_cnt = 0;
+	bool is_railgated;
+	int err = 0;
+
+	/*
+	 * Hold back deterministic submits and changes to deterministic
+	 * channels - this must be outside the power busy locks.
+	 */
+	gk20a_channel_deterministic_idle(g);
+
+	/* acquire busy lock to block other busy() calls */
+	down_write(&l->busy_lock);
+
+	/* acquire railgate lock to prevent unrailgate in the midst of do_idle() */
+	nvgpu_mutex_acquire(&platform->railgate_lock);
+
+	/* check if it is already railgated
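+	 * (if so, we return success with both locks still held)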
*/ + if (platform->is_railgated(dev)) + return 0; + + /* + * release railgate_lock, prevent suspend by incrementing usage counter, + * re-acquire railgate_lock + */ + nvgpu_mutex_release(&platform->railgate_lock); + pm_runtime_get_sync(dev); + + /* + * One refcount taken in this API + * If User disables rail gating, we take one more + * extra refcount + */ + if (g->can_railgate) + target_ref_cnt = 1; + else + target_ref_cnt = 2; + nvgpu_mutex_acquire(&platform->railgate_lock); + + nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, + NVGPU_TIMER_CPU_TIMER); + + /* check and wait until GPU is idle (with a timeout) */ + do { + nvgpu_usleep_range(1000, 1100); + ref_cnt = atomic_read(&dev->power.usage_count); + } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); + + if (ref_cnt != target_ref_cnt) { + nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", + ref_cnt); + goto fail_drop_usage_count; + } + + /* check if global force_reset flag is set */ + force_reset |= platform->force_reset_in_do_idle; + + nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, + NVGPU_TIMER_CPU_TIMER); + + if (g->can_railgate && !force_reset) { + /* + * Case 1 : GPU railgate is supported + * + * if GPU is now idle, we will have only one ref count, + * drop this ref which will rail gate the GPU + */ + pm_runtime_put_sync(dev); + + /* add sufficient delay to allow GPU to rail gate */ + nvgpu_msleep(g->railgate_delay); + + /* check in loop if GPU is railgated or not */ + do { + nvgpu_usleep_range(1000, 1100); + is_railgated = platform->is_railgated(dev); + } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); + + if (is_railgated) { + return 0; + } else { + nvgpu_err(g, "failed to idle in timeout"); + goto fail_timeout; + } + } else { + /* + * Case 2 : GPU railgate is not supported or we explicitly + * do not want to depend on runtime PM + * + * if GPU is now idle, call prepare_poweroff() to save the + * state and then do explicit railgate + * + * __gk20a_do_unidle() needs to unrailgate, call + * finalize_poweron(), and then call pm_runtime_put_sync() + * to balance the GPU usage counter + */ + + /* Save the GPU state */ + err = gk20a_pm_prepare_poweroff(dev); + if (err) + goto fail_drop_usage_count; + + /* railgate GPU */ + platform->railgate(dev); + + nvgpu_udelay(10); + + g->forced_reset = true; + return 0; + } + +fail_drop_usage_count: + pm_runtime_put_noidle(dev); +fail_timeout: + nvgpu_mutex_release(&platform->railgate_lock); + up_write(&l->busy_lock); + gk20a_channel_deterministic_unidle(g); + return -EBUSY; +} + +/** + * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called + * from outside of GPU driver + * + * In success, this call MUST be balanced by caller with gk20a_do_unidle() + */ +static int gk20a_do_idle(void *_g) +{ + struct gk20a *g = (struct gk20a *)_g; + + return __gk20a_do_idle(g, true); +} + +/** + * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() + */ +int __gk20a_do_unidle(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int err; + + if (g->forced_reset) { + /* + * If we did a forced-reset/railgate + * then unrailgate the GPU here first + */ + platform->unrailgate(dev); + + /* restore the GPU state */ + err = gk20a_pm_finalize_poweron(dev); + if (err) + return err; + + /* balance GPU usage counter */ + pm_runtime_put_sync(dev); + + g->forced_reset = false; + } + + /* release the lock and 
open up all other busy() calls */ + nvgpu_mutex_release(&platform->railgate_lock); + up_write(&l->busy_lock); + + gk20a_channel_deterministic_unidle(g); + + return 0; +} + +/** + * gk20a_do_unidle() - wrap up for __gk20a_do_unidle() + */ +static int gk20a_do_unidle(void *_g) +{ + struct gk20a *g = (struct gk20a *)_g; + + return __gk20a_do_unidle(g); +} +#endif + +void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, + struct resource **out) +{ + struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); + + if (!r) + return NULL; + if (out) + *out = r; + return devm_ioremap_resource(&dev->dev, r); +} + +static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_stall(g); +} + +static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_nonstall(g); +} + +static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_thread_stall(g); +} + +void gk20a_remove_support(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct sim_nvgpu_linux *sim_linux; + + tegra_unregister_idle_unidle(gk20a_do_idle); + + nvgpu_kfree(g, g->dbg_regops_tmp_buf); + + nvgpu_remove_channel_support_linux(l); + + if (g->pmu.remove_support) + g->pmu.remove_support(&g->pmu); + + if (g->gr.remove_support) + g->gr.remove_support(&g->gr); + + if (g->mm.remove_ce_support) + g->mm.remove_ce_support(&g->mm); + + if (g->fifo.remove_support) + g->fifo.remove_support(&g->fifo); + + if (g->mm.remove_support) + g->mm.remove_support(&g->mm); + + if (g->sim) { + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + if (g->sim->remove_support) + g->sim->remove_support(g); + if (sim_linux->remove_support_linux) + sim_linux->remove_support_linux(g); + } + + /* free mappings to registers, etc */ + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + nvgpu_remove_usermode_support(g); + + nvgpu_free_enabled_flags(g); +} + +static int gk20a_init_support(struct platform_device *dev) +{ + int err = -ENOMEM; + struct gk20a *g = get_gk20a(&dev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); + + l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, + &l->reg_mem); + if (IS_ERR(l->regs)) { + nvgpu_err(g, "failed to remap gk20a registers"); + err = PTR_ERR(l->regs); + goto fail; + } + + l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, + &l->bar1_mem); + if (IS_ERR(l->bar1)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(l->bar1); + goto fail; + } + + err = nvgpu_init_sim_support_linux(g, dev); + if (err) + goto fail; + err = nvgpu_init_sim_support(g); + if (err) + goto fail_sim; + + nvgpu_init_usermode_support(g); + return 0; + +fail_sim: + nvgpu_remove_sim_support_linux(g); +fail: + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + return err; +} + +static int gk20a_pm_railgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + struct gk20a *g = get_gk20a(dev); + + /* if platform is already railgated, then just return */ + if (platform->is_railgated && platform->is_railgated(dev)) + return ret; + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_gate_start = jiffies; + + if 
(g->pstats.railgating_cycle_count >= 1) + g->pstats.total_rail_ungate_time_ms = + g->pstats.total_rail_ungate_time_ms + + jiffies_to_msecs(g->pstats.last_rail_gate_start - + g->pstats.last_rail_ungate_complete); +#endif + + if (platform->railgate) + ret = platform->railgate(dev); + if (ret) { + nvgpu_err(g, "failed to railgate platform, err=%d", ret); + return ret; + } + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_gate_complete = jiffies; +#endif + ret = tegra_fuse_clock_disable(); + if (ret) + nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); + + return ret; +} + +static int gk20a_pm_unrailgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + struct gk20a *g = get_gk20a(dev); + + ret = tegra_fuse_clock_enable(); + if (ret) { + nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); + return ret; + } +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_ungate_start = jiffies; + if (g->pstats.railgating_cycle_count >= 1) + g->pstats.total_rail_gate_time_ms = + g->pstats.total_rail_gate_time_ms + + jiffies_to_msecs(g->pstats.last_rail_ungate_start - + g->pstats.last_rail_gate_complete); + + g->pstats.railgating_cycle_count++; +#endif + + trace_gk20a_pm_unrailgate(dev_name(dev)); + + if (platform->unrailgate) { + nvgpu_mutex_acquire(&platform->railgate_lock); + ret = platform->unrailgate(dev); + nvgpu_mutex_release(&platform->railgate_lock); + } + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_ungate_complete = jiffies; +#endif + + return ret; +} + +/* + * Remove association of the driver with OS interrupt handler + */ +void nvgpu_free_irq(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + devm_free_irq(dev, g->irq_stall, g); + if (g->irq_stall != g->irq_nonstall) + devm_free_irq(dev, g->irq_nonstall, g); +} + +/* + * Idle the GPU in preparation of shutdown/remove. + * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW + * state to prevent further activity on the driver SW side. 
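+ * (start_unload() sets NVGPU_DRIVER_IS_DYING, so later gk20a_busy() calls
+ * fail).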
+ * On driver removal quiesce() should be called after start_unload() + */ +int nvgpu_quiesce(struct gk20a *g) +{ + int err; + struct device *dev = dev_from_gk20a(g); + + if (g->power_on) { + err = gk20a_wait_for_idle(g); + if (err) { + nvgpu_err(g, "failed to idle GPU, err=%d", err); + return err; + } + + err = gk20a_fifo_disable_all_engine_activity(g, true); + if (err) { + nvgpu_err(g, + "failed to disable engine activity, err=%d", + err); + return err; + } + + err = gk20a_fifo_wait_engine_idle(g); + if (err) { + nvgpu_err(g, "failed to idle engines, err=%d", + err); + return err; + } + } + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_prepare_poweroff(dev); + else + err = gk20a_pm_prepare_poweroff(dev); + + if (err) + nvgpu_err(g, "failed to prepare for poweroff, err=%d", + err); + + return err; +} + +static void gk20a_pm_shutdown(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a *g = platform->g; + int err; + + nvgpu_info(g, "shutting down"); + + /* vgpu has nothing to clean up currently */ + if (gk20a_gpu_is_virtual(&pdev->dev)) + return; + + if (!g->power_on) + goto finish; + + gk20a_driver_start_unload(g); + + /* If GPU is already railgated, + * just prevent more requests, and return */ + if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { + __pm_runtime_disable(&pdev->dev, false); + nvgpu_info(g, "already railgated, shut down complete"); + return; + } + + /* Prevent more requests by disabling Runtime PM */ + __pm_runtime_disable(&pdev->dev, false); + + err = nvgpu_quiesce(g); + if (err) + goto finish; + + err = gk20a_pm_railgate(&pdev->dev); + if (err) + nvgpu_err(g, "failed to railgate, err=%d", err); + +finish: + nvgpu_info(g, "shut down complete"); +} + +#ifdef CONFIG_PM +static int gk20a_pm_runtime_resume(struct device *dev) +{ + int err = 0; + + err = gk20a_pm_unrailgate(dev); + if (err) + goto fail; + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_finalize_poweron(dev); + else + err = gk20a_pm_finalize_poweron(dev); + if (err) + goto fail_poweron; + + return 0; + +fail_poweron: + gk20a_pm_railgate(dev); +fail: + return err; +} + +static int gk20a_pm_runtime_suspend(struct device *dev) +{ + int err = 0; + struct gk20a *g = get_gk20a(dev); + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_prepare_poweroff(dev); + else + err = gk20a_pm_prepare_poweroff(dev); + if (err) { + nvgpu_err(g, "failed to power off, err=%d", err); + goto fail; + } + + err = gk20a_pm_railgate(dev); + if (err) + goto fail; + + return 0; + +fail: + gk20a_pm_finalize_poweron(dev); + pm_runtime_mark_last_busy(dev); + return err; +} + +static int gk20a_pm_suspend(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + int ret = 0; + int idle_usage_count = 0; + + if (!g->power_on) { + if (!pm_runtime_enabled(dev)) + gk20a_pm_railgate(dev); + return 0; + } + + if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count) + return -EBUSY; + + ret = gk20a_pm_runtime_suspend(dev); + if (ret) + return ret; + + if (platform->suspend) + platform->suspend(dev); + + g->suspended = true; + + return 0; +} + +static int gk20a_pm_resume(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int ret = 0; + + if (!g->suspended) { + if (!pm_runtime_enabled(dev)) + gk20a_pm_unrailgate(dev); + return 0; + } + + ret = gk20a_pm_runtime_resume(dev); + + g->suspended = false; + + return ret; +} + +static const struct dev_pm_ops gk20a_pm_ops = { + .runtime_resume = 
gk20a_pm_runtime_resume, + .runtime_suspend = gk20a_pm_runtime_suspend, + .resume = gk20a_pm_resume, + .suspend = gk20a_pm_suspend, +}; +#endif + +static int gk20a_pm_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int err = 0; + + nvgpu_log_fn(g, " "); + + /* + * Initialise pm runtime. For railgate disable + * case, set autosuspend delay to negative which + * will suspend runtime pm + */ + if (g->railgate_delay && g->can_railgate) + pm_runtime_set_autosuspend_delay(dev, + g->railgate_delay); + else + pm_runtime_set_autosuspend_delay(dev, -1); + + pm_runtime_use_autosuspend(dev); + pm_runtime_enable(dev); + + return err; +} + +/* + * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. + */ +void gk20a_driver_start_unload(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); + + down_write(&l->busy_lock); + __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); + /* GR SW ready needs to be invalidated at this time with the busy lock + * held to prevent a racing condition on the gr/mm code */ + g->gr.sw_ready = false; + g->sw_ready = false; + up_write(&l->busy_lock); + + if (g->is_virtual) + return; + + gk20a_wait_for_idle(g); + + nvgpu_wait_for_deferred_interrupts(g); + + if (l->nonstall_work_queue) { + cancel_work_sync(&l->nonstall_fn_work); + destroy_workqueue(l->nonstall_work_queue); + l->nonstall_work_queue = NULL; + } +} + +static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) +{ + gk20a_get_platform(&pdev->dev)->g = gk20a; +} + +static int nvgpu_read_fuse_overrides(struct gk20a *g) +{ + struct device_node *np = nvgpu_get_node(g); + u32 *fuses; + int count, i; + + if (!np) /* may be pcie device */ + return 0; + + count = of_property_count_elems_of_size(np, "fuse-overrides", 8); + if (count <= 0) + return count; + + fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); + if (!fuses) + return -ENOMEM; + of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); + for (i = 0; i < count; i++) { + u32 fuse, value; + + fuse = fuses[2 * i]; + value = fuses[2 * i + 1]; + switch (fuse) { + case GM20B_FUSE_OPT_TPC_DISABLE: + g->tpc_fs_mask_user = ~value; + break; + case GP10B_FUSE_OPT_ECC_EN: + g->gr.fecs_feature_override_ecc_val = value; + break; + default: + nvgpu_err(g, "ignore unknown fuse override %08x", fuse); + break; + } + } + + nvgpu_kfree(g, fuses); + + return 0; +} + +static int gk20a_probe(struct platform_device *dev) +{ + struct nvgpu_os_linux *l = NULL; + struct gk20a *gk20a; + int err; + struct gk20a_platform *platform = NULL; + struct device_node *np; + + if (dev->dev.of_node) { + const struct of_device_id *match; + + match = of_match_device(tegra_gk20a_of_match, &dev->dev); + if (match) + platform = (struct gk20a_platform *)match->data; + } else + platform = (struct gk20a_platform *)dev->dev.platform_data; + + if (!platform) { + dev_err(&dev->dev, "no platform data\n"); + return -ENODATA; + } + + platform_set_drvdata(dev, platform); + + if (gk20a_gpu_is_virtual(&dev->dev)) + return vgpu_probe(dev); + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(&dev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + hash_init(l->ecc_sysfs_stats_htable); + + gk20a = &l->g; + + nvgpu_log_fn(gk20a, " "); + + nvgpu_init_gk20a(gk20a); + set_gk20a(dev, gk20a); + l->dev = &dev->dev; + gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; + + nvgpu_kmem_init(gk20a); + + err = nvgpu_init_enabled_flags(gk20a); + if (err) + goto 
return_err; + + np = nvgpu_get_node(gk20a); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); + } + + if (nvgpu_platform_is_simulation(gk20a)) + __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); + + gk20a->irq_stall = platform_get_irq(dev, 0); + gk20a->irq_nonstall = platform_get_irq(dev, 1); + if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { + err = -ENXIO; + goto return_err; + } + + err = devm_request_threaded_irq(&dev->dev, + gk20a->irq_stall, + gk20a_intr_isr_stall, + gk20a_intr_thread_stall, + 0, "gk20a_stall", gk20a); + if (err) { + dev_err(&dev->dev, + "failed to request stall intr irq @ %d\n", + gk20a->irq_stall); + goto return_err; + } + err = devm_request_irq(&dev->dev, + gk20a->irq_nonstall, + gk20a_intr_isr_nonstall, + 0, "gk20a_nonstall", gk20a); + if (err) { + dev_err(&dev->dev, + "failed to request non-stall intr irq @ %d\n", + gk20a->irq_nonstall); + goto return_err; + } + disable_irq(gk20a->irq_stall); + if (gk20a->irq_stall != gk20a->irq_nonstall) + disable_irq(gk20a->irq_nonstall); + + err = gk20a_init_support(dev); + if (err) + goto return_err; + + err = nvgpu_read_fuse_overrides(gk20a); + +#ifdef CONFIG_RESET_CONTROLLER + platform->reset_control = devm_reset_control_get(&dev->dev, NULL); + if (IS_ERR(platform->reset_control)) + platform->reset_control = NULL; +#endif + + err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); + if (err) + goto return_err; + + err = gk20a_pm_init(&dev->dev); + if (err) { + dev_err(&dev->dev, "pm init failed"); + goto return_err; + } + + gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); + + return 0; + +return_err: + nvgpu_free_enabled_flags(gk20a); + + /* + * Last since the above allocs may use data structures in here. 
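+	 * NVGPU_KMEM_FINI_FORCE_CLEANUP below also frees anything still tracked.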
+ */ + nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); + + kfree(l); + + return err; +} + +int nvgpu_remove(struct device *dev, struct class *class) +{ + struct gk20a *g = get_gk20a(dev); +#ifdef CONFIG_NVGPU_SUPPORT_CDE + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); +#endif + struct gk20a_platform *platform = gk20a_get_platform(dev); + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_quiesce(g); + WARN(err, "gpu failed to idle during driver removal"); + + if (nvgpu_mem_is_valid(&g->syncpt_mem)) + nvgpu_dma_free(g, &g->syncpt_mem); + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + if (platform->has_cde) + gk20a_cde_destroy(l); +#endif + +#ifdef CONFIG_GK20A_CTXSW_TRACE + gk20a_ctxsw_trace_cleanup(g); +#endif + + gk20a_sched_ctrl_cleanup(g); + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_exit(dev); + + nvgpu_clk_arb_cleanup_arbiter(g); + + gk20a_user_deinit(dev, class); + + gk20a_debug_deinit(g); + + nvgpu_remove_sysfs(dev); + + if (platform->secure_buffer.destroy) + platform->secure_buffer.destroy(g, + &platform->secure_buffer); + + if (pm_runtime_enabled(dev)) + pm_runtime_disable(dev); + + if (platform->remove) + platform->remove(dev); + + nvgpu_log_fn(g, "removed"); + + return err; +} + +static int __exit gk20a_remove(struct platform_device *pdev) +{ + int err; + struct device *dev = &pdev->dev; + struct gk20a *g = get_gk20a(dev); + + if (gk20a_gpu_is_virtual(dev)) + return vgpu_remove(pdev); + + err = nvgpu_remove(dev, &nvgpu_class); + + set_gk20a(pdev, NULL); + gk20a_put(g); + + return err; +} + +static struct platform_driver gk20a_driver = { + .probe = gk20a_probe, + .remove = __exit_p(gk20a_remove), + .shutdown = gk20a_pm_shutdown, + .driver = { + .owner = THIS_MODULE, + .name = "gk20a", + .probe_type = PROBE_PREFER_ASYNCHRONOUS, +#ifdef CONFIG_OF + .of_match_table = tegra_gk20a_of_match, +#endif +#ifdef CONFIG_PM + .pm = &gk20a_pm_ops, +#endif + .suppress_bind_attrs = true, + } +}; + +struct class nvgpu_class = { + .owner = THIS_MODULE, + .name = CLASS_NAME, +}; + +static int __init gk20a_init(void) +{ + + int ret; + + ret = class_register(&nvgpu_class); + if (ret) + return ret; + + ret = nvgpu_pci_init(); + if (ret) + return ret; + + return platform_driver_register(&gk20a_driver); +} + +static void __exit gk20a_exit(void) +{ + nvgpu_pci_exit(); + platform_driver_unregister(&gk20a_driver); + class_unregister(&nvgpu_class); +} + +MODULE_LICENSE("GPL v2"); +module_init(gk20a_init); +module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/os/linux/module.h b/drivers/gpu/nvgpu/os/linux/module.h new file mode 100644 index 00000000..ab4bca03 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
+#define __NVGPU_COMMON_LINUX_MODULE_H__
+
+struct gk20a;
+struct device;
+struct nvgpu_os_linux;
+
+int gk20a_pm_finalize_poweron(struct device *dev);
+int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
+void gk20a_remove_support(struct gk20a *g);
+void gk20a_driver_start_unload(struct gk20a *g);
+int nvgpu_quiesce(struct gk20a *g);
+int nvgpu_remove(struct device *dev, struct class *class);
+void nvgpu_free_irq(struct gk20a *g);
+struct device_node *nvgpu_get_node(struct gk20a *g);
+void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
+		struct resource **out);
+extern struct class nvgpu_class;
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.c b/drivers/gpu/nvgpu/os/linux/module_usermode.c
new file mode 100644
index 00000000..ea01c1b2
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/module_usermode.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+
+#include
+
+#include "os_linux.h"
+
+/*
+ * Locks out the driver from accessing GPU registers. This prevents access to
+ * these registers after the GPU has been clock or power gated. This should help
+ * find annoying bugs where register reads and writes are silently dropped
+ * after the GPU has been turned off. On older chips these reads and writes can
+ * also lock the entire CPU up.
+ */
+void nvgpu_lockout_usermode_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = NULL;
+}
+
+/*
+ * Undoes nvgpu_lockout_usermode_registers().
+ */
+void nvgpu_restore_usermode_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = l->usermode_regs_saved;
+}
+
+void nvgpu_remove_usermode_support(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	if (l->usermode_regs) {
+		l->usermode_regs = NULL;
+	}
+}
+
+void nvgpu_init_usermode_support(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = l->regs + usermode_cfg0_r();
+	l->usermode_regs_saved = l->usermode_regs;
+}
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.h b/drivers/gpu/nvgpu/os/linux/module_usermode.h
new file mode 100644
index 00000000..b17053ca
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/module_usermode.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVGPU_MODULE_T19X_H__ +#define __NVGPU_MODULE_T19X_H__ + +struct gk20a; + +void nvgpu_init_usermode_support(struct gk20a *g); +void nvgpu_remove_usermode_support(struct gk20a *g); +void nvgpu_lockout_usermode_registers(struct gk20a *g); +void nvgpu_restore_usermode_registers(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c new file mode 100644 index 00000000..93925803 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "os_linux.h" + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "platform_gk20a.h" + +static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + u64 ipa = sg_phys((struct scatterlist *)sgl); + + if (platform->phys_addr) + return platform->phys_addr(g, ipa); + + return ipa; +} + +int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) +{ + void *cpu_va; + pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? + PAGE_KERNEL : + pgprot_writecombine(PAGE_KERNEL); + + if (mem->aperture != APERTURE_SYSMEM) + return 0; + + /* + * WAR for bug 2040115: we already will always have a coherent vmap() + * for all sysmem buffers. The prot settings are left alone since + * eventually this should be deleted. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return 0; + + /* + * A CPU mapping is implicitly made for all SYSMEM DMA allocations that + * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make + * another CPU mapping. + */ + if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) + return 0; + + if (WARN_ON(mem->cpu_va)) { + nvgpu_warn(g, "nested"); + return -EBUSY; + } + + cpu_va = vmap(mem->priv.pages, + PAGE_ALIGN(mem->size) >> PAGE_SHIFT, + 0, prot); + + if (WARN_ON(!cpu_va)) + return -ENOMEM; + + mem->cpu_va = cpu_va; + return 0; +} + +void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) +{ + if (mem->aperture != APERTURE_SYSMEM) + return; + + /* + * WAR for bug 2040115: skip this since the map will be taken care of + * during the free in the DMA API. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return; + + /* + * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping + * already made by the DMA API. 
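+ * Only a mapping we made ourselves with vmap() in nvgpu_mem_begin() is
+ * torn down here.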
+ */ + if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) + return; + + vunmap(mem->cpu_va); + mem->cpu_va = NULL; +} + +static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, *dest_u32 = *arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + *dest_u32++ = gk20a_readl(g, r); + r += sizeof(u32); + } + + *arg = dest_u32; +} + +u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) +{ + u32 data = 0; + + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + + WARN_ON(!ptr); + data = ptr[w]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 value; + u32 *p = &value; + + nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), + sizeof(u32), pramin_access_batch_rd_n, &p); + + data = value; + + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } + + return data; +} + +u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) +{ + WARN_ON(offset & 3); + return nvgpu_mem_rd32(g, mem, offset / sizeof(u32)); +} + +void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, + u32 offset, void *dest, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *src = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); + memcpy(dest, src, size); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", + src, *dest, size); +#endif + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 *dest_u32 = dest; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_rd_n, &dest_u32); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, *src_u32 = *arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + writel_relaxed(*src_u32++, l->regs + r); + r += sizeof(u32); + } + + *arg = src_u32; +} + +void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) +{ + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + + WARN_ON(!ptr); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + ptr[w] = data; + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 value = data; + u32 *p = &value; + + nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), + sizeof(u32), pramin_access_batch_wr_n, &p); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) +{ + WARN_ON(offset & 3); + nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data); +} + +void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, + void *src, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... 
[%d bytes]", + dest, *src, size); +#endif + memcpy(dest, src, size); + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 *src_u32 = src; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_wr_n, &src_u32); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, repeat = **arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + writel_relaxed(repeat, l->regs + r); + r += sizeof(u32); + } +} + +void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, + u32 c, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + WARN_ON(c & ~0xff); + + c &= 0xff; + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]", + dest, c, size); +#endif + memset(dest, c, size); + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); + u32 *p = &repeat_value; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_set, &p); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +/* + * Obtain a SYSMEM address from a Linux SGL. This should eventually go away + * and/or become private to this file once all bad usages of Linux SGLs are + * cleaned up in the driver. + */ +u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) +{ + if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || + !nvgpu_iommuable(g)) + return g->ops.mm.gpu_phys_addr(g, NULL, + __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); + + if (sg_dma_address(sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, NULL, + __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); + + if (sg_dma_address(sgl) == DMA_ERROR_CODE) + return 0; + + return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); +} + +/* + * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM + * allocation. + */ +static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) +{ + return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); +} + +/* + * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM + * allocation. + * + * Note: this API does not make sense to use for _VIDMEM_ buffers with greater + * than one scatterlist chunk. If there's more than one scatterlist chunk then + * the buffer will not be contiguous. As such the base address probably isn't + * very useful. This is true for SYSMEM as well, if there's no IOMMU. + * + * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's + * an IOMMU present and enabled for the GPU. + * + * %attrs can be NULL. If it is not NULL then it may be inspected to determine + * if the address needs to be modified before writing into a PTE. + */ +u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct nvgpu_page_alloc *alloc; + + if (mem->aperture == APERTURE_SYSMEM) + return nvgpu_mem_get_addr_sysmem(g, mem); + + /* + * Otherwise get the vidmem address. 
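The pramin_access_batch_* callbacks above are driven by nvgpu_pramin_access_batched() (defined outside this patch), which splits a byte range into PRAMIN-window-sized chunks and invokes the callback once per chunk with a register offset and a word count; the callback advances the caller's buffer pointer through **arg. A stand-alone sketch of that chunk-and-callback shape, with a hypothetical window size and a stub in place of gk20a_readl():

#include <stdint.h>
#include <stdio.h>

#define WINDOW_WORDS 4	/* hypothetical per-window limit */

typedef void (*batch_fn)(uint32_t start, uint32_t words, uint32_t **arg);

/* Mimics the rd_n callback: copy 'words' items out, advance *arg. */
static void batch_rd(uint32_t start, uint32_t words, uint32_t **arg)
{
	uint32_t *dst = *arg;

	while (words--)
		*dst++ = start++;	/* stand-in for gk20a_readl(g, r) */
	*arg = dst;
}

static void access_batched(uint32_t offset, uint32_t words,
			   batch_fn fn, uint32_t **arg)
{
	while (words) {
		uint32_t n = words < WINDOW_WORDS ? words : WINDOW_WORDS;

		fn(offset, n, arg);	/* one call per aperture window */
		offset += n;
		words -= n;
	}
}

int main(void)
{
	uint32_t buf[10], *p = buf;

	access_batched(100, 10, batch_rd, &p);
	printf("%u %u\n", buf[0], buf[9]);	/* 100 109 */
	return 0;
}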
+ */ + alloc = mem->vidmem_alloc; + + /* This API should not be used with > 1 chunks */ + WARN_ON(alloc->nr_chunks != 1); + + return alloc->base; +} + +/* + * This should only be used on contiguous buffers regardless of whether + * there's an IOMMU present/enabled. This applies to both SYSMEM and + * VIDMEM. + */ +u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) +{ + /* + * For a VIDMEM buf, this is identical to simply get_addr() so just fall + * back to that. + */ + if (mem->aperture == APERTURE_VIDMEM) + return nvgpu_mem_get_addr(g, mem); + + return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); +} + +/* + * Be careful how you use this! You are responsible for correctly freeing this + * memory. + */ +int nvgpu_mem_create_from_mem(struct gk20a *g, + struct nvgpu_mem *dest, struct nvgpu_mem *src, + int start_page, int nr_pages) +{ + int ret; + u64 start = start_page * PAGE_SIZE; + u64 size = nr_pages * PAGE_SIZE; + dma_addr_t new_iova; + + if (src->aperture != APERTURE_SYSMEM) + return -EINVAL; + + /* Some silly things a caller might do... */ + if (size > src->size) + return -EINVAL; + if ((start + size) > src->size) + return -EINVAL; + + dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; + dest->aperture = src->aperture; + dest->skip_wmb = src->skip_wmb; + dest->size = size; + + /* + * Re-use the CPU mapping only if the mapping was made by the DMA API. + * + * Bug 2040115: the DMA API wrapper makes the mapping that we should + * re-use. + */ + if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || + nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); + + dest->priv.pages = src->priv.pages + start_page; + dest->priv.flags = src->priv.flags; + + new_iova = sg_dma_address(src->priv.sgt->sgl) ? + sg_dma_address(src->priv.sgt->sgl) + start : 0; + + /* + * Make a new SG table that is based only on the subset of pages that + * is passed to us. This table gets freed by the dma free routines. + */ + if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) + ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, + src->priv.pages + start_page, + new_iova, size); + else + ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, + new_iova, size); + + return ret; +} + +int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, + struct page **pages, int nr_pages) +{ + struct sg_table *sgt; + struct page **our_pages = + nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); + + if (!our_pages) + return -ENOMEM; + + memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); + + if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, + nr_pages * PAGE_SIZE)) { + nvgpu_kfree(g, our_pages); + return -ENOMEM; + } + + /* + * If we are making an SGT from physical pages we can be reasonably + * certain that this should bypass the SMMU - thus we set the DMA (aka + * IOVA) address to 0. This tells the GMMU mapping code to not make a + * mapping directed to the SMMU. 
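nvgpu_mem_create_from_mem() above builds a page-granular view onto an existing sysmem buffer: after validating the requested range, it aliases the parent's CPU mapping and page array at an offset rather than copying anything, which is why the caller is warned about freeing. A sketch of the view arithmetic and the two range checks, using a hypothetical PAGE_SIZE:

#include <errno.h>
#include <stdint.h>

#define PAGE_SIZE 4096u	/* illustrative; the kernel supplies this */

struct buf_view {
	uint8_t *cpu_va;	/* aliases the parent mapping, not a copy */
	uint64_t size;
};

/* Carve [start_page, start_page + nr_pages) out of a parent buffer. */
static int buf_make_view(struct buf_view *dst, uint8_t *parent_va,
			 uint64_t parent_size, uint64_t start_page,
			 uint64_t nr_pages)
{
	uint64_t start = start_page * PAGE_SIZE;
	uint64_t size = nr_pages * PAGE_SIZE;

	/* Both checks are needed: size on its own, and start + size. */
	if (size > parent_size || start + size > parent_size)
		return -EINVAL;

	dst->cpu_va = parent_va + start;
	dst->size = size;
	return 0;
}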
+ */ + sg_dma_address(sgt->sgl) = 0; + + dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; + dest->aperture = APERTURE_SYSMEM; + dest->skip_wmb = 0; + dest->size = PAGE_SIZE * nr_pages; + + dest->priv.flags = 0; + dest->priv.pages = our_pages; + dest->priv.sgt = sgt; + + return 0; +} + +#ifdef CONFIG_TEGRA_GK20A_NVHOST +int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, + u64 src_phys, int nr_pages) +{ + struct page **pages = + nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); + int i, ret = 0; + + if (!pages) + return -ENOMEM; + + for (i = 0; i < nr_pages; i++) + pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); + + ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); + nvgpu_kfree(g, pages); + + return ret; +} +#endif + +static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) +{ + return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); +} + +static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) +{ + return (u64)__nvgpu_sgl_phys(g, sgl); +} + +static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) +{ + return (u64)sg_dma_address((struct scatterlist *)sgl); +} + +static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) +{ + return (u64)((struct scatterlist *)sgl)->length; +} + +static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, + struct nvgpu_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + if (sg_dma_address((struct scatterlist *)sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, attrs, + __nvgpu_sgl_phys(g, sgl)); + + if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) + return 0; + + return nvgpu_mem_iommu_translate(g, + sg_dma_address((struct scatterlist *)sgl)); +} + +static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, + struct nvgpu_sgt *sgt) +{ + if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) + return false; + return true; +} + +static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * Free this SGT. All we do is free the passed SGT. The actual Linux + * SGT/SGL needs to be freed separately. 
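The nvgpu_mem_linux_sgl_* wrappers above exist so OS-independent code can walk a scatter-gather list without knowing it is a Linux struct scatterlist: every operation is routed through the nvgpu_sgt_ops table and iteration is a next-pointer chase over an opaque handle. A sketch of the consuming side under that assumption:

#include <stddef.h>
#include <stdint.h>

struct sgl;	/* opaque to the consumer; Linux scatterlist underneath */

struct sgt_ops {
	struct sgl *(*next)(struct sgl *sgl);
	uint64_t (*phys)(struct sgl *sgl);
	uint64_t (*length)(struct sgl *sgl);
};

/* Total size of a scatter list, using only the ops table. */
static uint64_t sgt_total_length(const struct sgt_ops *ops, struct sgl *head)
{
	uint64_t total = 0;
	struct sgl *sgl;

	for (sgl = head; sgl != NULL; sgl = ops->next(sgl))
		total += ops->length(sgl);

	return total;
}

The payoff of the vtable is that the GMMU mapping code compiles with no Linux headers at all; the Linux, vidmem, and (eventually) other-OS backends each supply their own ops.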
+ */ + nvgpu_kfree(g, sgt); +} + +static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { + .sgl_next = nvgpu_mem_linux_sgl_next, + .sgl_phys = nvgpu_mem_linux_sgl_phys, + .sgl_dma = nvgpu_mem_linux_sgl_dma, + .sgl_length = nvgpu_mem_linux_sgl_length, + .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, + .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, + .sgt_free = nvgpu_mem_linux_sgl_free, +}; + +static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( + struct gk20a *g, + struct scatterlist *linux_sgl) +{ + struct nvgpu_page_alloc *vidmem_alloc; + + vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); + if (!vidmem_alloc) + return NULL; + + return &vidmem_alloc->sgt; +} + +struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) +{ + struct nvgpu_sgt *nvgpu_sgt; + struct scatterlist *linux_sgl = sgt->sgl; + + if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) + return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); + + nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); + if (!nvgpu_sgt) + return NULL; + + nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); + + nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; + nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; + + return nvgpu_sgt; +} + +struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem) +{ + return nvgpu_linux_sgt_create(g, mem->priv.sgt); +} diff --git a/drivers/gpu/nvgpu/os/linux/nvhost.c b/drivers/gpu/nvgpu/os/linux/nvhost.c new file mode 100644 index 00000000..6ab60248 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include "nvhost_priv.h" + +#include "gk20a/gk20a.h" +#include "os_linux.h" +#include "module.h" + +int nvgpu_get_nvhost_dev(struct gk20a *g) +{ + struct device_node *np = nvgpu_get_node(g); + struct platform_device *host1x_pdev = NULL; + const __be32 *host1x_ptr; + + host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); + if (host1x_ptr) { + struct device_node *host1x_node = + of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); + + host1x_pdev = of_find_device_by_node(host1x_node); + if (!host1x_pdev) { + nvgpu_warn(g, "host1x device not available"); + return -EPROBE_DEFER; + } + + } else { + if (g->has_syncpoints) { + nvgpu_warn(g, "host1x reference not found. 
assuming no syncpoints support"); + g->has_syncpoints = false; + } + return 0; + } + + g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); + if (!g->nvhost_dev) + return -ENOMEM; + + g->nvhost_dev->host1x_pdev = host1x_pdev; + + return 0; +} + +void nvgpu_free_nvhost_dev(struct gk20a *g) +{ + nvgpu_kfree(g, g->nvhost_dev); +} + +int nvgpu_nvhost_module_busy_ext( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); +} + +void nvgpu_nvhost_module_idle_ext( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + nvhost_module_idle_ext(nvhost_dev->host1x_pdev); +} + +void nvgpu_nvhost_debug_dump_device( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + nvhost_debug_dump_device(nvhost_dev->host1x_pdev); +} + +const char *nvgpu_nvhost_syncpt_get_name( + struct nvgpu_nvhost_dev *nvhost_dev, int id) +{ + return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); +} + +bool nvgpu_nvhost_syncpt_is_valid_pt_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); +} + +int nvgpu_nvhost_syncpt_is_expired_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) +{ + return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, + id, thresh); +} + +u32 nvgpu_nvhost_syncpt_incr_max_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) +{ + return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); +} + +int nvgpu_nvhost_intr_register_notifier( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, + void (*callback)(void *, int), void *private_data) +{ + return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, + id, thresh, + callback, private_data); +} + +void nvgpu_nvhost_syncpt_set_min_eq_max_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); +} + +void nvgpu_nvhost_syncpt_put_ref_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); +} + +u32 nvgpu_nvhost_get_syncpt_host_managed( + struct nvgpu_nvhost_dev *nvhost_dev, + u32 param, const char *syncpt_name) +{ + return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, + param, syncpt_name); +} + +u32 nvgpu_nvhost_get_syncpt_client_managed( + struct nvgpu_nvhost_dev *nvhost_dev, + const char *syncpt_name) +{ + return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, + syncpt_name); +} + +int nvgpu_nvhost_syncpt_wait_timeout_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, + u32 thresh, u32 timeout, u32 *value, struct timespec *ts) +{ + return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, + id, thresh, timeout, value, ts); +} + +int nvgpu_nvhost_syncpt_read_ext_check( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) +{ + return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); +} + +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); +} + +void nvgpu_nvhost_syncpt_set_safe_state( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + u32 val; + + /* + * Add large number of increments to current value + * so that all waiters on this syncpoint are released + * + * We don't expect any case where more than 0x10000 increments + * are pending + */ + val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); + val += 0x10000; + + nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); + nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, 
val); +} + +int nvgpu_nvhost_create_symlink(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + int err = 0; + + if (g->nvhost_dev && + (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { + err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, + &dev->kobj, + dev_name(dev)); + } + + return err; +} + +void nvgpu_nvhost_remove_symlink(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (g->nvhost_dev && + (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { + sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, + dev_name(dev)); + } +} + +#ifdef CONFIG_SYNC +u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) +{ + return nvhost_sync_pt_id(pt); +} + +u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) +{ + return nvhost_sync_pt_thresh(pt); +} + +struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) +{ + return nvhost_sync_fdget(fd); +} + +int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) +{ + return nvhost_sync_num_pts(fence); +} + +struct sync_fence *nvgpu_nvhost_sync_create_fence( + struct nvgpu_nvhost_dev *nvhost_dev, + u32 id, u32 thresh, const char *name) +{ + struct nvhost_ctrl_sync_fence_info pt = { + .id = id, + .thresh = thresh, + }; + + return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); +} +#endif /* CONFIG_SYNC */ + +#ifdef CONFIG_TEGRA_T19X_GRHOST +int nvgpu_nvhost_syncpt_unit_interface_get_aperture( + struct nvgpu_nvhost_dev *nvhost_dev, + u64 *base, size_t *size) +{ + return nvhost_syncpt_unit_interface_get_aperture( + nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); +} + +u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) +{ + return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); +} + +int nvgpu_nvhost_syncpt_init(struct gk20a *g) +{ + int err = 0; + + if (!g->has_syncpoints) + return -ENOSYS; + + err = nvgpu_get_nvhost_dev(g); + if (err) { + nvgpu_err(g, "host1x device not available"); + g->has_syncpoints = false; + return -ENOSYS; + } + + err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( + g->nvhost_dev, + &g->syncpt_unit_base, + &g->syncpt_unit_size); + if (err) { + nvgpu_err(g, "Failed to get syncpt interface"); + g->has_syncpoints = false; + return -ENOSYS; + } + + g->syncpt_size = + nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); + nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", + g->syncpt_unit_base, g->syncpt_unit_size, + g->syncpt_size); + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/os/linux/nvhost_priv.h b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h new file mode 100644 index 00000000..c03390a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
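nvgpu_nvhost_syncpt_set_safe_state() above releases every waiter by jumping the syncpoint value far past any plausible pending threshold (the driver assumes fewer than 0x10000 outstanding increments) and pinning min == max so the syncpoint goes quiescent. This works because syncpoint expiry is a wrapping-safe comparison; a sketch of that test and the jump:

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe: true once value has reached or passed thresh. */
static int syncpt_expired(uint32_t value, uint32_t thresh)
{
	return (int32_t)(value - thresh) >= 0;
}

int main(void)
{
	uint32_t min = 0xfffffff0u;	/* current syncpoint value */
	uint32_t thresh = min + 5;	/* a pending wait, past the wrap */

	printf("%d\n", syncpt_expired(min, thresh));	/* 0: still waiting */
	min += 0x10000;			/* the safe-state jump */
	printf("%d\n", syncpt_expired(min, thresh));	/* 1: released */
	return 0;
}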
+ */
+
+#ifndef __NVGPU_NVHOST_PRIV_H__
+#define __NVGPU_NVHOST_PRIV_H__
+
+struct nvgpu_nvhost_dev {
+	struct platform_device *host1x_pdev;
+};
+
+#endif /* __NVGPU_NVHOST_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/nvlink.c b/drivers/gpu/nvgpu/os/linux/nvlink.c
new file mode 100644
index 00000000..c93514c0
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvlink.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include "module.h"
+
+#ifdef CONFIG_TEGRA_NVLINK
+int nvgpu_nvlink_read_dt_props(struct gk20a *g)
+{
+	struct device_node *np;
+	struct nvlink_device *ndev = g->nvlink.priv;
+	u32 local_dev_id;
+	u32 local_link_id;
+	u32 remote_dev_id;
+	u32 remote_link_id;
+	bool is_master;
+
+	/* Parse DT */
+	np = nvgpu_get_node(g);
+	if (!np)
+		goto fail;
+
+	np = of_get_child_by_name(np, "nvidia,nvlink");
+	if (!np)
+		goto fail;
+
+	np = of_get_child_by_name(np, "endpoint");
+	if (!np)
+		goto fail;
+
+	/* Parse DT structure to detect endpoint topology */
+	of_property_read_u32(np, "local_dev_id", &local_dev_id);
+	of_property_read_u32(np, "local_link_id", &local_link_id);
+	of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
+	of_property_read_u32(np, "remote_link_id", &remote_link_id);
+	is_master = of_property_read_bool(np, "is_master");
+
+	/* Check that we are in dGPU mode */
+	if (local_dev_id != NVLINK_ENDPT_GV100) {
+		nvgpu_err(g, "Local nvlink device is not dGPU");
+		return -EINVAL;
+	}
+
+	ndev->is_master = is_master;
+	ndev->device_id = local_dev_id;
+	ndev->link.link_id = local_link_id;
+	ndev->link.remote_dev_info.device_id = remote_dev_id;
+	ndev->link.remote_dev_info.link_id = remote_link_id;
+
+	return 0;
+
+fail:
+	nvgpu_info(g, "nvlink endpoint not found or invalid in DT");
+	return -ENODEV;
+}
+#endif /* CONFIG_TEGRA_NVLINK */
+
+void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
+{
+	/* MSS_NVLINK_1_BASE */
+	void __iomem *soc1 = ioremap(0x01f20010, 4096);
+	/* MSS_NVLINK_2_BASE */
+	void __iomem *soc2 = ioremap(0x01f40010, 4096);
+	/* MSS_NVLINK_3_BASE */
+	void __iomem *soc3 = ioremap(0x01f60010, 4096);
+	/* MSS_NVLINK_4_BASE */
+	void __iomem *soc4 = ioremap(0x01f80010, 4096);
+	u32 val;
+
+	nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
+
+	val = readl_relaxed(soc1);
+	writel_relaxed(val, soc1);
+	val = readl_relaxed(soc1 + 4);
+	writel_relaxed(val, soc1 + 4);
+
+	val = readl_relaxed(soc2);
+	writel_relaxed(val, soc2);
+	val = readl_relaxed(soc2 + 4);
+	writel_relaxed(val, soc2 + 4);
+
+	val = readl_relaxed(soc3);
+	writel_relaxed(val, soc3);
+	val = readl_relaxed(soc3 + 4);
+	writel_relaxed(val, soc3 + 4);
+
+	val = readl_relaxed(soc4);
+	writel_relaxed(val, soc4);
+	val = readl_relaxed(soc4 + 4);
+	writel_relaxed(val, soc4 + 4);
+}
diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android.c b/drivers/gpu/nvgpu/os/linux/os_fence_android.c
new file mode 100644
index 00000000..9be8c6c0
--- /dev/null
+++ 
b/drivers/gpu/nvgpu/os/linux/os_fence_android.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include + +#include "gk20a/gk20a.h" + +#include "../drivers/staging/android/sync.h" + +inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) +{ + struct sync_fence *fence = (struct sync_fence *)s->priv; + return fence; +} + +static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) +{ + fence_out->priv = NULL; + fence_out->g = NULL; + fence_out->ops = NULL; +} + +void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, + struct gk20a *g, const struct nvgpu_os_fence_ops *fops, + struct sync_fence *fence) +{ + fence_out->g = g; + fence_out->ops = fops; + fence_out->priv = (void *)fence; +} + +void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) +{ + struct sync_fence *fence = nvgpu_get_sync_fence(s); + + sync_fence_put(fence); + + nvgpu_os_fence_clear(s); +} + +void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) +{ + struct sync_fence *fence = nvgpu_get_sync_fence(s); + + sync_fence_get(fence); + sync_fence_install(fence, fd); +} + +int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + int err = -ENOSYS; + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); +#endif + + if (err) + err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); + + if (err) + nvgpu_err(c->g, "error obtaining fence from fd %d", fd); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c new file mode 100644 index 00000000..25832417 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
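nvgpu_os_fence_fdget() above resolves a user fd by trying the syncpoint backend first (when CONFIG_TEGRA_GK20A_NVHOST is built in; -ENOSYS stands in for "not compiled in") and falling back to the semaphore backend on any failure. The same try-chain shape in stand-alone C, with hypothetical backends in place of the real fdget calls:

#include <errno.h>
#include <stdio.h>

struct fence { int backend; };

static int backend_a_get(struct fence *f, int fd)
{
	if (fd % 2 == 0)	/* pretend backend A only knows odd fds */
		return -EINVAL;
	f->backend = 'A';
	return 0;
}

static int backend_b_get(struct fence *f, int fd)
{
	f->backend = 'B';	/* fallback accepts anything */
	return 0;
}

static int fence_from_fd(struct fence *f, int fd)
{
	int err = -ENOSYS;	/* value seen when backend A is absent */

	err = backend_a_get(f, fd);
	if (err)
		err = backend_b_get(f, fd);
	return err;
}

int main(void)
{
	struct fence f;

	fence_from_fd(&f, 3);
	printf("%c\n", f.backend);	/* A */
	fence_from_fd(&f, 4);
	printf("%c\n", f.backend);	/* B */
	return 0;
}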
+ */ + +#include + +#include +#include +#include +#include + +#include "gk20a/channel_sync_gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "sync_sema_android.h" + +#include "../drivers/staging/android/sync.h" + +int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, + struct priv_cmd_entry *wait_cmd, + struct channel_gk20a *c, + int max_wait_cmds) +{ + int err; + int wait_cmd_size; + int num_wait_cmds; + int i; + struct nvgpu_semaphore *sema; + struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); + + wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); + + num_wait_cmds = sync_fence->num_fences; + if (num_wait_cmds == 0) + return 0; + + if (max_wait_cmds && num_wait_cmds > max_wait_cmds) + return -EINVAL; + + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, + wait_cmd); + if (err) { + nvgpu_err(c->g, "not enough priv cmd buffer space"); + return err; + } + + for (i = 0; i < num_wait_cmds; i++) { + struct fence *f = sync_fence->cbs[i].sync_pt; + struct sync_pt *pt = sync_pt_from_fence(f); + + sema = gk20a_sync_pt_sema(pt); + gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd, + wait_cmd_size, i); + } + + return 0; +} + +static const struct nvgpu_os_fence_ops sema_ops = { + .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, + .drop_ref = nvgpu_os_fence_android_drop_ref, + .install_fence = nvgpu_os_fence_android_install_fd, +}; + +int nvgpu_os_fence_sema_create( + struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, + struct nvgpu_semaphore *sema) +{ + struct sync_fence *fence; + + fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", + nvgpu_semaphore_gpu_ro_va(sema)); + + if (!fence) { + nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", + (u32)nvgpu_semaphore_gpu_ro_va(sema)); + + return -ENOMEM; + } + + nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); + + return 0; +} + +int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + struct sync_fence *fence = gk20a_sync_fence_fdget(fd); + + if (!fence) + return -EINVAL; + + nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c new file mode 100644 index 00000000..d7a72fcd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
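The semaphore wait generator above allocates one private command buffer for the whole sync_fence (num_fences waits of wait_cmd_size words each, in a single gk20a_channel_alloc_priv_cmdbuf() call) and then fills it slot by slot, the loop index selecting each wait's offset. A sketch of that fixed-stride packing; the method-header constant and field layout here are invented purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WAIT_CMD_WORDS 4	/* hypothetical size of one semaphore wait */

/* Emit one wait into slot 'i' of a buffer sized num * WAIT_CMD_WORDS. */
static void emit_wait(uint32_t *buf, int i, uint64_t sema_va, uint32_t payload)
{
	uint32_t *cmd = buf + (size_t)i * WAIT_CMD_WORDS;

	cmd[0] = 0x20010004;		/* stand-in for the method header */
	cmd[1] = (uint32_t)(sema_va >> 32);
	cmd[2] = (uint32_t)sema_va;
	cmd[3] = payload;		/* value the semaphore must reach */
}

int main(void)
{
	uint32_t buf[3 * WAIT_CMD_WORDS];
	int i;

	memset(buf, 0, sizeof(buf));
	for (i = 0; i < 3; i++)
		emit_wait(buf, i, 0x100000 + i * 0x10, 1);

	printf("0x%x\n", buf[WAIT_CMD_WORDS + 2]);	/* 0x100010 */
	return 0;
}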
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/channel_sync_gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "../drivers/staging/android/sync.h" + +int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, + struct priv_cmd_entry *wait_cmd, + struct channel_gk20a *c, + int max_wait_cmds) +{ + int err; + int wait_cmd_size; + int num_wait_cmds; + int i; + u32 wait_id; + struct sync_pt *pt; + + struct sync_fence *sync_fence = (struct sync_fence *)s->priv; + + if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) + return -EINVAL; + + /* validate syncpt ids */ + for (i = 0; i < sync_fence->num_fences; i++) { + pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); + wait_id = nvgpu_nvhost_sync_pt_id(pt); + if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( + c->g->nvhost_dev, wait_id)) { + return -EINVAL; + } + } + + num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); + if (num_wait_cmds == 0) + return 0; + + wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, wait_cmd); + if (err) { + nvgpu_err(c->g, + "not enough priv cmd buffer space"); + return err; + } + + for (i = 0; i < sync_fence->num_fences; i++) { + struct fence *f = sync_fence->cbs[i].sync_pt; + struct sync_pt *pt = sync_pt_from_fence(f); + u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); + u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); + + err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value, + wait_cmd, wait_cmd_size, i, true); + } + + WARN_ON(i != num_wait_cmds); + + return 0; +} + +static const struct nvgpu_os_fence_ops syncpt_ops = { + .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, + .drop_ref = nvgpu_os_fence_android_drop_ref, + .install_fence = nvgpu_os_fence_android_install_fd, +}; + +int nvgpu_os_fence_syncpt_create( + struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( + nvhost_dev, id, thresh, "fence"); + + if (!fence) { + nvgpu_err(c->g, "error constructing fence %s", "fence"); + return -ENOMEM; + } + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} + +int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); + + if (!fence) + return -ENOMEM; + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h new file mode 100644 index 00000000..4dcce322 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVGPU_OS_LINUX_H +#define NVGPU_OS_LINUX_H + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "cde.h" +#include "sched.h" + +struct nvgpu_os_linux_ops { + struct { + void (*get_program_numbers)(struct gk20a *g, + u32 block_height_log2, + u32 shader_parameter, + int *hprog, int *vprog); + bool (*need_scatter_buffer)(struct gk20a *g); + int (*populate_scatter_buffer)(struct gk20a *g, + struct sg_table *sgt, + size_t surface_size, + void *scatter_buffer_ptr, + size_t scatter_buffer_size); + } cde; +}; + +struct nvgpu_os_linux { + struct gk20a g; + struct device *dev; + + struct { + struct cdev cdev; + struct device *node; + } channel; + + struct { + struct cdev cdev; + struct device *node; + } ctrl; + + struct { + struct cdev cdev; + struct device *node; + } as_dev; + + struct { + struct cdev cdev; + struct device *node; + } dbg; + + struct { + struct cdev cdev; + struct device *node; + } prof; + + struct { + struct cdev cdev; + struct device *node; + } tsg; + + struct { + struct cdev cdev; + struct device *node; + } ctxsw; + + struct { + struct cdev cdev; + struct device *node; + } sched; + + dev_t cdev_region; + + struct devfreq *devfreq; + + struct device_dma_parameters dma_parms; + + atomic_t hw_irq_stall_count; + atomic_t hw_irq_nonstall_count; + + struct nvgpu_cond sw_irq_stall_last_handled_wq; + atomic_t sw_irq_stall_last_handled; + + atomic_t nonstall_ops; + + struct nvgpu_cond sw_irq_nonstall_last_handled_wq; + atomic_t sw_irq_nonstall_last_handled; + + struct work_struct nonstall_fn_work; + struct workqueue_struct *nonstall_work_queue; + + struct resource *reg_mem; + void __iomem *regs; + void __iomem *regs_saved; + + struct resource *bar1_mem; + void __iomem *bar1; + void __iomem *bar1_saved; + + void __iomem *usermode_regs; + void __iomem *usermode_regs_saved; + + struct nvgpu_os_linux_ops ops; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; + struct dentry *debugfs_alias; + + struct dentry *debugfs_ltc_enabled; + struct dentry *debugfs_timeouts_enabled; + struct dentry *debugfs_gr_idle_timeout_default; + struct dentry *debugfs_disable_bigpage; + struct dentry *debugfs_gr_default_attrib_cb_size; + + struct dentry *debugfs_timeslice_low_priority_us; + struct dentry *debugfs_timeslice_medium_priority_us; + struct dentry *debugfs_timeslice_high_priority_us; + struct dentry *debugfs_runlist_interleave; + struct dentry *debugfs_allocators; + struct dentry *debugfs_xve; + struct dentry *debugfs_kmem; + struct dentry *debugfs_hal; + + struct dentry *debugfs_force_preemption_cilp; + struct dentry *debugfs_force_preemption_gfxp; + struct dentry *debugfs_dump_ctxsw_stats; +#endif + DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); + + struct gk20a_cde_app cde_app; + + struct rw_semaphore busy_lock; + + struct gk20a_sched_ctrl sched_ctrl; + + bool init_done; +}; + +static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) +{ + return container_of(g, struct nvgpu_os_linux, g); +} + +static inline struct device *dev_from_gk20a(struct gk20a *g) +{ + return nvgpu_os_linux_from_gk20a(g)->dev; +} + +#define INTERFACE_NAME "nvhost%s-gpu" + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/os_sched.c b/drivers/gpu/nvgpu/os/linux/os_sched.c new file mode 100644 index 00000000..586b35eb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_sched.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
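struct nvgpu_os_linux above embeds struct gk20a by value, so nvgpu_os_linux_from_gk20a() can recover the Linux wrapper from a bare gk20a pointer with container_of(), i.e. by subtracting the member's offset from the member's address. Only the inner pointer ever crosses the OS-independent boundary. The same pattern in portable C:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct core { int id; };

struct os_wrap {
	int os_state;
	struct core g;	/* embedded by value, as in nvgpu_os_linux */
};

int main(void)
{
	struct os_wrap w = { .os_state = 7, .g = { .id = 42 } };
	struct core *g = &w.g;

	/* Recover the wrapper from the embedded member. */
	struct os_wrap *back = container_of(g, struct os_wrap, g);

	printf("%d %d\n", back->os_state, g->id);	/* 7 42 */
	return 0;
}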
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +int nvgpu_current_tid(struct gk20a *g) +{ + return current->pid; +} + +int nvgpu_current_pid(struct gk20a *g) +{ + return current->tgid; +} diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c new file mode 100644 index 00000000..1011b441 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.c @@ -0,0 +1,861 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "clk/clk.h" +#include "clk/clk_mclk.h" +#include "module.h" +#include "intr.h" +#include "sysfs.h" +#include "os_linux.h" +#include "platform_gk20a.h" +#include + +#include "pci.h" +#include "pci_usermode.h" + +#include "os_linux.h" +#include "driver_common.h" + +#define PCI_INTERFACE_NAME "card-%s%%s" + +static int nvgpu_pci_tegra_probe(struct device *dev) +{ + return 0; +} + +static int nvgpu_pci_tegra_remove(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + if (g->ops.gr.remove_gr_sysfs) + g->ops.gr.remove_gr_sysfs(g); + + return 0; +} + +static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) +{ + return false; +} + +static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) +{ + long ret = (long)rate; + + if (rate == UINT_MAX) + ret = BOOT_GPC2CLK_MHZ * 1000000UL; + + return ret; +} + +static struct gk20a_platform nvgpu_pci_device[] = { + { /* DEVICE=0x1c35 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86063000, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c36 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = 
nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86062d00, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c37 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86063000, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c75 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86065300, + .hardcode_sw_threshold = false, + .ina3221_dcb_index = 1, + .ina3221_i2c_address = 0x80, + .ina3221_i2c_port = 0x1, + }, + { /* DEVICE=PG503 SKU 201 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88001e00, + .hardcode_sw_threshold = false, + .run_preos = true, + }, + { /* DEVICE=PG503 SKU 200 ES */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + 
.can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88001e00, + .hardcode_sw_threshold = false, + .run_preos = true, + }, + { + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88000126, + .hardcode_sw_threshold = false, + .run_preos = true, + .has_syncpoints = true, + }, + { /* SKU250 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = false, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x1, + .hardcode_sw_threshold = false, + .run_preos = true, + .has_syncpoints = true, + }, + { /* SKU 0x1e3f */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + /* + * WAR: PCIE X1 is very slow, set to very high value till nvlink is up + */ + .ch_wdt_timeout_ms = 30000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x1, + .hardcode_sw_threshold = false, + .unified_memory = false, + }, + { /* 0x1eba */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg 
= false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x90040109, + .hardcode_sw_threshold = false, + .has_syncpoints = true, + }, +}; + +static struct pci_device_id nvgpu_pci_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 0, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 1, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 2, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 3, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 4, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 5, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 6, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 7, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 8, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 9, + }, + {} +}; + +static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + irqreturn_t ret_stall; + irqreturn_t ret_nonstall; + + ret_stall = nvgpu_intr_stall(g); + ret_nonstall = nvgpu_intr_nonstall(g); + +#if defined(CONFIG_PCI_MSI) + /* Send MSI EOI */ + if (g->ops.xve.rearm_msi && g->msi_enabled) + g->ops.xve.rearm_msi(g); +#endif + + return (ret_stall == IRQ_NONE) ? 
ret_nonstall : IRQ_WAKE_THREAD; +} + +static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_thread_stall(g); +} + +static int nvgpu_pci_init_support(struct pci_dev *pdev) +{ + int err = 0; + struct gk20a *g = get_gk20a(&pdev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->regs = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (IS_ERR(l->regs)) { + nvgpu_err(g, "failed to remap gk20a registers"); + err = PTR_ERR(l->regs); + goto fail; + } + + l->bar1 = ioremap(pci_resource_start(pdev, 1), + pci_resource_len(pdev, 1)); + if (IS_ERR(l->bar1)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(l->bar1); + goto fail; + } + + err = nvgpu_init_sim_support_linux_pci(g); + if (err) + goto fail; + err = nvgpu_init_sim_support_pci(g); + if (err) + goto fail_sim; + + nvgpu_pci_init_usermode_support(l); + + return 0; + + fail_sim: + nvgpu_remove_sim_support_linux_pci(g); + fail: + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + return err; +} + +static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = S_IRUGO | S_IWUGO; + return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); +} + +static struct class nvgpu_pci_class = { + .owner = THIS_MODULE, + .name = "nvidia-pci-gpu", + .devnode = nvgpu_pci_devnode, +}; + +#ifdef CONFIG_PM +static int nvgpu_pci_pm_runtime_resume(struct device *dev) +{ + return gk20a_pm_finalize_poweron(dev); +} + +static int nvgpu_pci_pm_runtime_suspend(struct device *dev) +{ + return 0; +} + +static const struct dev_pm_ops nvgpu_pci_pm_ops = { + .runtime_resume = nvgpu_pci_pm_runtime_resume, + .runtime_suspend = nvgpu_pci_pm_runtime_suspend, + .resume = nvgpu_pci_pm_runtime_resume, + .suspend = nvgpu_pci_pm_runtime_suspend, +}; +#endif + +static int nvgpu_pci_pm_init(struct device *dev) +{ +#ifdef CONFIG_PM + struct gk20a *g = get_gk20a(dev); + + if (!g->can_railgate) { + pm_runtime_disable(dev); + } else { + if (g->railgate_delay) + pm_runtime_set_autosuspend_delay(dev, + g->railgate_delay); + + /* + * Runtime PM for PCI devices is disabled by default, + * so we need to enable it first + */ + pm_runtime_use_autosuspend(dev); + pm_runtime_put_noidle(dev); + pm_runtime_allow(dev); + } +#endif + return 0; +} + +static int nvgpu_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pent) +{ + struct gk20a_platform *platform = NULL; + struct nvgpu_os_linux *l; + struct gk20a *g; + int err; + char nodefmt[64]; + struct device_node *np; + + /* make sure driver_data is a sane index */ + if (pent->driver_data >= sizeof(nvgpu_pci_device) / + sizeof(nvgpu_pci_device[0])) { + return -EINVAL; + } + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(&pdev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + hash_init(l->ecc_sysfs_stats_htable); + + g = &l->g; + nvgpu_init_gk20a(g); + + nvgpu_kmem_init(g); + + /* Allocate memory to hold platform data*/ + platform = (struct gk20a_platform *)nvgpu_kzalloc( g, + sizeof(struct gk20a_platform)); + if (!platform) { + dev_err(&pdev->dev, "couldn't allocate platform data"); + err = -ENOMEM; + goto err_free_l; + } + + /* copy detected device data to allocated platform space*/ + memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data], + sizeof(struct gk20a_platform)); + + pci_set_drvdata(pdev, platform); + + err = nvgpu_init_enabled_flags(g); + if (err) + goto 
err_free_platform; + + platform->g = g; + l->dev = &pdev->dev; + + np = nvgpu_get_node(g); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); + } + + err = pci_enable_device(pdev); + if (err) + goto err_free_platform; + pci_set_master(pdev); + + g->pci_vendor_id = pdev->vendor; + g->pci_device_id = pdev->device; + g->pci_subsystem_vendor_id = pdev->subsystem_vendor; + g->pci_subsystem_device_id = pdev->subsystem_device; + g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub + g->pci_revision = pdev->revision; + + g->ina3221_dcb_index = platform->ina3221_dcb_index; + g->ina3221_i2c_address = platform->ina3221_i2c_address; + g->ina3221_i2c_port = platform->ina3221_i2c_port; + g->hardcode_sw_threshold = platform->hardcode_sw_threshold; + +#if defined(CONFIG_PCI_MSI) + err = pci_enable_msi(pdev); + if (err) { + nvgpu_err(g, + "MSI could not be enabled, falling back to legacy"); + g->msi_enabled = false; + } else + g->msi_enabled = true; +#endif + + g->irq_stall = pdev->irq; + g->irq_nonstall = pdev->irq; + if (g->irq_stall < 0) { + err = -ENXIO; + goto err_disable_msi; + } + + err = devm_request_threaded_irq(&pdev->dev, + g->irq_stall, + nvgpu_pci_isr, + nvgpu_pci_intr_thread, +#if defined(CONFIG_PCI_MSI) + g->msi_enabled ? 0 : +#endif + IRQF_SHARED, "nvgpu", g); + if (err) { + nvgpu_err(g, + "failed to request irq @ %d", g->irq_stall); + goto err_disable_msi; + } + disable_irq(g->irq_stall); + + err = nvgpu_pci_init_support(pdev); + if (err) + goto err_free_irq; + + if (strchr(dev_name(&pdev->dev), '%')) { + nvgpu_err(g, "illegal character in device name"); + err = -EINVAL; + goto err_free_irq; + } + + snprintf(nodefmt, sizeof(nodefmt), + PCI_INTERFACE_NAME, dev_name(&pdev->dev)); + + err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); + if (err) + goto err_free_irq; + + err = nvgpu_pci_pm_init(&pdev->dev); + if (err) { + nvgpu_err(g, "pm init failed"); + goto err_free_irq; + } + + err = nvgpu_nvlink_probe(g); + /* + * ENODEV is a legal error which means there is no NVLINK + * any other error is fatal + */ + if (err) { + if (err != -ENODEV) { + nvgpu_err(g, "fatal error probing nvlink, bailing out"); + goto err_free_irq; + } + /* Enable Semaphore SHIM on nvlink only for now. 
*/ + __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); + g->has_syncpoints = false; + } else { + err = nvgpu_nvhost_syncpt_init(g); + if (err) { + if (err != -ENOSYS) { + nvgpu_err(g, "syncpt init failed"); + goto err_free_irq; + } + } + } + + g->mm.has_physical_mode = false; + + return 0; + +err_free_irq: + nvgpu_free_irq(g); +err_disable_msi: +#if defined(CONFIG_PCI_MSI) + if (g->msi_enabled) + pci_disable_msi(pdev); +#endif +err_free_platform: + nvgpu_kfree(g, platform); +err_free_l: + kfree(l); + return err; +} + +static void nvgpu_pci_remove(struct pci_dev *pdev) +{ + struct gk20a *g = get_gk20a(&pdev->dev); + struct device *dev = dev_from_gk20a(g); + int err; + + /* no support yet for unbind if DGPU is in VGPU mode */ + if (gk20a_gpu_is_virtual(dev)) + return; + + nvgpu_nvlink_remove(g); + + gk20a_driver_start_unload(g); + err = nvgpu_quiesce(g); + /* TODO: handle failure to idle */ + WARN(err, "gpu failed to idle during driver removal"); + + nvgpu_free_irq(g); + + nvgpu_remove(dev, &nvgpu_pci_class); + +#if defined(CONFIG_PCI_MSI) + if (g->msi_enabled) + pci_disable_msi(pdev); + else { + /* IRQ does not need to be enabled in MSI as the line is not + * shared + */ + enable_irq(g->irq_stall); + } +#endif + + /* free allocated platform data space */ + nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); + + gk20a_get_platform(&pdev->dev)->g = NULL; + gk20a_put(g); +} + +static struct pci_driver nvgpu_pci_driver = { + .name = "nvgpu", + .id_table = nvgpu_pci_table, + .probe = nvgpu_pci_probe, + .remove = nvgpu_pci_remove, +#ifdef CONFIG_PM + .driver.pm = &nvgpu_pci_pm_ops, +#endif +}; + +int __init nvgpu_pci_init(void) +{ + int ret; + + ret = class_register(&nvgpu_pci_class); + if (ret) + return ret; + + return pci_register_driver(&nvgpu_pci_driver); +} + +void __exit nvgpu_pci_exit(void) +{ + pci_unregister_driver(&nvgpu_pci_driver); + class_unregister(&nvgpu_pci_class); +} diff --git a/drivers/gpu/nvgpu/os/linux/pci.h b/drivers/gpu/nvgpu/os/linux/pci.h new file mode 100644 index 00000000..cc6b77b1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef NVGPU_PCI_H +#define NVGPU_PCI_H + +#ifdef CONFIG_GK20A_PCI +int nvgpu_pci_init(void); +void nvgpu_pci_exit(void); +#else +static inline int nvgpu_pci_init(void) { return 0; } +static inline void nvgpu_pci_exit(void) {} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.c b/drivers/gpu/nvgpu/os/linux/pci_usermode.c new file mode 100644 index 00000000..270b834b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +#include "os_linux.h" + +void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) +{ + l->usermode_regs = l->regs + usermode_cfg0_r(); + l->usermode_regs_saved = l->usermode_regs; +} diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.h b/drivers/gpu/nvgpu/os/linux/pci_usermode.h new file mode 100644 index 00000000..25a08d28 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __NVGPU_PCI_USERMODE_H__ +#define __NVGPU_PCI_USERMODE_H__ + +struct nvgpu_os_linux; + +void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c new file mode 100644 index 00000000..2a6ace37 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include "os_linux.h"
+
+#include "gk20a/gk20a.h"
+
+#include "platform_gk20a.h"
+#include "platform_gk20a_tegra.h"
+#include "platform_gp10b.h"
+#include "platform_gp10b_tegra.h"
+#include "platform_ecc_sysfs.h"
+
+/*
+ * 32-bit FNV-1 hash (multiply, then XOR) of the stat name; 0x811c9dc5 and
+ * 0x1000193 are the standard FNV offset basis and prime.
+ */
+static u32 gen_ecc_hash_key(char *str)
+{
+	int i = 0;
+	u32 hash_key = 0x811c9dc5;
+
+	while (str[i]) {
+		hash_key *= 0x1000193;
+		hash_key ^= (u32)(str[i]);
+		i++;
+	}
+
+	return hash_key;
+}
+
+static ssize_t ecc_stat_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	const char *ecc_stat_full_name = attr->attr.name;
+	const char *ecc_stat_base_name;
+	unsigned int hw_unit;
+	unsigned int subunit;
+	struct gk20a_ecc_stat *ecc_stat;
+	u32 hash_key;
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	/* Strip the unit prefix to recover the base name used as hash key */
+	if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
+						&subunit) == 2) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
+		hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
+	} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
+	} else {
+		return snprintf(buf,
+				PAGE_SIZE,
+				"Error: Invalid ECC stat name!\n");
+	}
+
+	hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
+
+	hash_for_each_possible(l->ecc_sysfs_stats_htable,
+				ecc_stat,
+				hash_node,
+				hash_key) {
+		if (hw_unit >= ecc_stat->count)
+			continue;
+		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					ecc_stat->counters[hw_unit]);
+	}
+
+	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
+}
+
+int nvgpu_gr_ecc_stat_create(struct device *dev,
+		int is_l2, char *ecc_stat_name,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	char *ltc_unit_name = "ltc";
+	char *gr_unit_name = "gpc0_tpc";
+	char *lts_unit_name = "lts";
+	int num_hw_units = 0;
+	int num_subunits = 0;
+
+	if (is_l2 == 1) {
+		num_hw_units = g->ltc_count;
+	} else if (is_l2 == 2) {
+		num_hw_units = g->ltc_count;
+		num_subunits = g->gr.slices_per_ltc;
+	} else {
+		num_hw_units = g->gr.tpc_count;
+	}
+
+	return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
+				is_l2 ? ltc_unit_name : gr_unit_name,
+				num_subunits ? lts_unit_name : NULL,
+				ecc_stat_name,
+				ecc_stat);
+}
+
+int nvgpu_ecc_stat_create(struct device *dev,
+		int num_hw_units, int num_subunits,
+		char *ecc_unit_name, char *ecc_subunit_name,
+		char *ecc_stat_name,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	int error = 0;
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	int hw_unit = 0;
+	int subunit = 0;
+	int element = 0;
+	u32 hash_key = 0;
+	struct device_attribute *dev_attr_array;
+
+	int num_elements = num_subunits ? num_subunits * num_hw_units :
+					num_hw_units;
+
+	/* Allocate arrays */
+	dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
+				num_elements);
+	ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
+	ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
+	if (!dev_attr_array || !ecc_stat->counters || !ecc_stat->names)
+		goto fail_alloc;
+
+	for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
+		ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
+					ECC_STAT_NAME_MAX_SIZE);
+		if (!ecc_stat->names[hw_unit])
+			goto fail_alloc;
+	}
+	ecc_stat->count = num_elements;
+
+	if (num_subunits) {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+			for (subunit = 0; subunit < num_subunits; subunit++) {
+				element = hw_unit * num_subunits + subunit;
+
+				snprintf(ecc_stat->names[element],
+					ECC_STAT_NAME_MAX_SIZE,
+					"%s%d_%s%d_%s",
+					ecc_unit_name,
+					hw_unit,
+					ecc_subunit_name,
+					subunit,
+					ecc_stat_name);
+
+				sysfs_attr_init(&dev_attr_array[element].attr);
+				dev_attr_array[element].attr.name =
+					ecc_stat->names[element];
+				dev_attr_array[element].attr.mode =
+					VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+				dev_attr_array[element].show = ecc_stat_show;
+				dev_attr_array[element].store = NULL;
+
+				/* Create sysfs file */
+				error |= device_create_file(dev,
+						&dev_attr_array[element]);
+			}
+		}
+	} else {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+
+			/* Fill in struct device_attribute members */
+			snprintf(ecc_stat->names[hw_unit],
+				ECC_STAT_NAME_MAX_SIZE,
+				"%s%d_%s",
+				ecc_unit_name,
+				hw_unit,
+				ecc_stat_name);
+
+			sysfs_attr_init(&dev_attr_array[hw_unit].attr);
+			dev_attr_array[hw_unit].attr.name =
+				ecc_stat->names[hw_unit];
+			dev_attr_array[hw_unit].attr.mode =
+				VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+			dev_attr_array[hw_unit].show = ecc_stat_show;
+			dev_attr_array[hw_unit].store = NULL;
+
+			/* Create sysfs file */
+			error |= device_create_file(dev,
+					&dev_attr_array[hw_unit]);
+		}
+	}
+
+	/* Add hash table entry */
+	hash_key = gen_ecc_hash_key(ecc_stat_name);
+	hash_add(l->ecc_sysfs_stats_htable,
+		&ecc_stat->hash_node,
+		hash_key);
+
+	ecc_stat->attr_array = dev_attr_array;
+
+	return error;
+
+fail_alloc:
+	/* Free whatever was allocated so far and reset the stat pointers */
+	if (ecc_stat->names) {
+		for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
+			nvgpu_kfree(g, ecc_stat->names[hw_unit]);
+	}
+	nvgpu_kfree(g, ecc_stat->names);
+	nvgpu_kfree(g, ecc_stat->counters);
+	nvgpu_kfree(g, dev_attr_array);
+	ecc_stat->names = NULL;
+	ecc_stat->counters = NULL;
+	return -ENOMEM;
+}
+
+void nvgpu_gr_ecc_stat_remove(struct device *dev,
+		int is_l2, struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int num_hw_units = 0;
+	int num_subunits = 0;
+
+	if (is_l2 == 1) {
+		num_hw_units = g->ltc_count;
+	} else if (is_l2 == 2) {
+		num_hw_units = g->ltc_count;
+		num_subunits = g->gr.slices_per_ltc;
+	} else {
+		num_hw_units = g->gr.tpc_count;
+	}
+
+	nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
+}
+
+void nvgpu_ecc_stat_remove(struct device *dev,
+		int num_hw_units, int num_subunits,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct device_attribute *dev_attr_array = ecc_stat->attr_array;
+	int hw_unit = 0;
+	int subunit = 0;
+	int element = 0;
+	int num_elements = num_subunits ?
num_subunits * num_hw_units : + num_hw_units; + + /* Remove sysfs files */ + if (num_subunits) { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { + for (subunit = 0; subunit < num_subunits; subunit++) { + element = hw_unit * num_subunits + subunit; + + device_remove_file(dev, + &dev_attr_array[element]); + } + } + } else { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) + device_remove_file(dev, &dev_attr_array[hw_unit]); + } + + /* Remove hash table entry */ + hash_del(&ecc_stat->hash_node); + + /* Free arrays */ + nvgpu_kfree(g, ecc_stat->counters); + + for (hw_unit = 0; hw_unit < num_elements; hw_unit++) + nvgpu_kfree(g, ecc_stat->names[hw_unit]); + + nvgpu_kfree(g, ecc_stat->names); + nvgpu_kfree(g, dev_attr_array); +} diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h new file mode 100644 index 00000000..d29f7bd3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _NVGPU_PLATFORM_SYSFS_H_ +#define _NVGPU_PLATFORM_SYSFS_H_ + +#include "gp10b/gr_gp10b.h" + +#define ECC_STAT_NAME_MAX_SIZE 100 + +int nvgpu_gr_ecc_stat_create(struct device *dev, + int is_l2, char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +int nvgpu_ecc_stat_create(struct device *dev, + int num_hw_units, int num_subunits, + char *ecc_unit_name, char *ecc_subunit_name, + char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +void nvgpu_gr_ecc_stat_remove(struct device *dev, + int is_l2, struct gk20a_ecc_stat *ecc_stat); +void nvgpu_ecc_stat_remove(struct device *dev, + int num_hw_units, int num_subunits, + struct gk20a_ecc_stat *ecc_stat); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h new file mode 100644 index 00000000..9a99b7fe --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h @@ -0,0 +1,317 @@ +/* + * GK20A Platform (SoC) Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+
+#ifndef _GK20A_PLATFORM_H_
+#define _GK20A_PLATFORM_H_
+
+#include
+
+#include
+
+#include "gk20a/gk20a.h"
+
+#define GK20A_CLKS_MAX 4
+
+struct gk20a;
+struct channel_gk20a;
+struct gr_ctx_buffer_desc;
+struct gk20a_scale_profile;
+
+struct secure_page_buffer {
+	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
+	size_t size;
+	dma_addr_t phys;
+	size_t used;
+};
+
+struct gk20a_platform {
+	/* Populated by the gk20a driver before probing the platform. */
+	struct gk20a *g;
+
+	/* Should be populated at probe. */
+	bool can_railgate_init;
+
+	/* Should be populated at probe. */
+	bool can_elpg_init;
+
+	/* Should be populated at probe. */
+	bool has_syncpoints;
+
+	/* Channel limit after which to start aggressive sync destroy */
+	unsigned int aggressive_sync_destroy_thresh;
+
+	/* Flag to set sync destroy aggressiveness */
+	bool aggressive_sync_destroy;
+
+	/* Set if ASPM should be disabled on boot; only makes sense for PCI */
+	bool disable_aspm;
+
+	/* Set if the platform can unify the small/large address spaces. */
+	bool unify_address_spaces;
+
+	/* Clock configuration is stored here. Platform probe is responsible
+	 * for filling this data. */
+	struct clk *clk[GK20A_CLKS_MAX];
+	int num_clks;
+	int maxmin_clk_id;
+
+#ifdef CONFIG_RESET_CONTROLLER
+	/* Reset control for device */
+	struct reset_control *reset_control;
+#endif
+
+	/* Delay before rail gating */
+	int railgate_delay_init;
+
+	/* Init value for slowdown factor */
+	u8 ldiv_slowdown_factor_init;
+
+	/* Second Level Clock Gating: true = enable, false = disable */
+	bool enable_slcg;
+
+	/* Block Level Clock Gating: true = enable, false = disable */
+	bool enable_blcg;
+
+	/* Engine Level Clock Gating: true = enable, false = disable */
+	bool enable_elcg;
+
+	/* Should be populated at probe. */
+	bool can_slcg;
+
+	/* Should be populated at probe. */
+	bool can_blcg;
+
+	/* Should be populated at probe. */
+	bool can_elcg;
+
+	/* Engine Level Power Gating: true = enable, false = disable */
+	bool enable_elpg;
+
+	/* Adaptive ELPG: true = enable, false = disable */
+	bool enable_aelpg;
+
+	/* PMU Perfmon: true = enable, false = disable */
+	bool enable_perfmon;
+
+	/* Memory System Clock Gating: true = enable, false = disable */
+	bool enable_mscg;
+
+	/* Timeout for per-channel watchdog (in ms) */
+	u32 ch_wdt_timeout_ms;
+
+	/* Disable big page support */
+	bool disable_bigpage;
+
+	/*
+	 * The gk20a_do_idle() API can take the GPU either into rail gate or
+	 * CAR reset. This flag can be used to force the CAR reset case
+	 * instead of rail gate.
+	 */
+	bool force_reset_in_do_idle;
+
+	/* Guest/VM id, needed for IPA to PA translation */
+	int vmid;
+
+	/* Initialize the platform interface of the gk20a driver.
+	 *
+	 * The platform implementation of this function must
+	 * - set the power and clocks of the gk20a device to a known
+	 *   state, and
+	 * - populate the gk20a_platform structure (a pointer to the
+	 *   structure can be obtained by calling gk20a_get_platform).
+	 *
+	 * After this function is finished, the driver will initialise
+	 * pm runtime and genpd based on the platform configuration.
+	 */
+	int (*probe)(struct device *dev);
+
+	/* Second stage initialisation - called once all power management
+	 * initialisations are done.
+	 */
+	int (*late_probe)(struct device *dev);
+
+	/* Remove device after power management has been done.
+	 */
+	int (*remove)(struct device *dev);
+
+	/* Poweron platform dependencies */
+	int (*busy)(struct device *dev);
+
+	/* Powerdown platform dependencies */
+	void (*idle)(struct device *dev);
+
+	/* Preallocated VPR buffer for kernel */
+	size_t secure_buffer_size;
+	struct secure_page_buffer secure_buffer;
+
+	/* Device is going to be suspended */
+	int (*suspend)(struct device *);
+
+	/* Called to turn off the device */
+	int (*railgate)(struct device *dev);
+
+	/* Called to turn on the device */
+	int (*unrailgate)(struct device *dev);
+	struct nvgpu_mutex railgate_lock;
+
+	/* Called to check state of device */
+	bool (*is_railgated)(struct device *dev);
+
+	/* Get supported frequency list */
+	int (*get_clk_freqs)(struct device *pdev,
+				unsigned long **freqs, int *num_freqs);
+
+	/* clk related support functions */
+	long (*clk_round_rate)(struct device *dev,
+				unsigned long rate);
+
+	/* Called to register GPCPLL with common clk framework */
+	int (*clk_register)(struct gk20a *g);
+
+	/* Platform specific scale init quirks */
+	void (*initscale)(struct device *dev);
+
+	/* Postscale callback is called after frequency change */
+	void (*postscale)(struct device *dev,
+			  unsigned long freq);
+
+	/* Prescale callback is called before frequency change */
+	void (*prescale)(struct device *dev);
+
+	/* Devfreq governor name. If scaling is enabled, we request
+	 * this governor to be used in scaling */
+	const char *devfreq_governor;
+
+	/* Quality of service notifier callback. If this is set, the scaling
+	 * routines will register a callback to QoS. Each time we receive
+	 * a new value, this callback gets called. */
+	int (*qos_notify)(struct notifier_block *nb,
+			  unsigned long n, void *p);
+
+	/* Called as part of debug dump. If the gpu gets hung, this function
+	 * is responsible for delivering all necessary debug data of other
+	 * hw units which may interact with the gpu without direct supervision
+	 * of the CPU.
+	 */
+	void (*dump_platform_dependencies)(struct device *dev);
+
+	/* Defined when SMMU stage-2 is enabled and we need to use physical
+	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
+	 * configuration, when the dGPU is in pass-through mode.
+	 */
+	u64 (*phys_addr)(struct gk20a *g, u64 ipa);
+
+	/* Callbacks to assert/deassert GPU reset */
+	int (*reset_assert)(struct device *dev);
+	int (*reset_deassert)(struct device *dev);
+	struct clk *clk_reset;
+	struct dvfs_rail *gpu_rail;
+
+	bool virtual_dev;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	void *vgpu_priv;
+#endif
+	/* Source frequency for ptimer in Hz */
+	u32 ptimer_src_freq;
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	bool has_cde;
+#endif
+
+	/* SoC name for finding firmware files */
+	const char *soc_name;
+
+	/* false if vidmem aperture actually points to sysmem */
+	bool honors_aperture;
+	/* unified or split memory with separate vidmem? */
+	bool unified_memory;
+
+	/*
+	 * DMA mask for Linux (both coherent and non-coherent). If not set,
+	 * defaults to 0x3ffffffff (i.e. a 34-bit mask).
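+	 * For example, the gm20b platform uses DMA_BIT_MASK(34) and the
+	 * gp10b platform DMA_BIT_MASK(36).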
+ */ + u64 dma_mask; + + /* minimum supported VBIOS version */ + u32 vbios_min_version; + + /* true if we run preos microcode on this board */ + bool run_preos; + + /* true if we need to program sw threshold for + * power limits + */ + bool hardcode_sw_threshold; + + /* i2c device index, port and address for INA3221 */ + u32 ina3221_dcb_index; + u32 ina3221_i2c_address; + u32 ina3221_i2c_port; + + /* stream id to use */ + u32 ltc_streamid; + + /* scaling rate */ + unsigned long cached_rate; +}; + +static inline struct gk20a_platform *gk20a_get_platform( + struct device *dev) +{ + return (struct gk20a_platform *)dev_get_drvdata(dev); +} + +#ifdef CONFIG_TEGRA_GK20A +extern struct gk20a_platform gm20b_tegra_platform; +extern struct gk20a_platform gp10b_tegra_platform; +extern struct gk20a_platform gv11b_tegra_platform; +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION +extern struct gk20a_platform vgpu_tegra_platform; +extern struct gk20a_platform gv11b_vgpu_tegra_platform; +#endif +#endif + +int gk20a_tegra_busy(struct device *dev); +void gk20a_tegra_idle(struct device *dev); +void gk20a_tegra_debug_dump(struct device *pdev); + +static inline struct gk20a *get_gk20a(struct device *dev) +{ + return gk20a_get_platform(dev)->g; +} +static inline struct gk20a *gk20a_from_dev(struct device *dev) +{ + if (!dev) + return NULL; + + return ((struct gk20a_platform *)dev_get_drvdata(dev))->g; +} +static inline bool gk20a_gpu_is_virtual(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + + return platform->virtual_dev; +} + +static inline int support_gk20a_pmu(struct device *dev) +{ + if (IS_ENABLED(CONFIG_GK20A_PMU)) { + /* gPMU is not supported for vgpu */ + return !gk20a_gpu_is_virtual(dev); + } + + return 0; +} + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c new file mode 100644 index 00000000..af55e5b6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c @@ -0,0 +1,957 @@ +/* + * GK20A Tegra Platform Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_TEGRA_DVFS) +#include +#endif +#include +#include +#include +#if defined(CONFIG_COMMON_CLK) +#include +#endif +#ifdef CONFIG_TEGRA_BWMGR +#include +#endif + +#include +#include + +#include +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gm20b/clk_gm20b.h" + +#include "scale.h" +#include "platform_gk20a.h" +#include "clk.h" +#include "os_linux.h" + +#include "../../../arch/arm/mach-tegra/iomap.h" +#include + +#define TEGRA_GK20A_BW_PER_FREQ 32 +#define TEGRA_GM20B_BW_PER_FREQ 64 +#define TEGRA_DDR3_BW_PER_FREQ 16 +#define TEGRA_DDR4_BW_PER_FREQ 16 +#define MC_CLIENT_GPU 34 +#define PMC_GPU_RG_CNTRL_0 0x2d4 + +#ifdef CONFIG_COMMON_CLK +#define GPU_RAIL_NAME "vdd-gpu" +#else +#define GPU_RAIL_NAME "vdd_gpu" +#endif + +extern struct device tegra_vpr_dev; + +#ifdef CONFIG_TEGRA_BWMGR +struct gk20a_emc_params { + unsigned long bw_ratio; + unsigned long freq_last_set; + struct tegra_bwmgr_client *bwmgr_cl; +}; +#else +struct gk20a_emc_params { + unsigned long bw_ratio; + unsigned long freq_last_set; +}; +#endif + +#define MHZ_TO_HZ(x) ((x) * 1000000) +#define HZ_TO_MHZ(x) ((x) / 1000000) + +static void gk20a_tegra_secure_page_destroy(struct gk20a *g, + struct secure_page_buffer *secure_buffer) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); + dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, + (void *)(uintptr_t)secure_buffer->phys, + secure_buffer->phys, __DMA_ATTR(attrs)); + + secure_buffer->destroy = NULL; +} + +static int gk20a_tegra_secure_alloc(struct gk20a *g, + struct gr_ctx_buffer_desc *desc, + size_t size) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct secure_page_buffer *secure_buffer = &platform->secure_buffer; + dma_addr_t phys; + struct sg_table *sgt; + struct page *page; + int err = 0; + size_t aligned_size = PAGE_ALIGN(size); + + if (nvgpu_mem_is_valid(&desc->mem)) + return 0; + + /* We ran out of preallocated memory */ + if (secure_buffer->used + aligned_size > secure_buffer->size) { + nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", + size, secure_buffer->used, secure_buffer->size); + return -ENOMEM; + } + + phys = secure_buffer->phys + secure_buffer->used; + + sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); + if (!sgt) { + nvgpu_err(platform->g, "failed to allocate memory"); + return -ENOMEM; + } + err = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (err) { + nvgpu_err(platform->g, "failed to allocate sg_table"); + goto fail_sgt; + } + page = phys_to_page(phys); + sg_set_page(sgt->sgl, page, size, 0); + /* This bypasses SMMU for VPR during gmmu_map. 
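+	 * A zero DMA address below tells nvgpu's GMMU mapping code to use
+	 * the page's physical address directly rather than an SMMU IOVA.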
*/ + sg_dma_address(sgt->sgl) = 0; + + desc->destroy = NULL; + + desc->mem.priv.sgt = sgt; + desc->mem.size = size; + desc->mem.aperture = APERTURE_SYSMEM; + + secure_buffer->used += aligned_size; + + return err; + +fail_sgt: + nvgpu_kfree(platform->g, sgt); + return err; +} + +/* + * gk20a_tegra_get_emc_rate() + * + * This function returns the minimum emc clock based on gpu frequency + */ + +static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, + struct gk20a_emc_params *emc_params) +{ + unsigned long gpu_freq, gpu_fmax_at_vmin; + unsigned long emc_rate, emc_scale; + + gpu_freq = clk_get_rate(g->clk.tegra_clk); + gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( + clk_get_parent(g->clk.tegra_clk)); + + /* When scaling emc, account for the gpu load when the + * gpu frequency is less than or equal to fmax@vmin. */ + if (gpu_freq <= gpu_fmax_at_vmin) + emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); + else + emc_scale = g->emc3d_ratio; + + emc_rate = + (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; + + return MHZ_TO_HZ(emc_rate); +} + +/* + * gk20a_tegra_prescale(profile, freq) + * + * This function informs EDP about changed constraints. + */ + +static void gk20a_tegra_prescale(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + u32 avg = 0; + + nvgpu_pmu_load_norm(g, &avg); + tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); +} + +/* + * gk20a_tegra_calibrate_emc() + * + */ + +static void gk20a_tegra_calibrate_emc(struct device *dev, + struct gk20a_emc_params *emc_params) +{ + enum tegra_chipid cid = tegra_get_chip_id(); + long gpu_bw, emc_bw; + + /* store gpu bw based on soc */ + switch (cid) { + case TEGRA210: + gpu_bw = TEGRA_GM20B_BW_PER_FREQ; + break; + case TEGRA124: + case TEGRA132: + gpu_bw = TEGRA_GK20A_BW_PER_FREQ; + break; + default: + gpu_bw = 0; + break; + } + + /* TODO detect DDR type. + * Okay for now since DDR3 and DDR4 have the same BW ratio */ + emc_bw = TEGRA_DDR3_BW_PER_FREQ; + + /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq + * NOTE the ratio must come out as an integer */ + emc_params->bw_ratio = (gpu_bw / emc_bw); +} + +#ifdef CONFIG_TEGRA_BWMGR +#ifdef CONFIG_TEGRA_DVFS +static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) +{ + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *params; + unsigned long rate; + + if (!profile || !profile->private_data) + return; + + params = (struct gk20a_emc_params *)profile->private_data; + rate = (enb) ? 
params->freq_last_set : 0; + tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); +} +#endif + +static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + unsigned long emc_rate; + + if (!profile || !profile->private_data) + return; + + emc_params = profile->private_data; + emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); + + if (emc_rate > tegra_bwmgr_get_max_emc_rate()) + emc_rate = tegra_bwmgr_get_max_emc_rate(); + + emc_params->freq_last_set = emc_rate; + if (platform->is_railgated && platform->is_railgated(dev)) + return; + + tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, + TEGRA_BWMGR_SET_EMC_FLOOR); + +} + +#endif + +#if defined(CONFIG_TEGRA_DVFS) +/* + * gk20a_tegra_is_railgated() + * + * Check status of gk20a power rail + */ + +static bool gk20a_tegra_is_railgated(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + bool ret = false; + + if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); + + return ret; +} + +/* + * gm20b_tegra_railgate() + * + * Gate (disable) gm20b power rail + */ + +static int gm20b_tegra_railgate(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || + !tegra_dvfs_is_rail_up(platform->gpu_rail)) + return 0; + + tegra_mc_flush(MC_CLIENT_GPU); + + udelay(10); + + /* enable clamp */ + tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); + tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); + + udelay(10); + + platform->reset_assert(dev); + + udelay(10); + + /* + * GPCPLL is already disabled before entering this function; reference + * clocks are enabled until now - disable them just before rail gating + */ + clk_disable_unprepare(platform->clk_reset); + clk_disable_unprepare(platform->clk[0]); + clk_disable_unprepare(platform->clk[1]); + if (platform->clk[3]) + clk_disable_unprepare(platform->clk[3]); + + udelay(10); + + tegra_soctherm_gpu_tsens_invalidate(1); + + if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { + ret = tegra_dvfs_rail_power_down(platform->gpu_rail); + if (ret) + goto err_power_off; + } else + pr_info("No GPU regulator?\n"); + +#ifdef CONFIG_TEGRA_BWMGR + gm20b_bwmgr_set_rate(platform, false); +#endif + + return 0; + +err_power_off: + nvgpu_err(platform->g, "Could not railgate GPU"); + return ret; +} + + +/* + * gm20b_tegra_unrailgate() + * + * Ungate (enable) gm20b power rail + */ + +static int gm20b_tegra_unrailgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + int ret = 0; + bool first = false; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + return 0; + + ret = tegra_dvfs_rail_power_up(platform->gpu_rail); + if (ret) + return ret; + +#ifdef CONFIG_TEGRA_BWMGR + gm20b_bwmgr_set_rate(platform, true); +#endif + + tegra_soctherm_gpu_tsens_invalidate(0); + + if (!platform->clk_reset) { + platform->clk_reset = clk_get(dev, "gpu_gate"); + if (IS_ERR(platform->clk_reset)) { + nvgpu_err(g, "fail to get gpu reset clk"); + goto err_clk_on; + } + } + + if (!first) { + ret = clk_prepare_enable(platform->clk_reset); + if (ret) { + nvgpu_err(g, "could not turn on gpu_gate"); + goto err_clk_on; + } + + ret = clk_prepare_enable(platform->clk[0]); + if (ret) 
{ + nvgpu_err(g, "could not turn on gpu pll"); + goto err_clk_on; + } + ret = clk_prepare_enable(platform->clk[1]); + if (ret) { + nvgpu_err(g, "could not turn on pwr clock"); + goto err_clk_on; + } + + if (platform->clk[3]) { + ret = clk_prepare_enable(platform->clk[3]); + if (ret) { + nvgpu_err(g, "could not turn on fuse clock"); + goto err_clk_on; + } + } + } + + udelay(10); + + platform->reset_assert(dev); + + udelay(10); + + tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); + tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); + + udelay(10); + + clk_disable(platform->clk_reset); + platform->reset_deassert(dev); + clk_enable(platform->clk_reset); + + /* Flush MC after boot/railgate/SC7 */ + tegra_mc_flush(MC_CLIENT_GPU); + + udelay(10); + + tegra_mc_flush_done(MC_CLIENT_GPU); + + udelay(10); + + return 0; + +err_clk_on: + tegra_dvfs_rail_power_down(platform->gpu_rail); + + return ret; +} +#endif + + +static struct { + char *name; + unsigned long default_rate; +} tegra_gk20a_clocks[] = { + {"gpu_ref", UINT_MAX}, + {"pll_p_out5", 204000000}, + {"emc", UINT_MAX}, + {"fuse", UINT_MAX}, +}; + + + +/* + * gk20a_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gk20a platform data. + */ + +static int gk20a_tegra_get_clocks(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + char devname[16]; + unsigned int i; + int ret = 0; + + BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); + + snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); + + platform->num_clks = 0; + for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { + long rate = tegra_gk20a_clocks[i].default_rate; + struct clk *c; + + c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); + if (IS_ERR(c)) { + ret = PTR_ERR(c); + goto err_get_clock; + } + rate = clk_round_rate(c, rate); + clk_set_rate(c, rate); + platform->clk[i] = c; + if (i == 0) + platform->cached_rate = rate; + } + platform->num_clks = i; + + return 0; + +err_get_clock: + + while (i--) + clk_put(platform->clk[i]); + return ret; +} + +#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) +static int gm20b_tegra_reset_assert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (!platform->reset_control) { + WARN(1, "Reset control not initialized\n"); + return -ENOSYS; + } + + return reset_control_assert(platform->reset_control); +} + +static int gm20b_tegra_reset_deassert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (!platform->reset_control) { + WARN(1, "Reset control not initialized\n"); + return -ENOSYS; + } + + return reset_control_deassert(platform->reset_control); +} +#endif + +static void gk20a_tegra_scale_init(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + struct gk20a *g = platform->g; + + if (!profile) + return; + + if (profile->private_data) + return; + + emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); + if (!emc_params) + return; + + emc_params->freq_last_set = -1; + gk20a_tegra_calibrate_emc(dev, emc_params); + +#ifdef CONFIG_TEGRA_BWMGR + emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); + if (!emc_params->bwmgr_cl) { + nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); + return; + } +#endif + + profile->private_data = emc_params; +} + +static void gk20a_tegra_scale_exit(struct 
device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + + if (!profile) + return; + + emc_params = profile->private_data; +#ifdef CONFIG_TEGRA_BWMGR + tegra_bwmgr_unregister(emc_params->bwmgr_cl); +#endif + + nvgpu_kfree(platform->g, profile->private_data); +} + +void gk20a_tegra_debug_dump(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + nvgpu_nvhost_debug_dump_device(g->nvhost_dev); +#endif +} + +int gk20a_tegra_busy(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); +#endif + return 0; +} + +void gk20a_tegra_idle(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + nvgpu_nvhost_module_idle_ext(g->nvhost_dev); +#endif +} + +int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) +{ + struct gk20a *g = platform->g; + struct secure_page_buffer *secure_buffer = &platform->secure_buffer; + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + return 0; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); + (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, + GFP_KERNEL, __DMA_ATTR(attrs)); + /* Some platforms disable VPR. In that case VPR allocations always + * fail. Just disable VPR usage in nvgpu in that case. */ + if (dma_mapping_error(&tegra_vpr_dev, iova)) + return 0; + + secure_buffer->size = platform->secure_buffer_size; + secure_buffer->phys = iova; + secure_buffer->destroy = gk20a_tegra_secure_page_destroy; + + g->ops.secure_alloc = gk20a_tegra_secure_alloc; + __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); + + return 0; +} + +#ifdef CONFIG_COMMON_CLK +static struct clk *gk20a_clk_get(struct gk20a *g) +{ + if (!g->clk.tegra_clk) { + struct clk *clk; + char clk_dev_id[32]; + struct device *dev = dev_from_gk20a(g); + + snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); + + clk = clk_get_sys(clk_dev_id, "gpu"); + if (IS_ERR(clk)) { + nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", + clk_dev_id); + return NULL; + } + g->clk.tegra_clk = clk; + } + + return g->clk.tegra_clk; +} + +static int gm20b_clk_prepare_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_clk_prepare(clk); +} + +static void gm20b_clk_unprepare_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + gm20b_clk_unprepare(clk); +} + +static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_clk_is_prepared(clk); +} + +static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_recalc_rate(clk, parent_rate); +} + +static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_gpcclk_set_rate(clk, rate, parent_rate); +} + +static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return 
gm20b_round_rate(clk, rate, parent_rate); +} + +static const struct clk_ops gm20b_clk_ops = { + .prepare = gm20b_clk_prepare_ops, + .unprepare = gm20b_clk_unprepare_ops, + .is_prepared = gm20b_clk_is_prepared_ops, + .recalc_rate = gm20b_recalc_rate_ops, + .set_rate = gm20b_gpcclk_set_rate_ops, + .round_rate = gm20b_round_rate_ops, +}; + +static int gm20b_register_gpcclk(struct gk20a *g) +{ + const char *parent_name = "pllg_ref"; + struct clk_gk20a *clk = &g->clk; + struct clk_init_data init; + struct clk *c; + int err = 0; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return -ENOSYS; + + err = gm20b_init_clk_setup_sw(g); + if (err) + return err; + + init.name = "gpcclk"; + init.ops = &gm20b_clk_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + init.flags = 0; + + /* Data in .init is copied by clk_register(), so stack variable OK */ + clk->hw.init = &init; + c = clk_register(dev_from_gk20a(g), &clk->hw); + if (IS_ERR(c)) { + nvgpu_err(g, "Failed to register GPCPLL clock"); + return -EINVAL; + } + + clk->g = g; + clk_register_clkdev(c, "gpcclk", "gpcclk"); + + return err; +} +#endif /* CONFIG_COMMON_CLK */ + +static int gk20a_tegra_probe(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct device_node *np = dev->of_node; + bool joint_xpu_rail = false; + int ret; + struct gk20a *g = platform->g; + +#ifdef CONFIG_COMMON_CLK + /* DVFS is not guaranteed to be initialized at the time of probe on + * kernels with Common Clock Framework enabled. + */ + if (!platform->gpu_rail) { + platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); + if (!platform->gpu_rail) { + nvgpu_log_info(g, "deferring probe no gpu_rail"); + return -EPROBE_DEFER; + } + } + + if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { + nvgpu_log_info(g, "deferring probe gpu_rail not ready"); + return -EPROBE_DEFER; + } +#endif + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + ret = nvgpu_get_nvhost_dev(platform->g); + if (ret) + return ret; +#endif + +#ifdef CONFIG_OF + joint_xpu_rail = of_property_read_bool(of_chosen, + "nvidia,tegra-joint_xpu_rail"); +#endif + + if (joint_xpu_rail) { + nvgpu_log_info(g, "XPU rails are joint\n"); + platform->g->can_railgate = false; + } + + platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; + if (tegra_get_chip_id() == TEGRA210) { + /* WAR for bug 1547668: Disable railgating and scaling + irrespective of platform data if the rework was not made. 
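+	   Presence of the rework is signalled by the /gpu-dvfs-rework
+	   device tree node checked just below.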
*/ + np = of_find_node_by_path("/gpu-dvfs-rework"); + if (!(np && of_device_is_available(np))) { + platform->devfreq_governor = ""; + dev_warn(dev, "board does not support scaling"); + } + platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; + if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) + platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; + } + + if (tegra_get_chip_id() == TEGRA132) + platform->soc_name = "tegra13x"; + + gk20a_tegra_get_clocks(dev); + nvgpu_linux_init_clk_support(platform->g); + ret = gk20a_tegra_init_secure_alloc(platform); + if (ret) + return ret; + + if (platform->clk_register) { + ret = platform->clk_register(platform->g); + if (ret) + return ret; + } + + return 0; +} + +static int gk20a_tegra_late_probe(struct device *dev) +{ + return 0; +} + +static int gk20a_tegra_remove(struct device *dev) +{ + /* deinitialise tegra specific scaling quirks */ + gk20a_tegra_scale_exit(dev); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_free_nvhost_dev(get_gk20a(dev)); +#endif + + return 0; +} + +static int gk20a_tegra_suspend(struct device *dev) +{ + tegra_edp_notify_gpu_load(0, 0); + return 0; +} + +#if defined(CONFIG_COMMON_CLK) +static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return rate; + + return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); +} + +static int gk20a_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return -ENOSYS; + + return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), + freqs, num_freqs); +} +#endif + +struct gk20a_platform gm20b_tegra_platform = { + .has_syncpoints = true, + .aggressive_sync_destroy_thresh = 64, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = true, + .can_elpg_init = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_elcg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_elpg = true, + .enable_aelpg = true, + .enable_perfmon = true, + .ptimer_src_freq = 19200000, + + .force_reset_in_do_idle = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gk20a_tegra_probe, + .late_probe = gk20a_tegra_late_probe, + .remove = gk20a_tegra_remove, + /* power management callbacks */ + .suspend = gk20a_tegra_suspend, + +#if defined(CONFIG_TEGRA_DVFS) + .railgate = gm20b_tegra_railgate, + .unrailgate = gm20b_tegra_unrailgate, + .is_railgated = gk20a_tegra_is_railgated, +#endif + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + +#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) + .reset_assert = gm20b_tegra_reset_assert, + .reset_deassert = gm20b_tegra_reset_deassert, +#else + .reset_assert = gk20a_tegra_reset_assert, + .reset_deassert = gk20a_tegra_reset_deassert, +#endif + +#if defined(CONFIG_COMMON_CLK) + .clk_round_rate = gk20a_round_clk_rate, + .get_clk_freqs = gk20a_clk_get_freqs, +#endif + +#ifdef CONFIG_COMMON_CLK + .clk_register = gm20b_register_gpcclk, +#endif + + /* frequency scaling configuration */ + .initscale = gk20a_tegra_scale_init, + .prescale = gk20a_tegra_prescale, +#ifdef CONFIG_TEGRA_BWMGR + .postscale = gm20b_tegra_postscale, +#endif + .devfreq_governor = "nvhost_podgov", + .qos_notify = gk20a_scale_qos_notify, + + 
.dump_platform_dependencies = gk20a_tegra_debug_dump, + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + .has_cde = true, +#endif + + .soc_name = "tegra21x", + + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(34), + + .secure_buffer_size = 335872, +}; diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h new file mode 100644 index 00000000..f7d50406 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h @@ -0,0 +1,23 @@ +/* + * GK20A Platform (SoC) Interface + * + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ +#define _NVGPU_PLATFORM_GK20A_TEGRA_H_ + +struct gk20a_platform; + +int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h new file mode 100644 index 00000000..d256d126 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h @@ -0,0 +1,39 @@ +/* + * GP10B Platform (SoC) Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _GP10B_PLATFORM_H_ +#define _GP10B_PLATFORM_H_ + +struct device; + +int gp10b_tegra_get_clocks(struct device *dev); +int gp10b_tegra_reset_assert(struct device *dev); +int gp10b_tegra_reset_deassert(struct device *dev); +void gp10b_tegra_scale_init(struct device *dev); +long gp10b_round_clk_rate(struct device *dev, unsigned long rate); +int gp10b_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs); +void gp10b_tegra_prescale(struct device *dev); +void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c new file mode 100644 index 00000000..5cb82687 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -0,0 +1,607 @@ +/* + * GP10B Tegra Platform Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "os_linux.h" + +#include "clk.h" + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "platform_ecc_sysfs.h" +#include "platform_gk20a_tegra.h" +#include "platform_gp10b.h" +#include "platform_gp10b_tegra.h" +#include "scale.h" + +/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ +#define GP10B_FREQ_SELECT_STEP 8 +/* Max number of freq supported in h/w */ +#define GP10B_MAX_SUPPORTED_FREQS 120 +static unsigned long +gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; + +#define TEGRA_GP10B_BW_PER_FREQ 64 +#define TEGRA_DDR4_BW_PER_FREQ 16 + +#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) + +#define GPCCLK_INIT_RATE 1000000000 + +static struct { + char *name; + unsigned long default_rate; +} tegra_gp10b_clocks[] = { + {"gpu", GPCCLK_INIT_RATE}, + {"gpu_sys", 204000000} }; + +/* + * gp10b_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gp10b platform data. 
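+ * The clocks come from the tegra_gp10b_clocks table above: "gpu" at
+ * GPCCLK_INIT_RATE and "gpu_sys" at 204 MHz.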
+ */
+
+int gp10b_tegra_get_clocks(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	unsigned int i;
+
+	platform->num_clks = 0;
+	for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
+		long rate = tegra_gp10b_clocks[i].default_rate;
+		struct clk *c;
+
+		c = clk_get(dev, tegra_gp10b_clocks[i].name);
+		if (IS_ERR(c)) {
+			nvgpu_err(platform->g, "cannot get clock %s",
+					tegra_gp10b_clocks[i].name);
+		} else {
+			clk_set_rate(c, rate);
+			platform->clk[i] = c;
+			if (i == 0)
+				platform->cached_rate = rate;
+		}
+	}
+	platform->num_clks = i;
+
+	if (platform->clk[0]) {
+		i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
+					tegra_gp10b_clocks[0].name);
+		if (i > 0)
+			platform->maxmin_clk_id = i;
+	}
+
+	return 0;
+}
+
+void gp10b_tegra_scale_init(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+	struct tegra_bwmgr_client *bwmgr_handle;
+
+	if (!profile)
+		return;
+
+	if ((struct tegra_bwmgr_client *)profile->private_data)
+		return;
+
+	bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
+	if (!bwmgr_handle)
+		return;
+
+	profile->private_data = (void *)bwmgr_handle;
+}
+
+static void gp10b_tegra_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	if (profile)
+		tegra_bwmgr_unregister(
+			(struct tegra_bwmgr_client *)profile->private_data);
+}
+
+static int gp10b_tegra_probe(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret;
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	ret = nvgpu_get_nvhost_dev(platform->g);
+	if (ret)
+		return ret;
+#endif
+
+	ret = gk20a_tegra_init_secure_alloc(platform);
+	if (ret)
+		return ret;
+
+	platform->disable_bigpage = !device_is_iommuable(dev);
+
+	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.ctx_vars.force_preemption_cilp = false;
+
+	gp10b_tegra_get_clocks(dev);
+	nvgpu_linux_init_clk_support(platform->g);
+
+	return 0;
+}
+
+static int gp10b_tegra_late_probe(struct device *dev)
+{
+	return 0;
+}
+
+static int gp10b_tegra_remove(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	if (g->ops.gr.remove_gr_sysfs)
+		g->ops.gr.remove_gr_sysfs(g);
+
+	/* deinitialise tegra specific scaling quirks */
+	gp10b_tegra_scale_exit(dev);
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	nvgpu_free_nvhost_dev(get_gk20a(dev));
+#endif
+
+	return 0;
+}
+
+static bool gp10b_tegra_is_railgated(struct device *dev)
+{
+	bool ret = false;
+
+	if (tegra_bpmp_running())
+		ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);
+
+	return ret;
+}
+
+static int gp10b_tegra_railgate(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* remove emc frequency floor */
+	if (profile)
+		tegra_bwmgr_set_emc(
+			(struct tegra_bwmgr_client *)profile->private_data,
+			0, TEGRA_BWMGR_SET_EMC_FLOOR);
+
+	if (tegra_bpmp_running() &&
+	    tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
+		int i;
+
+		for (i = 0; i < platform->num_clks; i++) {
+			if (platform->clk[i])
+				clk_disable_unprepare(platform->clk[i]);
+		}
+		tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
+	}
+	return 0;
+}
+
+static int gp10b_tegra_unrailgate(struct device *dev)
+{
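+	/*
+	 * Power-up is the reverse of gp10b_tegra_railgate(): unpowergate
+	 * the partition, re-enable the clocks, then restore the EMC
+	 * frequency floor (initially at the maximum rate).
+	 */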
+ int ret = 0; + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + if (tegra_bpmp_running()) { + int i; + ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_prepare_enable(platform->clk[i]); + } + } + + /* to start with set emc frequency floor to max rate*/ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + tegra_bwmgr_get_max_emc_rate(), + TEGRA_BWMGR_SET_EMC_FLOOR); + return ret; +} + +static int gp10b_tegra_suspend(struct device *dev) +{ + return 0; +} + +int gp10b_tegra_reset_assert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_assert(platform->reset_control); + + return ret; +} + +int gp10b_tegra_reset_deassert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_deassert(platform->reset_control); + + return ret; +} + +void gp10b_tegra_prescale(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + u32 avg = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_load_norm(g, &avg); + + nvgpu_log_fn(g, "done"); +} + +void gp10b_tegra_postscale(struct device *pdev, + unsigned long freq) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a *g = get_gk20a(pdev); + unsigned long emc_rate; + + nvgpu_log_fn(g, " "); + if (profile && !platform->is_railgated(pdev)) { + unsigned long emc_scale; + + if (freq <= gp10b_freq_table[0]) + emc_scale = 0; + else + emc_scale = g->emc3d_ratio; + + emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; + + if (emc_rate > tegra_bwmgr_get_max_emc_rate()) + emc_rate = tegra_bwmgr_get_max_emc_rate(); + + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); + } + nvgpu_log_fn(g, "done"); +} + +long gp10b_round_clk_rate(struct device *dev, unsigned long rate) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + unsigned long *freq_table = profile->devfreq_profile.freq_table; + int max_states = profile->devfreq_profile.max_state; + int i; + + for (i = 0; i < max_states; ++i) + if (freq_table[i] >= rate) + return freq_table[i]; + + return freq_table[max_states - 1]; +} + +int gp10b_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + unsigned long max_rate; + unsigned long new_rate = 0, prev_rate = 0; + int i = 0, freq_counter = 0; + + max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); + + /* + * Walk the h/w frequency table and only select + * GP10B_FREQ_SELECT_STEP'th frequencies and + * add MAX freq to last + */ + for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { + prev_rate = new_rate; + new_rate = clk_round_rate(platform->clk[0], prev_rate + 1); + + if (i % GP10B_FREQ_SELECT_STEP == 0 || + new_rate == max_rate) { + gp10b_freq_table[freq_counter++] = new_rate; + + if (new_rate == max_rate) + break; + } + } + + WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS); + + /* Fill freq table */ + *freqs = gp10b_freq_table; + *num_freqs = freq_counter; + + nvgpu_log_info(g, "min rate: %ld max rate: %ld 
num_of_freq %d\n", + gp10b_freq_table[0], max_rate, *num_freqs); + + return 0; +} + +struct gk20a_platform gp10b_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .railgate_delay_init = 500, + + /* ldiv slowdown factor */ + .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, + + /* power management configuration */ + .can_railgate_init = true, + .enable_elpg = true, + .can_elpg_init = true, + .enable_blcg = true, + .enable_slcg = true, + .enable_elcg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_aelpg = true, + .enable_perfmon = true, + + /* ptimer src frequency in hz*/ + .ptimer_src_freq = 31250000, + + .ch_wdt_timeout_ms = 5000, + + .probe = gp10b_tegra_probe, + .late_probe = gp10b_tegra_late_probe, + .remove = gp10b_tegra_remove, + + /* power management callbacks */ + .suspend = gp10b_tegra_suspend, + .railgate = gp10b_tegra_railgate, + .unrailgate = gp10b_tegra_unrailgate, + .is_railgated = gp10b_tegra_is_railgated, + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + + .dump_platform_dependencies = gk20a_tegra_debug_dump, + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + .has_cde = true, +#endif + + .clk_round_rate = gp10b_round_clk_rate, + .get_clk_freqs = gp10b_clk_get_freqs, + + /* frequency scaling configuration */ + .initscale = gp10b_tegra_scale_init, + .prescale = gp10b_tegra_prescale, + .postscale = gp10b_tegra_postscale, + .devfreq_governor = "nvhost_podgov", + + .qos_notify = gk20a_scale_qos_notify, + + .reset_assert = gp10b_tegra_reset_assert, + .reset_deassert = gp10b_tegra_reset_deassert, + + .force_reset_in_do_idle = false, + + .soc_name = "tegra18x", + + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(36), + + .ltc_streamid = TEGRA_SID_GPUB, + + .secure_buffer_size = 401408, +}; + +void gr_gp10b_create_sysfs(struct gk20a *g) +{ + int error = 0; + struct device *dev = dev_from_gk20a(g); + + /* This stat creation function is called on GR init. GR can get + initialized multiple times but we only need to create the ECC + stats once. Therefore, add the following check to avoid + creating duplicate stat sysfs nodes. 
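+	   A non-NULL counters array is used as the "already created"
+	   marker here.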
*/ + if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) + return; + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_lrf_ecc_single_err_count", + &g->ecc.gr.sm_lrf_single_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_lrf_ecc_double_err_count", + &g->ecc.gr.sm_lrf_double_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_sec_count", + &g->ecc.gr.sm_shm_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_sed_count", + &g->ecc.gr.sm_shm_sed_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_ded_count", + &g->ecc.gr.sm_shm_ded_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_sec_pipe0_count", + &g->ecc.gr.tex_total_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_ded_pipe0_count", + &g->ecc.gr.tex_total_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_sec_pipe0_count", + &g->ecc.gr.tex_unique_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_ded_pipe0_count", + &g->ecc.gr.tex_unique_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_sec_pipe1_count", + &g->ecc.gr.tex_total_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_ded_pipe1_count", + &g->ecc.gr.tex_total_ded_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_sec_pipe1_count", + &g->ecc.gr.tex_unique_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_ded_pipe1_count", + &g->ecc.gr.tex_unique_ded_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 2, + "ecc_sec_count", + &g->ecc.ltc.l2_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 2, + "ecc_ded_count", + &g->ecc.ltc.l2_ded_count); + + if (error) + dev_err(dev, "Failed to create sysfs attributes!\n"); +} + +void gr_gp10b_remove_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (!g->ecc.gr.sm_lrf_single_err_count.counters) + return; + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_lrf_single_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_lrf_double_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_sed_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_ded_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_ded_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_ded_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 2, + &g->ecc.ltc.l2_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, + 2, + &g->ecc.ltc.l2_ded_count); +} diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h new file mode 100644 index 00000000..6de90275 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef _PLATFORM_GP10B_TEGRA_H_
+#define _PLATFORM_GP10B_TEGRA_H_
+
+#include "gp10b/gr_gp10b.h"
+#include "platform_ecc_sysfs.h"
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
new file mode 100644
index 00000000..d62e7932
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -0,0 +1,588 @@
+/*
+ * GV11B Tegra Platform Interface
+ *
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "platform_gk20a.h"
+#include "clk.h"
+#include "scale.h"
+
+#include "platform_gp10b.h"
+#include "platform_gp10b_tegra.h"
+#include "platform_ecc_sysfs.h"
+
+#include "os_linux.h"
+#include "platform_gk20a_tegra.h"
+#include "gv11b/gr_gv11b.h"
+
+static void gv11b_tegra_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	if (profile)
+		tegra_bwmgr_unregister(
+			(struct tegra_bwmgr_client *)profile->private_data);
+}
+
+static int gv11b_tegra_probe(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int err;
+
+	err = nvgpu_nvhost_syncpt_init(platform->g);
+	if (err) {
+		if (err != -ENOSYS)
+			return err;
+	}
+
+	err = gk20a_tegra_init_secure_alloc(platform);
+	if (err)
+		return err;
+
+	platform->disable_bigpage = !device_is_iommuable(dev);
+
+	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.ctx_vars.force_preemption_cilp = false;
+
+	gp10b_tegra_get_clocks(dev);
+	nvgpu_linux_init_clk_support(platform->g);
+
+	return 0;
+}
+
+static int gv11b_tegra_late_probe(struct device *dev)
+{
+	return 0;
+}
+
+static int gv11b_tegra_remove(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	if (g->ops.gr.remove_gr_sysfs)
+		g->ops.gr.remove_gr_sysfs(g);
+
+	gv11b_tegra_scale_exit(dev);
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	nvgpu_free_nvhost_dev(get_gk20a(dev));
+#endif
+
+	return 0;
+}
+
+static bool gv11b_tegra_is_railgated(struct device *dev)
+{
+	bool ret = false;
+#ifdef
TEGRA194_POWER_DOMAIN_GPU + struct gk20a *g = get_gk20a(dev); + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); + + nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } +#endif + return ret; +} + +static int gv11b_tegra_railgate(struct device *dev) +{ +#ifdef TEGRA194_POWER_DOMAIN_GPU + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a *g = get_gk20a(dev); + int i; + + /* remove emc frequency floor */ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + 0, TEGRA_BWMGR_SET_EMC_FLOOR); + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { + nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); + return 0; + } + nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_disable_unprepare(platform->clk[i]); + } + nvgpu_log(g, gpu_dbg_info, "powergate_partition"); + tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } +#endif + return 0; +} + +static int gv11b_tegra_unrailgate(struct device *dev) +{ + int ret = 0; +#ifdef TEGRA194_POWER_DOMAIN_GPU + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + int i; + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); + if (ret) { + nvgpu_log(g, gpu_dbg_info, + "unpowergate partition failed"); + return ret; + } + nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_prepare_enable(platform->clk[i]); + } + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } + + /* to start with set emc frequency floor to max rate*/ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + tegra_bwmgr_get_max_emc_rate(), + TEGRA_BWMGR_SET_EMC_FLOOR); +#endif + return ret; +} + +static int gv11b_tegra_suspend(struct device *dev) +{ + return 0; +} + +struct gk20a_platform gv11b_tegra_platform = { + .has_syncpoints = true, + + /* ptimer src frequency in hz*/ + .ptimer_src_freq = 31250000, + + .ch_wdt_timeout_ms = 5000, + + .probe = gv11b_tegra_probe, + .late_probe = gv11b_tegra_late_probe, + .remove = gv11b_tegra_remove, + .railgate_delay_init = 500, + .can_railgate_init = true, + + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_elcg = true, + .enable_perfmon = true, + + /* power management configuration */ + .enable_elpg = true, + .can_elpg_init = true, + .enable_aelpg = true, + + /* power management callbacks */ + .suspend = gv11b_tegra_suspend, + .railgate = gv11b_tegra_railgate, + .unrailgate = gv11b_tegra_unrailgate, + .is_railgated = gv11b_tegra_is_railgated, + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + + .clk_round_rate = gp10b_round_clk_rate, + .get_clk_freqs = gp10b_clk_get_freqs, + + /* frequency scaling configuration */ + .initscale = gp10b_tegra_scale_init, + .prescale = gp10b_tegra_prescale, + .postscale = gp10b_tegra_postscale, + .devfreq_governor = 
"nvhost_podgov", + + .qos_notify = gk20a_scale_qos_notify, + + .dump_platform_dependencies = gk20a_tegra_debug_dump, + + .soc_name = "tegra19x", + + .honors_aperture = true, + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(36), + + .reset_assert = gp10b_tegra_reset_assert, + .reset_deassert = gp10b_tegra_reset_deassert, + + .secure_buffer_size = 667648, +}; + +void gr_gv11b_create_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + int error = 0; + + /* This stat creation function is called on GR init. GR can get + initialized multiple times but we only need to create the ECC + stats once. Therefore, add the following check to avoid + creating duplicate stat sysfs nodes. */ + if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL) + return; + + gr_gp10b_create_sysfs(g); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_tag_ecc_corrected_err_count", + &g->ecc.gr.sm_l1_tag_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_tag_ecc_uncorrected_err_count", + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_cbu_ecc_corrected_err_count", + &g->ecc.gr.sm_cbu_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_cbu_ecc_uncorrected_err_count", + &g->ecc.gr.sm_cbu_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_data_ecc_corrected_err_count", + &g->ecc.gr.sm_l1_data_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_data_ecc_uncorrected_err_count", + &g->ecc.gr.sm_l1_data_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_icache_ecc_corrected_err_count", + &g->ecc.gr.sm_icache_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_icache_ecc_uncorrected_err_count", + &g->ecc.gr.sm_icache_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "gcc_l15_ecc_corrected_err_count", + &g->ecc.gr.gcc_l15_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "gcc_l15_ecc_uncorrected_err_count", + &g->ecc.gr.gcc_l15_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->ltc_count, + 0, + "ltc", + NULL, + "l2_cache_uncorrected_err_count", + &g->ecc.ltc.l2_cache_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->ltc_count, + 0, + "ltc", + NULL, + "l2_cache_corrected_err_count", + &g->ecc.ltc.l2_cache_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "gpc", + NULL, + "fecs_ecc_uncorrected_err_count", + &g->ecc.gr.fecs_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "gpc", + NULL, + "fecs_ecc_corrected_err_count", + &g->ecc.gr.fecs_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "gpccs_ecc_uncorrected_err_count", + &g->ecc.gr.gpccs_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "gpccs_ecc_corrected_err_count", + &g->ecc.gr.gpccs_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "mmu_l1tlb_ecc_uncorrected_err_count", + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "mmu_l1tlb_ecc_corrected_err_count", + &g->ecc.gr.mmu_l1tlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_l2tlb_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); + + error 
|= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_l2tlb_ecc_corrected_err_count", + &g->ecc.fb.mmu_l2tlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_hubtlb_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_hubtlb_ecc_corrected_err_count", + &g->ecc.fb.mmu_hubtlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_fillunit_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_fillunit_ecc_corrected_err_count", + &g->ecc.fb.mmu_fillunit_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "pmu_ecc_uncorrected_err_count", + &g->ecc.pmu.pmu_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "pmu_ecc_corrected_err_count", + &g->ecc.pmu.pmu_corrected_err_count); + + if (error) + dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); +} + +void gr_gv11b_remove_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters) + return; + gr_gp10b_remove_sysfs(g); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_tag_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_cbu_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_cbu_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_data_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_data_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_icache_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_icache_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.gcc_l15_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.gcc_l15_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->ltc_count, + 0, + &g->ecc.ltc.l2_cache_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->ltc_count, + 0, + &g->ecc.ltc.l2_cache_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.gr.fecs_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.gr.fecs_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.gpccs_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.gpccs_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.mmu_l1tlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_l2tlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_hubtlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_fillunit_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.pmu.pmu_uncorrected_err_count); + + 
nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.pmu.pmu_corrected_err_count); +} diff --git a/drivers/gpu/nvgpu/os/linux/rwsem.c b/drivers/gpu/nvgpu/os/linux/rwsem.c new file mode 100644 index 00000000..297ddf11 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/rwsem.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) +{ + init_rwsem(&rwsem->rwsem); +} + +void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) +{ + up_read(&rwsem->rwsem); +} + +void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) +{ + down_read(&rwsem->rwsem); +} + +void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) +{ + up_write(&rwsem->rwsem); +} + +void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) +{ + down_write(&rwsem->rwsem); +} diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c new file mode 100644 index 00000000..84ac1cfd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -0,0 +1,428 @@ +/* + * gk20a clock scaling profile + * + * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "scale.h" +#include "os_linux.h" + +/* + * gk20a_scale_qos_notify() + * + * This function is called when the minimum QoS requirement for the device + * has changed. The function calls postscaling callback if it is defined. 
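+ *
+ * Two variants follow: with CONFIG_COMMON_CLK the notifier re-reads
+ * the PM QoS min/max frequency bounds and kicks devfreq to re-evaluate
+ * its target; without it, the notifier rounds the QoS minimum to a
+ * supported rate and invokes the platform postscale callback directly.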
+ */ + +#if defined(CONFIG_COMMON_CLK) +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a *g = get_gk20a(profile->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct devfreq *devfreq = l->devfreq; + + if (!devfreq) + return NOTIFY_OK; + + mutex_lock(&devfreq->lock); + /* check for pm_qos min and max frequency requirement */ + profile->qos_min_freq = + (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; + profile->qos_max_freq = + (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; + + if (profile->qos_min_freq > profile->qos_max_freq) { + nvgpu_err(g, + "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", + profile->qos_min_freq, profile->qos_max_freq); + profile->qos_min_freq = profile->qos_max_freq; + } + + update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + + return NOTIFY_OK; +} +#else +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a_platform *platform = dev_get_drvdata(profile->dev); + struct gk20a *g = get_gk20a(profile->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + unsigned long freq; + + if (!platform->postscale) + return NOTIFY_OK; + + /* get the frequency requirement. if devfreq is enabled, check if it + * has higher demand than qos */ + freq = platform->clk_round_rate(profile->dev, + (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); + if (l->devfreq) + freq = max(l->devfreq->previous_freq, freq); + + /* Update gpu load because we may scale the emc target + * if the gpu load changed. */ + nvgpu_pmu_load_update(g); + platform->postscale(profile->dev, freq); + + return NOTIFY_OK; +} +#endif + +/* + * gk20a_scale_make_freq_table(profile) + * + * This function initialises the frequency table for the given device profile + */ + +static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) +{ + struct gk20a_platform *platform = dev_get_drvdata(profile->dev); + int num_freqs, err; + unsigned long *freqs; + + if (platform->get_clk_freqs) { + /* get gpu frequency table */ + err = platform->get_clk_freqs(profile->dev, &freqs, + &num_freqs); + if (err) + return -ENOSYS; + } else + return -ENOSYS; + + profile->devfreq_profile.freq_table = (unsigned long *)freqs; + profile->devfreq_profile.max_state = num_freqs; + + return 0; +} + +/* + * gk20a_scale_target(dev, *freq, flags) + * + * This function scales the clock + */ + +static int gk20a_scale_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_scale_profile *profile = g->scale_profile; + struct devfreq *devfreq = l->devfreq; + unsigned long local_freq = *freq; + unsigned long rounded_rate; + unsigned long min_freq = 0, max_freq = 0; + + /* + * Calculate floor and cap frequency values + * + * Policy : + * We have two APIs to clip the frequency + * 1. devfreq + * 2. 
pm_qos
+	 *
+	 * To calculate floor (min) freq, we select MAX of floor frequencies
+	 * requested from both APIs
+	 * To get cap (max) freq, we select MIN of max frequencies
+	 *
+	 * In case we have conflict (min_freq > max_freq) after above
+	 * steps, we ensure that max_freq wins over min_freq
+	 */
+	min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
+	max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
+
+	if (min_freq > max_freq)
+		min_freq = max_freq;
+
+	/* Clip requested frequency */
+	if (local_freq < min_freq)
+		local_freq = min_freq;
+
+	if (local_freq > max_freq)
+		local_freq = max_freq;
+
+	/* set the final frequency */
+	rounded_rate = platform->clk_round_rate(dev, local_freq);
+
+	/* Check for duplicate request */
+	if (rounded_rate == g->last_freq)
+		return 0;
+
+	if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
+		*freq = rounded_rate;
+	else {
+		g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
+		*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
+	}
+
+	g->last_freq = *freq;
+
+	/* postscale will only scale emc (dram clock) if evaluating
+	 * gk20a_tegra_get_emc_rate() produces a new or different emc
+	 * target because the load and/or gpufreq has changed */
+	if (platform->postscale)
+		platform->postscale(dev, rounded_rate);
+
+	return 0;
+}
+
+/*
+ * update_load_estimate_gpmu(dev)
+ *
+ * Update load estimate using gpmu. The gpmu busy value is normalised
+ * over the time elapsed since the previous query.
+ */
+
+static void update_load_estimate_gpmu(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_scale_profile *profile = g->scale_profile;
+	unsigned long dt;
+	u32 busy_time;
+	ktime_t t;
+
+	t = ktime_get();
+	dt = ktime_us_delta(t, profile->last_event_time);
+
+	profile->dev_stat.total_time = dt;
+	profile->last_event_time = t;
+	nvgpu_pmu_load_norm(g, &busy_time);
+	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
+}
+
+/*
+ * gk20a_scale_suspend(dev)
+ *
+ * This function informs devfreq of suspend
+ */
+
+void gk20a_scale_suspend(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct devfreq *devfreq = l->devfreq;
+
+	if (!devfreq)
+		return;
+
+	devfreq_suspend_device(devfreq);
+}
+
+/*
+ * gk20a_scale_resume(dev)
+ *
+ * This function informs devfreq of resume
+ */
+
+void gk20a_scale_resume(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct devfreq *devfreq = l->devfreq;
+
+	if (!devfreq)
+		return;
+
+	g->last_freq = 0;
+	devfreq_resume_device(devfreq);
+}
+
+/*
+ * gk20a_scale_get_dev_status(dev, *stat)
+ *
+ * This function queries the current device status.
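+ * It refreshes the PMU load counters, runs the platform prescale hook
+ * if one is set, reports the current GPCCLK rate and the busy/total
+ * time accumulated since the previous query, then clears the local
+ * accumulators.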
+ */ + +static int gk20a_scale_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + struct gk20a_platform *platform = dev_get_drvdata(dev); + + /* update the software shadow */ + nvgpu_pmu_load_update(g); + + /* inform edp about new constraint */ + if (platform->prescale) + platform->prescale(dev); + + /* Make sure there are correct values for the current frequency */ + profile->dev_stat.current_frequency = + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); + + /* Update load estimate */ + update_load_estimate_gpmu(dev); + + /* Copy the contents of the current device status */ + *stat = profile->dev_stat; + + /* Finally, clear out the local values */ + profile->dev_stat.total_time = 0; + profile->dev_stat.busy_time = 0; + + return 0; +} + +/* + * get_cur_freq(struct device *dev, unsigned long *freq) + * + * This function gets the current GPU clock rate. + */ + +static int get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct gk20a *g = get_gk20a(dev); + *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); + return 0; +} + + +/* + * gk20a_scale_init(dev) + */ + +void gk20a_scale_init(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_scale_profile *profile; + int err; + + if (g->scale_profile) + return; + + if (!platform->devfreq_governor && !platform->qos_notify) + return; + + profile = nvgpu_kzalloc(g, sizeof(*profile)); + + profile->dev = dev; + profile->dev_stat.busy = false; + + /* Create frequency table */ + err = gk20a_scale_make_freq_table(profile); + if (err || !profile->devfreq_profile.max_state) + goto err_get_freqs; + + profile->qos_min_freq = 0; + profile->qos_max_freq = UINT_MAX; + + /* Store device profile so we can access it if devfreq governor + * init needs that */ + g->scale_profile = profile; + + if (platform->devfreq_governor) { + struct devfreq *devfreq; + + profile->devfreq_profile.initial_freq = + profile->devfreq_profile.freq_table[0]; + profile->devfreq_profile.target = gk20a_scale_target; + profile->devfreq_profile.get_dev_status = + gk20a_scale_get_dev_status; + profile->devfreq_profile.get_cur_freq = get_cur_freq; + profile->devfreq_profile.polling_ms = 25; + + devfreq = devfreq_add_device(dev, + &profile->devfreq_profile, + platform->devfreq_governor, NULL); + + if (IS_ERR(devfreq)) + devfreq = NULL; + + l->devfreq = devfreq; + } + + /* Should we register QoS callback for this device? 
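+	 * If so, the same notifier block is registered for both the min
+	 * and max GPU frequency bound notifications.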
*/
+	if (platform->qos_notify) {
+		profile->qos_notify_block.notifier_call =
+					platform->qos_notify;
+
+		pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+					&profile->qos_notify_block);
+		pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+					&profile->qos_notify_block);
+	}
+
+	return;
+
+err_get_freqs:
+	nvgpu_kfree(g, profile);
+}
+
+void gk20a_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a *g = platform->g;
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	int err;
+
+	if (platform->qos_notify) {
+		pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&g->scale_profile->qos_notify_block);
+		pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&g->scale_profile->qos_notify_block);
+	}
+
+	if (platform->devfreq_governor) {
+		err = devfreq_remove_device(l->devfreq);
+		l->devfreq = NULL;
+	}
+
+	nvgpu_kfree(g, g->scale_profile);
+	g->scale_profile = NULL;
+}
+
+/*
+ * gk20a_scale_hw_init(dev)
+ *
+ * Initialize hardware portion of the device
+ */
+
+void gk20a_scale_hw_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* make sure that scaling has been initialised */
+	if (!profile)
+		return;
+
+	profile->dev_stat.total_time = 0;
+	profile->last_event_time = ktime_get();
+}
diff --git a/drivers/gpu/nvgpu/os/linux/scale.h b/drivers/gpu/nvgpu/os/linux/scale.h
new file mode 100644
index 00000000..c1e6fe86
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/scale.h
@@ -0,0 +1,66 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */ + +#ifndef GK20A_SCALE_H +#define GK20A_SCALE_H + +#include + +struct clk; + +struct gk20a_scale_profile { + struct device *dev; + ktime_t last_event_time; + struct devfreq_dev_profile devfreq_profile; + struct devfreq_dev_status dev_stat; + struct notifier_block qos_notify_block; + unsigned long qos_min_freq; + unsigned long qos_max_freq; + void *private_data; +}; + +/* Initialization and de-initialization for module */ +void gk20a_scale_init(struct device *); +void gk20a_scale_exit(struct device *); +void gk20a_scale_hw_init(struct device *dev); + +#if defined(CONFIG_GK20A_DEVFREQ) +/* + * call when performing submit to notify scaling mechanism that the module is + * in use + */ +void gk20a_scale_notify_busy(struct device *); +void gk20a_scale_notify_idle(struct device *); + +void gk20a_scale_suspend(struct device *); +void gk20a_scale_resume(struct device *); +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p); +#else +static inline void gk20a_scale_notify_busy(struct device *dev) {} +static inline void gk20a_scale_notify_idle(struct device *dev) {} +static inline void gk20a_scale_suspend(struct device *dev) {} +static inline void gk20a_scale_resume(struct device *dev) {} +static inline int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + return -ENOSYS; +} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c new file mode 100644 index 00000000..2ad5aabf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "sched.h" +#include "os_linux.h" +#include "ioctl_tsg.h" + +#include +#include + +ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, + size_t size, loff_t *off) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + struct nvgpu_sched_event_arg event = { 0 }; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, + "filp=%p buf=%p size=%zu", filp, buf, size); + + if (size < sizeof(event)) + return -EINVAL; + size = sizeof(event); + + nvgpu_mutex_acquire(&sched->status_lock); + while (!sched->status) { + nvgpu_mutex_release(&sched->status_lock); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, + sched->status, 0); + if (err) + return err; + nvgpu_mutex_acquire(&sched->status_lock); + } + + event.reserved = 0; + event.status = sched->status; + + if (copy_to_user(buf, &event, size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + + sched->status = 0; + + nvgpu_mutex_release(&sched->status_lock); + + return size; +} + +unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + unsigned int mask = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->status_lock); + poll_wait(filp, &sched->readout_wq.wq, wait); + if (sched->status) + mask |= POLLIN | POLLRDNORM; + nvgpu_mutex_release(&sched->status_lock); + + return mask; +} + +static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_args *arg) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", + arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + nvgpu_mutex_acquire(&sched->status_lock); + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + sched->active_tsg_bitmap, sched->bitmap_size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_args *arg) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", + arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + nvgpu_mutex_acquire(&sched->status_lock); + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + sched->recent_tsg_bitmap, sched->bitmap_size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + + memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_by_pid_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u64 *bitmap; + unsigned int tsgid; + /* pid at user level corresponds to kernel tgid */ + pid_t tgid = (pid_t)arg->pid; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", + (pid_t)arg->pid, arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) 
|| (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size); + if (!bitmap) + return -ENOMEM; + + nvgpu_mutex_acquire(&sched->status_lock); + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + if (tsg->tgid == tgid) + NVGPU_SCHED_SET(tsgid, bitmap); + } + } + nvgpu_mutex_release(&sched->status_lock); + + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + bitmap, sched->bitmap_size)) + err = -EFAULT; + + nvgpu_kfree(sched->g, bitmap); + + return err; +} + +static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_get_params_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ + arg->runlist_interleave = tsg->interleave_level; + arg->timeslice = tsg->timeslice_us; + + arg->graphics_preempt_mode = + tsg->gr_ctx.graphics_preempt_mode; + arg->compute_preempt_mode = + tsg->gr_ctx.compute_preempt_mode; + + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return 0; +} + +static int gk20a_sched_dev_ioctl_tsg_set_timeslice( + struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_timeslice_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + err = gk20a_busy(g); + if (err) + goto done; + + err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); + + gk20a_idle(g); + +done: + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return err; +} + +static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( + struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_runlist_interleave_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + err = gk20a_busy(g); + if (err) + goto done; + + err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); + + gk20a_idle(g); + +done: + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return err; +} + +static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = true; + nvgpu_mutex_release(&sched->control_lock); + return 0; +} + +static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = false; + nvgpu_mutex_release(&sched->control_lock); + 
return 0; +} + +static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_api_version_args *args) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + args->version = NVGPU_SCHED_API_VERSION; + return 0; +} + +static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + nvgpu_mutex_acquire(&sched->status_lock); + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + nvgpu_warn(g, "tsgid=%d already referenced", tsgid); + /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ + nvgpu_mutex_release(&sched->status_lock); + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + return -ENXIO; + } + + /* keep reference on TSG, will be released on + * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close + */ + NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + nvgpu_mutex_acquire(&sched->status_lock); + if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + nvgpu_mutex_release(&sched->status_lock); + nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); + return -ENXIO; + } + NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); + nvgpu_mutex_release(&sched->status_lock); + + tsg = &f->tsg[tsgid]; + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return 0; +} + +int gk20a_sched_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, sched.cdev); + struct gk20a *g; + struct gk20a_sched_ctrl *sched; + int err = 0; + + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + sched = &l->sched_ctrl; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); + + if (!sched->sw_ready) { + err = gk20a_busy(g); + if (err) + goto free_ref; + + gk20a_idle(g); + } + + if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { + err = -EBUSY; + goto free_ref; + } + + memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, + sched->bitmap_size); + memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); + + filp->private_data = sched; + nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); + +free_ref: + if (err) + gk20a_put(g); + return err; +} + +long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & 
_IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_SCHED_IOCTL_GET_TSGS: + err = gk20a_sched_dev_ioctl_get_tsgs(sched, + (struct nvgpu_sched_get_tsgs_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: + err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched, + (struct nvgpu_sched_get_tsgs_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: + err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched, + (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: + err = gk20a_sched_dev_ioctl_get_params(sched, + (struct nvgpu_sched_tsg_get_params_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: + err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched, + (struct nvgpu_sched_tsg_timeslice_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: + err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched, + (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); + break; + case NVGPU_SCHED_IOCTL_LOCK_CONTROL: + err = gk20a_sched_dev_ioctl_lock_control(sched); + break; + case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: + err = gk20a_sched_dev_ioctl_unlock_control(sched); + break; + case NVGPU_SCHED_IOCTL_GET_API_VERSION: + err = gk20a_sched_dev_ioctl_get_api_version(sched, + (struct nvgpu_sched_api_version_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_TSG: + err = gk20a_sched_dev_ioctl_get_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; + case NVGPU_SCHED_IOCTL_PUT_TSG: + err = gk20a_sched_dev_ioctl_put_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; + default: + nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + } + + /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on + * purpose with NULL buffer and/or zero size to discover TSG bitmap + * size. We need to update user arguments in this case too, even + * if we return an error. 
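+ * For example, a GET_TSGS call that fails with -ENOSPC still copies
+ * the required bitmap size back to user space, so the caller can
+ * retry with a large enough buffer.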
+ */ + if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { + if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) + err = -EFAULT; + } + + return err; +} + +int gk20a_sched_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + unsigned int tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); + + /* release any reference to TSGs */ + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + } + } + + /* unlock control */ + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = false; + nvgpu_mutex_release(&sched->control_lock); + + nvgpu_mutex_release(&sched->busy_lock); + gk20a_put(g); + return 0; +} + +void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + if (!sched->sw_ready) { + err = gk20a_busy(g); + if (err) { + WARN_ON(err); + return; + } + + gk20a_idle(g); + } + + nvgpu_mutex_acquire(&sched->status_lock); + NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); + NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); + sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; + nvgpu_mutex_release(&sched->status_lock); + nvgpu_cond_signal_interruptible(&sched->readout_wq); +} + +void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->status_lock); + NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); + + /* clear recent_tsg_bitmap as well: if app manager did not + * notice that TSG was previously added, no need to notify it + * if the TSG has been released in the meantime. If the + * TSG gets reallocated, app manager will be notified as usual. 
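+ * (gk20a_sched_ctrl_tsg_added() sets the bit in both bitmaps again
+ * and signals the wait queue when that happens.)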
+ */ + NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); + + /* do not set event_pending, we only want to notify app manager + * when TSGs are added, so that it can apply sched params + */ + nvgpu_mutex_release(&sched->status_lock); +} + +int gk20a_sched_ctrl_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + struct fifo_gk20a *f = &g->fifo; + int err; + + if (sched->sw_ready) + return 0; + + sched->g = g; + sched->bitmap_size = roundup(f->num_channels, 64) / 8; + sched->status = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", + g, sched, sched->bitmap_size); + + sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->active_tsg_bitmap) + return -ENOMEM; + + sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->recent_tsg_bitmap) { + err = -ENOMEM; + goto free_active; + } + + sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->ref_tsg_bitmap) { + err = -ENOMEM; + goto free_recent; + } + + nvgpu_cond_init(&sched->readout_wq); + + err = nvgpu_mutex_init(&sched->status_lock); + if (err) + goto free_ref; + + err = nvgpu_mutex_init(&sched->control_lock); + if (err) + goto free_status_lock; + + err = nvgpu_mutex_init(&sched->busy_lock); + if (err) + goto free_control_lock; + + sched->sw_ready = true; + + return 0; + +free_control_lock: + nvgpu_mutex_destroy(&sched->control_lock); +free_status_lock: + nvgpu_mutex_destroy(&sched->status_lock); +free_ref: + nvgpu_kfree(g, sched->ref_tsg_bitmap); +free_recent: + nvgpu_kfree(g, sched->recent_tsg_bitmap); +free_active: + nvgpu_kfree(g, sched->active_tsg_bitmap); + + return err; +} + +void gk20a_sched_ctrl_cleanup(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + + nvgpu_kfree(g, sched->active_tsg_bitmap); + nvgpu_kfree(g, sched->recent_tsg_bitmap); + nvgpu_kfree(g, sched->ref_tsg_bitmap); + sched->active_tsg_bitmap = NULL; + sched->recent_tsg_bitmap = NULL; + sched->ref_tsg_bitmap = NULL; + + nvgpu_mutex_destroy(&sched->status_lock); + nvgpu_mutex_destroy(&sched->control_lock); + nvgpu_mutex_destroy(&sched->busy_lock); + + sched->sw_ready = false; +} diff --git a/drivers/gpu/nvgpu/os/linux/sched.h b/drivers/gpu/nvgpu/os/linux/sched.h new file mode 100644 index 00000000..a699bbea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __NVGPU_SCHED_H +#define __NVGPU_SCHED_H + +struct gk20a; +struct gpu_ops; +struct tsg_gk20a; +struct poll_table_struct; + +struct gk20a_sched_ctrl { + struct gk20a *g; + + struct nvgpu_mutex control_lock; + bool control_locked; + bool sw_ready; + struct nvgpu_mutex status_lock; + struct nvgpu_mutex busy_lock; + + u64 status; + + size_t bitmap_size; + u64 *active_tsg_bitmap; + u64 *recent_tsg_bitmap; + u64 *ref_tsg_bitmap; + + struct nvgpu_cond readout_wq; +}; + +int gk20a_sched_dev_release(struct inode *inode, struct file *filp); +int gk20a_sched_dev_open(struct inode *inode, struct file *filp); +long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); +ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); +unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); + +void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); +void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); +int gk20a_sched_ctrl_init(struct gk20a *); + +void gk20a_sched_ctrl_cleanup(struct gk20a *g); + +#endif /* __NVGPU_SCHED_H */ diff --git a/drivers/gpu/nvgpu/os/linux/sim.c b/drivers/gpu/nvgpu/os/linux/sim.c new file mode 100644 index 00000000..8e964f39 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" +#include "module.h" + +void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) +{ + struct sim_nvgpu_linux *sim_linux = + container_of(sim, struct sim_nvgpu_linux, sim); + + writel(v, sim_linux->regs + r); +} + +u32 sim_readl(struct sim_nvgpu *sim, u32 r) +{ + struct sim_nvgpu_linux *sim_linux = + container_of(sim, struct sim_nvgpu_linux, sim); + + return readl(sim_linux->regs + r); +} + +void nvgpu_remove_sim_support_linux(struct gk20a *g) +{ + struct sim_nvgpu_linux *sim_linux; + + if (!g->sim) + return; + + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + if (sim_linux->regs) { + sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); + iounmap(sim_linux->regs); + sim_linux->regs = NULL; + } + nvgpu_kfree(g, sim_linux); + g->sim = NULL; +} + +int nvgpu_init_sim_support_linux(struct gk20a *g, + struct platform_device *dev) +{ + struct sim_nvgpu_linux *sim_linux; + int err = -ENOMEM; + + if (!nvgpu_platform_is_simulation(g)) + return 0; + + sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); + if (!sim_linux) + return err; + g->sim = &sim_linux->sim; + g->sim->g = g; + sim_linux->regs = nvgpu_ioremap_resource(dev, + GK20A_SIM_IORESOURCE_MEM, + &sim_linux->reg_mem); + if (IS_ERR(sim_linux->regs)) { + nvgpu_err(g, "failed to remap gk20a sim regs"); + err = PTR_ERR(sim_linux->regs); + goto fail; + } + sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; + return 0; + +fail: + nvgpu_remove_sim_support_linux(g); + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/sim_pci.c b/drivers/gpu/nvgpu/os/linux/sim_pci.c new file mode 100644 index 00000000..d37767b7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim_pci.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "gk20a/gk20a.h" +#include "os_linux.h" +#include "module.h" + +static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) +{ + u32 cfg; + bool is_simulation = false; + + cfg = nvgpu_readl(g, sim_base + sim_config_r()); + if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) + is_simulation = true; + + return is_simulation; +} + +void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) +{ + struct sim_nvgpu_linux *sim_linux; + bool is_simulation; + + is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); + + if (!is_simulation) { + return; + } + + if (!g->sim) { + nvgpu_warn(g, "sim_gk20a not allocated"); + return; + } + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + + if (sim_linux->regs) { + sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); + sim_linux->regs = NULL; + } + nvgpu_kfree(g, sim_linux); + g->sim = NULL; +} + +int nvgpu_init_sim_support_linux_pci(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct sim_nvgpu_linux *sim_linux; + int err = -ENOMEM; + bool is_simulation; + + is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); + __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); + + if (!is_simulation) + return 0; + + sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); + if (!sim_linux) + return err; + g->sim = &sim_linux->sim; + g->sim->g = g; + sim_linux->regs = l->regs + sim_r(); + sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/soc.c b/drivers/gpu/nvgpu/os/linux/soc.c new file mode 100644 index 00000000..1b27d6f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/soc.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#ifdef CONFIG_TEGRA_HV_MANAGER +#include +#endif + +#include +#include "os_linux.h" +#include "platform_gk20a.h" + +bool nvgpu_platform_is_silicon(struct gk20a *g) +{ + return tegra_platform_is_silicon(); +} + +bool nvgpu_platform_is_simulation(struct gk20a *g) +{ + return tegra_platform_is_vdk(); +} + +bool nvgpu_platform_is_fpga(struct gk20a *g) +{ + return tegra_platform_is_fpga(); +} + +bool nvgpu_is_hypervisor_mode(struct gk20a *g) +{ + return is_tegra_hypervisor_mode(); +} + +bool nvgpu_is_bpmp_running(struct gk20a *g) +{ + return tegra_bpmp_running(); +} + +bool nvgpu_is_soc_t194_a01(struct gk20a *g) +{ + return ((tegra_get_chip_id() == TEGRA194 && + tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? + true : false); +} + +#ifdef CONFIG_TEGRA_HV_MANAGER +/* When nvlink is enabled on dGPU, we need to use physical memory addresses. + * There is no SMMU translation. However, the device initially enumerates as a + * PCIe device. As such, when allocation memory for this PCIe device, the DMA + * framework ends up allocating memory using SMMU (if enabled in device tree). 
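+ * The buffers handed back by the DMA framework are thus IOVAs that
+ * only the PCIe path can resolve.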
+ * As a result, when we switch to nvlink, we need to use underlying physical + * addresses, even if memory mappings exist in SMMU. + * In addition, when stage-2 SMMU translation is enabled (for instance when HV + * is enabled), the addresses we get from dma_alloc are IPAs. We need to + * convert them to PA. + */ +static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct hyp_ipa_pa_info info; + int err; + u64 pa = 0ULL; + + err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); + if (err < 0) { + /* WAR for bug 2096877 + * hyp_read_ipa_pa_info only looks up RAM mappings. + * assume one to one IPA:PA mapping for syncpt aperture + */ + u64 start = g->syncpt_unit_base; + u64 end = g->syncpt_unit_base + g->syncpt_unit_size; + if ((ipa >= start) && (ipa < end)) { + pa = ipa; + nvgpu_log(g, gpu_dbg_map_v, + "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", + ipa, platform->vmid, pa); + } else { + nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", + ipa, platform->vmid, err); + } + } else { + pa = info.base + info.offset; + nvgpu_log(g, gpu_dbg_map_v, + "ipa=%llx vmid=%d -> pa=%llx " + "base=%llx offset=%llx size=%llx\n", + ipa, platform->vmid, pa, info.base, + info.offset, info.size); + } + return pa; +} +#endif + +int nvgpu_init_soc_vars(struct gk20a *g) +{ +#ifdef CONFIG_TEGRA_HV_MANAGER + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + int err; + + if (nvgpu_is_hypervisor_mode(g)) { + err = hyp_read_gid(&platform->vmid); + if (err) { + nvgpu_err(g, "failed to read vmid"); + return err; + } + platform->phys_addr = nvgpu_tegra_hv_ipa_pa; + } +#endif + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.c b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c new file mode 100644 index 00000000..4dd10e6e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c @@ -0,0 +1,419 @@ +/* + * Semaphore Sync Framework Integration + * + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "../linux/channel.h" + +#include "../drivers/staging/android/sync.h" + +#include "sync_sema_android.h" + +static const struct sync_timeline_ops gk20a_sync_timeline_ops; + +struct gk20a_sync_timeline { + struct sync_timeline obj; + u32 max; + u32 min; +}; + +/** + * The sync framework dups pts when merging fences. We share a single + * refcounted gk20a_sync_pt for each duped pt. + */ +struct gk20a_sync_pt { + struct gk20a *g; + struct nvgpu_ref refcount; + u32 thresh; + struct nvgpu_semaphore *sema; + struct gk20a_sync_timeline *obj; + + /* + * Use a spin lock here since it will have better performance + * than a mutex - there should be very little contention on this + * lock. 
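+ *
+ * The only code taking this lock in this file is
+ * gk20a_sync_pt_has_signaled() and gk20a_sync_pt_sema(); both hold it
+ * only for a few non-sleeping operations, so the spinlock is safe as
+ * well as cheap here.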
+ */
+	struct nvgpu_spinlock lock;
+};
+
+struct gk20a_sync_pt_inst {
+	struct sync_pt pt;
+	struct gk20a_sync_pt *shared;
+};
+
+/**
+ * Compares sync pt values a and b, both of which will trigger either before
+ * or after ref (i.e. a and b trigger before ref, or a and b trigger after
+ * ref). Supplying ref allows us to handle wrapping correctly.
+ *
+ * Returns -1 if a < b (a triggers before b)
+ *          0 if a = b (a and b trigger at the same time)
+ *          1 if a > b (b triggers before a)
+ */
+static int __gk20a_sync_pt_compare_ref(
+	u32 ref,
+	u32 a,
+	u32 b)
+{
+	/*
+	 * We normalize both a and b by subtracting ref from them.
+	 * Denote the normalized values by a_n and b_n. Note that because
+	 * of wrapping, a_n and/or b_n may be negative.
+	 *
+	 * The normalized values a_n and b_n satisfy:
+	 * - a positive value triggers before a negative value
+	 * - a smaller positive value triggers before a greater positive value
+	 * - a smaller negative value (greater in absolute value) triggers
+	 *   before a greater negative value (smaller in absolute value).
+	 *
+	 * Thus we can just stick to unsigned arithmetic and compare
+	 * (u32)a_n to (u32)b_n.
+	 *
+	 * Just to reiterate the possible cases:
+	 *
+	 *	1A) ...ref..a....b....
+	 *	1B) ...ref..b....a....
+	 *	2A) ...b....ref..a....		b_n < 0
+	 *	2B) ...a....ref..b....		a_n > 0
+	 *	3A) ...a....b....ref..		a_n < 0, b_n < 0
+	 *	3B) ...b....a....ref..		a_n < 0, b_n < 0
+	 */
+	u32 a_n = a - ref;
+	u32 b_n = b - ref;
+	if (a_n < b_n)
+		return -1;
+	else if (a_n > b_n)
+		return 1;
+	else
+		return 0;
+}
+
+static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
+{
+	struct gk20a_sync_pt_inst *pti =
+		container_of(pt, struct gk20a_sync_pt_inst, pt);
+	return pti->shared;
+}
+static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
+{
+	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
+		return NULL;
+	return (struct gk20a_sync_timeline *)obj;
+}
+
+static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
+{
+	struct gk20a_sync_pt *pt =
+		container_of(ref, struct gk20a_sync_pt, refcount);
+	struct gk20a *g = pt->g;
+
+	if (pt->sema)
+		nvgpu_semaphore_put(pt->sema);
+	nvgpu_kfree(g, pt);
+}
+
+static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
+	struct gk20a *g,
+	struct gk20a_sync_timeline *obj,
+	struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt *shared;
+
+	shared = nvgpu_kzalloc(g, sizeof(*shared));
+	if (!shared)
+		return NULL;
+
+	nvgpu_ref_init(&shared->refcount);
+	shared->g = g;
+	shared->obj = obj;
+	shared->sema = sema;
+	shared->thresh = ++obj->max; /* sync framework has a lock */
+
+	nvgpu_spinlock_init(&shared->lock);
+
+	nvgpu_semaphore_get(sema);
+
+	return shared;
+}
+
+static struct sync_pt *gk20a_sync_pt_create_inst(
+	struct gk20a *g,
+	struct gk20a_sync_timeline *obj,
+	struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt_inst *pti;
+
+	pti = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&obj->obj, sizeof(*pti));
+	if (!pti)
+		return NULL;
+
+	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
+	if (!pti->shared) {
+		sync_pt_free(&pti->pt);
+		return NULL;
+	}
+	return &pti->pt;
+}
+
+static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	if (pt)
+		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
+}
+
+static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt_inst *pti;
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+
+	pti = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&pt->obj->obj, sizeof(*pti));
+	if (!pti)
+		return NULL;
+	pti->shared = pt;
+	nvgpu_ref_get(&pt->refcount);
+	return &pti->pt;
+}
+
+/*
+ * This function must be able to run on the same sync_pt concurrently. This
+ * requires a lock to protect access to the sync_pt's internal data structures
+ * which are modified as a side effect of calling this function.
+ */
+static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	struct gk20a_sync_timeline *obj = pt->obj;
+	bool signaled = true;
+
+	nvgpu_spinlock_acquire(&pt->lock);
+	if (!pt->sema)
+		goto done;
+
+	/* Acquired == not released yet == active == not signaled. */
+	signaled = !nvgpu_semaphore_is_acquired(pt->sema);
+
+	if (signaled) {
+		/* Update min if necessary. */
+		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
+				obj->min) == 1)
+			obj->min = pt->thresh;
+
+		/* Release the semaphore to the pool. */
+		nvgpu_semaphore_put(pt->sema);
+		pt->sema = NULL;
+	}
+done:
+	nvgpu_spinlock_release(&pt->lock);
+
+	return signaled;
+}
+
+static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	bool a_expired;
+	bool b_expired;
+	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
+	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
+
+	if (WARN_ON(pt_a->obj != pt_b->obj))
+		return 0;
+
+	/* Early out */
+	if (a == b)
+		return 0;
+
+	a_expired = gk20a_sync_pt_has_signaled(a);
+	b_expired = gk20a_sync_pt_has_signaled(b);
+	if (a_expired && !b_expired) {
+		/* Easy, a was earlier */
+		return -1;
+	} else if (!a_expired && b_expired) {
+		/* Easy, b was earlier */
+		return 1;
+	}
+
+	/* Both a and b are expired (trigger before min) or not
+	 * expired (trigger after min), so we can use min
+	 * as a reference value for __gk20a_sync_pt_compare_ref.
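+	 *
+	 * Worked example of the wrap handling: with min = 0xfffffff0, the
+	 * thresholds 0xfffffff8 and 0x00000008 normalize to 0x8 and 0x18
+	 * respectively, so 0xfffffff8 correctly orders first even though
+	 * it is the larger raw u32.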
+ */ + return __gk20a_sync_pt_compare_ref(pt_a->obj->min, + pt_a->thresh, pt_b->thresh); +} + +static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) +{ + return obj->min; +} + +static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, + char *str, int size) +{ + struct gk20a_sync_timeline *obj = + (struct gk20a_sync_timeline *)timeline; + snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); +} + +static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, + char *str, int size) +{ + struct nvgpu_semaphore *s = pt->sema; + + snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", + s->location.pool->page_idx, + nvgpu_semaphore_get_value(s), + nvgpu_semaphore_read(s)); +} + +static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, + int size) +{ + struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); + + if (pt->sema) { + gk20a_sync_pt_value_str_for_sema(pt, str, size); + return; + } + + snprintf(str, size, "%d", pt->thresh); +} + +static const struct sync_timeline_ops gk20a_sync_timeline_ops = { + .driver_name = "nvgpu_semaphore", + .dup = gk20a_sync_pt_dup_inst, + .has_signaled = gk20a_sync_pt_has_signaled, + .compare = gk20a_sync_pt_compare, + .free_pt = gk20a_sync_pt_free_inst, + .timeline_value_str = gk20a_sync_timeline_value_str, + .pt_value_str = gk20a_sync_pt_value_str, +}; + +/* Public API */ + +struct sync_fence *gk20a_sync_fence_fdget(int fd) +{ + struct sync_fence *fence = sync_fence_fdget(fd); + int i; + + if (!fence) + return NULL; + + for (i = 0; i < fence->num_fences; i++) { + struct fence *pt = fence->cbs[i].sync_pt; + struct sync_pt *spt = sync_pt_from_fence(pt); + struct sync_timeline *t; + + if (spt == NULL) { + sync_fence_put(fence); + return NULL; + } + + t = sync_pt_parent(spt); + if (t->ops != &gk20a_sync_timeline_ops) { + sync_fence_put(fence); + return NULL; + } + } + + return fence; +} + +struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) +{ + struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); + struct nvgpu_semaphore *sema; + + nvgpu_spinlock_acquire(&pt->lock); + sema = pt->sema; + if (sema) + nvgpu_semaphore_get(sema); + nvgpu_spinlock_release(&pt->lock); + + return sema; +} + +void gk20a_sync_timeline_signal(struct sync_timeline *timeline) +{ + sync_timeline_signal(timeline, 0); +} + +void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) +{ + sync_timeline_destroy(timeline); +} + +struct sync_timeline *gk20a_sync_timeline_create( + const char *name) +{ + struct gk20a_sync_timeline *obj; + + obj = (struct gk20a_sync_timeline *) + sync_timeline_create(&gk20a_sync_timeline_ops, + sizeof(struct gk20a_sync_timeline), + name); + if (!obj) + return NULL; + obj->max = 0; + obj->min = 0; + return &obj->obj; +} + +struct sync_fence *gk20a_sync_fence_create( + struct channel_gk20a *c, + struct nvgpu_semaphore *sema, + const char *fmt, ...) 
+{ + char name[30]; + va_list args; + struct sync_pt *pt; + struct sync_fence *fence; + struct gk20a *g = c->g; + + struct nvgpu_channel_linux *os_channel_priv = c->os_priv; + struct nvgpu_os_fence_framework *fence_framework = NULL; + struct gk20a_sync_timeline *timeline = NULL; + + fence_framework = &os_channel_priv->fence_framework; + + timeline = to_gk20a_timeline(fence_framework->timeline); + + pt = gk20a_sync_pt_create_inst(g, timeline, sema); + if (pt == NULL) + return NULL; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence = sync_fence_create(name, pt); + if (fence == NULL) { + sync_pt_free(pt); + return NULL; + } + return fence; +} diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.h b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h new file mode 100644 index 00000000..4fca7bed --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h @@ -0,0 +1,51 @@ +/* + * Semaphore Sync Framework Integration + * + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _GK20A_SYNC_H_ +#define _GK20A_SYNC_H_ + +struct sync_timeline; +struct sync_fence; +struct sync_pt; +struct nvgpu_semaphore; +struct fence; + +#ifdef CONFIG_SYNC +struct sync_timeline *gk20a_sync_timeline_create(const char *name); +void gk20a_sync_timeline_destroy(struct sync_timeline *); +void gk20a_sync_timeline_signal(struct sync_timeline *); +struct sync_fence *gk20a_sync_fence_create( + struct channel_gk20a *c, + struct nvgpu_semaphore *, + const char *fmt, ...); +struct sync_fence *gk20a_sync_fence_fdget(int fd); +struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); +#else +static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} +static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} +static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) +{ + return NULL; +} +static inline struct sync_timeline *gk20a_sync_timeline_create( + const char *name) { + return NULL; +} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c new file mode 100644 index 00000000..e5995bb8 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -0,0 +1,1205 @@ +/* + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include + +#include "sysfs.h" +#include "platform_gk20a.h" +#include "gk20a/pmu_gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "gv11b/gr_gv11b.h" + +#define PTIMER_FP_FACTOR 1000000 + +#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) + +static ssize_t elcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + if (val) { + g->elcg_enabled = true; + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); + } else { + g->elcg_enabled = false; + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); + } + + gk20a_idle(g); + + nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); + +static ssize_t blcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->blcg_enabled = true; + else + g->blcg_enabled = false; + + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.clock_gating.blcg_bus_load_gating_prod) + g->ops.clock_gating.blcg_bus_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ce_load_gating_prod) + g->ops.clock_gating.blcg_ce_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) + g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_fb_load_gating_prod) + g->ops.clock_gating.blcg_fb_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_fifo_load_gating_prod) + g->ops.clock_gating.blcg_fifo_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_gr_load_gating_prod) + g->ops.clock_gating.blcg_gr_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ltc_load_gating_prod) + g->ops.clock_gating.blcg_ltc_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_pmu_load_gating_prod) + g->ops.clock_gating.blcg_pmu_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_xbar_load_gating_prod) + g->ops.clock_gating.blcg_xbar_load_gating_prod(g, + g->blcg_enabled); + gk20a_idle(g); + + nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t blcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); +} + + +static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); + +static ssize_t slcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->slcg_enabled = true; + else + g->slcg_enabled = false; + + /* + * TODO: slcg_therm_load_gating is not enabled anywhere during + * init. Therefore, it would be incongruous to add it here. 
Once + * it is added to init, we should add it here too. + */ + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.clock_gating.slcg_bus_load_gating_prod) + g->ops.clock_gating.slcg_bus_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ce2_load_gating_prod) + g->ops.clock_gating.slcg_ce2_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) + g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) + g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_fb_load_gating_prod) + g->ops.clock_gating.slcg_fb_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_fifo_load_gating_prod) + g->ops.clock_gating.slcg_fifo_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_gr_load_gating_prod) + g->ops.clock_gating.slcg_gr_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ltc_load_gating_prod) + g->ops.clock_gating.slcg_ltc_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_perf_load_gating_prod) + g->ops.clock_gating.slcg_perf_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_priring_load_gating_prod) + g->ops.clock_gating.slcg_priring_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_pmu_load_gating_prod) + g->ops.clock_gating.slcg_pmu_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_xbar_load_gating_prod) + g->ops.clock_gating.slcg_xbar_load_gating_prod(g, + g->slcg_enabled); + gk20a_idle(g); + + nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t slcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); + +static ssize_t ptimer_scale_factor_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + u32 scaling_factor_fp; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / + ((u32)(src_freq_hz) / + (u32)(PTIMER_FP_FACTOR)); + res = snprintf(buf, + PAGE_SIZE, + "%u.%u\n", + scaling_factor_fp / PTIMER_FP_FACTOR, + scaling_factor_fp % PTIMER_FP_FACTOR); + + return res; + +} + +static DEVICE_ATTR(ptimer_scale_factor, + S_IRUGO, + ptimer_scale_factor_show, + NULL); + +static ssize_t ptimer_ref_freq_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); + + return res; + +} + +static DEVICE_ATTR(ptimer_ref_freq, + S_IRUGO, + ptimer_ref_freq_show, + NULL); + +static ssize_t ptimer_src_freq_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); + + return res; + +} + +static DEVICE_ATTR(ptimer_src_freq, + S_IRUGO, + ptimer_src_freq_show, + NULL); + + +#if defined(CONFIG_PM) +static ssize_t railgate_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long railgate_enable = 0; + /* dev is guaranteed to be valid here. Ok to de-reference */ + struct gk20a *g = get_gk20a(dev); + int err; + + if (kstrtoul(buf, 10, &railgate_enable) < 0) + return -EINVAL; + + if (railgate_enable && !g->can_railgate) { + g->can_railgate = true; + pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); + } else if (railgate_enable == 0 && g->can_railgate) { + g->can_railgate = false; + pm_runtime_set_autosuspend_delay(dev, -1); + } + /* wake-up system to make rail-gating setting effective */ + err = gk20a_busy(g); + if (err) + return err; + gk20a_idle(g); + + nvgpu_info(g, "railgate is %s.", g->can_railgate ? + "enabled" : "disabled"); + + return count; +} + +static ssize_t railgate_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 
1 : 0); +} + +static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, + railgate_enable_store); +#endif + +static ssize_t railgate_delay_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int railgate_delay = 0, ret = 0; + struct gk20a *g = get_gk20a(dev); + int err; + + if (!g->can_railgate) { + nvgpu_info(g, "does not support power-gating"); + return count; + } + + ret = sscanf(buf, "%d", &railgate_delay); + if (ret == 1 && railgate_delay >= 0) { + g->railgate_delay = railgate_delay; + pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); + } else + nvgpu_err(g, "Invalid powergate delay"); + + /* wake-up system to make rail-gating delay effective immediately */ + err = gk20a_busy(g); + if (err) + return err; + gk20a_idle(g); + + return count; +} +static ssize_t railgate_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); +} +static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, + railgate_delay_store); + +static ssize_t is_railgated_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + bool is_railgated = 0; + + if (platform->is_railgated) + is_railgated = platform->is_railgated(dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); +} +static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); + +static ssize_t counters_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + u32 busy_cycles, total_cycles; + ssize_t res; + + nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); + + res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); + + return res; +} +static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); + +static ssize_t counters_show_reset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t res = counters_show(dev, attr, buf); + struct gk20a *g = get_gk20a(dev); + + nvgpu_pmu_reset_load_counters(g); + + return res; +} +static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); + +static ssize_t gk20a_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + u32 busy_time; + ssize_t res; + int err; + + if (!g->power_on) { + busy_time = 0; + } else { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_load_update(g); + nvgpu_pmu_load_norm(g, &busy_time); + gk20a_idle(g); + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); + + return res; +} +static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); + +static ssize_t elpg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->power_on) { + g->elpg_enabled = val ? true : false; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + /* + * Since elpg is refcounted, we should not unnecessarily call + * enable/disable if it is already so. 
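+	 * Concretely: echoing "1" twice in a row must produce a single
+	 * nvgpu_pmu_pg_global_enable(g, true) call, which is what the
+	 * val/g->elpg_enabled cross-check below enforces.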
+ */ + if (val && !g->elpg_enabled) { + g->elpg_enabled = true; + nvgpu_pmu_pg_global_enable(g, true); + + } else if (!val && g->elpg_enabled) { + if (g->ops.pmu.pmu_pg_engines_feature_list && + g->ops.pmu.pmu_pg_engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + nvgpu_pmu_pg_global_enable(g, false); + g->elpg_enabled = false; + } else { + g->elpg_enabled = false; + nvgpu_pmu_pg_global_enable(g, false); + } + } + gk20a_idle(g); + } + nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elpg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); + +static ssize_t ldiv_slowdown_factor_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) { + nvgpu_err(g, "parse error for input SLOWDOWN factor\n"); + return -EINVAL; + } + + if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { + nvgpu_err(g, "Invalid SLOWDOWN factor\n"); + return -EINVAL; + } + + if (val == g->ldiv_slowdown_factor) + return count; + + if (!g->power_on) { + g->ldiv_slowdown_factor = val; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + + g->ldiv_slowdown_factor = val; + + if (g->ops.pmu.pmu_pg_init_param) + g->ops.pmu.pmu_pg_init_param(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS); + + gk20a_idle(g); + } + + nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor); + + return count; +} + +static ssize_t ldiv_slowdown_factor_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); +} + +static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, + ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); + +static ssize_t mscg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_pmu *pmu = &g->pmu; + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->power_on) { + g->mscg_enabled = val ? true : false; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + /* + * Since elpg is refcounted, we should not unnecessarily call + * enable/disable if it is already so. + */ + if (val && !g->mscg_enabled) { + g->mscg_enabled = true; + if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, + PMU_PG_LPWR_FEATURE_MSCG)) { + if (!ACCESS_ONCE(pmu->mscg_stat)) { + WRITE_ONCE(pmu->mscg_stat, + PMU_MSCG_ENABLED); + /* make status visible */ + smp_mb(); + } + } + + } else if (!val && g->mscg_enabled) { + if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, + PMU_PG_LPWR_FEATURE_MSCG)) { + nvgpu_pmu_pg_global_enable(g, false); + WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); + /* make status visible */ + smp_mb(); + g->mscg_enabled = false; + if (g->elpg_enabled) + nvgpu_pmu_pg_global_enable(g, true); + } + g->mscg_enabled = false; + } + gk20a_idle(g); + } + nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? 
"enabled" : + "disabled"); + + return count; +} + +static ssize_t mscg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); + +static ssize_t aelpg_param_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + int status = 0; + union pmu_ap_cmd ap_cmd; + int *paramlist = (int *)g->pmu.aelpg_param; + u32 defaultparam[5] = { + APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, + APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, + APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, + APCTRL_POWER_BREAKEVEN_DEFAULT_US, + APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT + }; + + /* Get each parameter value from input string*/ + sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], + ¶mlist[2], ¶mlist[3], ¶mlist[4]); + + /* If parameter value is 0 then reset to SW default values*/ + if ((paramlist[0] | paramlist[1] | paramlist[2] + | paramlist[3] | paramlist[4]) == 0x00) { + memcpy(paramlist, defaultparam, sizeof(defaultparam)); + } + + /* If aelpg is enabled & pmu is ready then post values to + * PMU else store then post later + */ + if (g->aelpg_enabled && g->pmu.pmu_ready) { + /* Disable AELPG */ + ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; + ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + + /* Enable AELPG */ + nvgpu_aelpg_init(g); + nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); + } + + return count; +} + +static ssize_t aelpg_param_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, + "%d %d %d %d %d\n", g->pmu.aelpg_param[0], + g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], + g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); +} + +static DEVICE_ATTR(aelpg_param, ROOTRW, + aelpg_param_read, aelpg_param_store); + +static ssize_t aelpg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int status = 0; + union pmu_ap_cmd ap_cmd; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + if (g->pmu.pmu_ready) { + if (val && !g->aelpg_enabled) { + g->aelpg_enabled = true; + /* Enable AELPG */ + ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; + ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + } else if (!val && g->aelpg_enabled) { + g->aelpg_enabled = false; + /* Disable AELPG */ + ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; + ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + } + } else { + nvgpu_info(g, "PMU is not ready, AELPG request failed"); + } + gk20a_idle(g); + + nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t aelpg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(aelpg_enable, ROOTRW, + aelpg_enable_read, aelpg_enable_store); + + +static ssize_t allow_all_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); +} + +static ssize_t allow_all_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + g->allow_all = (val ? true : false); + gk20a_idle(g); + + return count; +} + +static DEVICE_ATTR(allow_all, ROOTRW, + allow_all_enable_read, allow_all_enable_store); + +static ssize_t emc3d_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + g->emc3d_ratio = val; + + return count; +} + +static ssize_t emc3d_ratio_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); +} + +static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); + +static ssize_t fmax_at_vmin_safe_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long gpu_fmax_at_vmin_hz = 0; + + if (g->ops.clk.get_fmax_at_vmin_safe) + gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); + + return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); +} + +static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); + +#ifdef CONFIG_PM +static ssize_t force_idle_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) { + if (g->forced_idle) + return count; /* do nothing */ + else { + err = __gk20a_do_idle(g, false); + if (!err) { + g->forced_idle = 1; + nvgpu_info(g, "gpu is idle : %d", + g->forced_idle); + } + } + } else { + if (!g->forced_idle) + return count; /* do nothing */ + else { + err = __gk20a_do_unidle(g); + if (!err) { + g->forced_idle = 0; + nvgpu_info(g, "gpu is idle : %d", + g->forced_idle); + } + } + } + + return count; +} + +static ssize_t force_idle_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 
1 : 0); +} + +static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); +#endif + +static ssize_t tpc_fs_mask_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->gr.gpc_tpc_mask) + return -ENODEV; + + if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { + g->gr.gpc_tpc_mask[0] = val; + g->tpc_fs_mask_user = val; + + g->ops.gr.set_gpc_tpc_mask(g, 0); + + nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); + g->gr.ctx_vars.local_golden_image = NULL; + g->gr.ctx_vars.golden_image_initialized = false; + g->gr.ctx_vars.golden_image_size = 0; + /* Cause next poweron to reinit just gr */ + g->gr.sw_ready = false; + } + + return count; +} + +static ssize_t tpc_fs_mask_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + u32 gpc_index; + u32 tpc_fs_mask = 0; + int err = 0; + + err = gk20a_busy(g); + if (err) + return err; + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + if (g->ops.gr.get_gpc_tpc_mask) + tpc_fs_mask |= + g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << + (gr->max_tpc_per_gpc_count * gpc_index); + } + + gk20a_idle(g); + + return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); +} + +static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); + +static ssize_t min_timeslice_us_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); +} + +static ssize_t min_timeslice_us_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val > g->max_timeslice_us) + return -EINVAL; + + g->min_timeslice_us = val; + + return count; +} + +static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, + min_timeslice_us_store); + +static ssize_t max_timeslice_us_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); +} + +static ssize_t max_timeslice_us_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val < g->min_timeslice_us) + return -EINVAL; + + g->max_timeslice_us = val; + + return count; +} + +static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, + max_timeslice_us_store); + +static ssize_t czf_bypass_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val >= 4) + return -EINVAL; + + g->gr.czf_bypass = val; + + return count; +} + +static ssize_t czf_bypass_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return sprintf(buf, "%d\n", g->gr.czf_bypass); +} + +static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); + +static ssize_t pd_max_batches_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + 
unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val > 64) + return -EINVAL; + + g->gr.pd_max_batches = val; + + return count; +} + +static ssize_t pd_max_batches_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return sprintf(buf, "%d\n", g->gr.pd_max_batches); +} + +static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); + +static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + unsigned long val = 0; + int err = -1; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { + if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) + return -EINVAL; + } + + gr->gfxp_wfi_timeout_count = val; + + if (g->ops.gr.init_preemption_state && g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.init_preemption_state(g)); + + gk20a_idle(g); + + if (err) + return err; + } + return count; +} + +static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + int err = -1; + + if (count > 0 && buf[0] == 's') + /* sysclk */ + gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; + else + /* usec */ + gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; + + if (g->ops.gr.init_preemption_state && g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.init_preemption_state(g)); + + gk20a_idle(g); + + if (err) + return err; + } + + return count; +} + +static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + u32 val = gr->gfxp_wfi_timeout_count; + + return snprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + + if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) + return snprintf(buf, PAGE_SIZE, "usec\n"); + else + return snprintf(buf, PAGE_SIZE, "sysclk\n"); +} + +static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), + gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); + +static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), + gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); + +void nvgpu_remove_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_elcg_enable); + device_remove_file(dev, &dev_attr_blcg_enable); + device_remove_file(dev, &dev_attr_slcg_enable); + device_remove_file(dev, &dev_attr_ptimer_scale_factor); + device_remove_file(dev, &dev_attr_ptimer_ref_freq); + device_remove_file(dev, &dev_attr_ptimer_src_freq); + device_remove_file(dev, &dev_attr_elpg_enable); + device_remove_file(dev, &dev_attr_mscg_enable); + device_remove_file(dev, &dev_attr_emc3d_ratio); + device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); + + device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); + + device_remove_file(dev, &dev_attr_counters); + device_remove_file(dev, &dev_attr_counters_reset); + device_remove_file(dev, &dev_attr_load); + device_remove_file(dev, 
&dev_attr_railgate_delay); + device_remove_file(dev, &dev_attr_is_railgated); +#ifdef CONFIG_PM + device_remove_file(dev, &dev_attr_force_idle); + device_remove_file(dev, &dev_attr_railgate_enable); +#endif + device_remove_file(dev, &dev_attr_aelpg_param); + device_remove_file(dev, &dev_attr_aelpg_enable); + device_remove_file(dev, &dev_attr_allow_all); + device_remove_file(dev, &dev_attr_tpc_fs_mask); + device_remove_file(dev, &dev_attr_min_timeslice_us); + device_remove_file(dev, &dev_attr_max_timeslice_us); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_nvhost_remove_symlink(get_gk20a(dev)); +#endif + + device_remove_file(dev, &dev_attr_czf_bypass); + device_remove_file(dev, &dev_attr_pd_max_batches); + device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); + device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); + + if (strcmp(dev_name(dev), "gpu.0")) { + struct kobject *kobj = &dev->kobj; + struct device *parent = container_of((kobj->parent), + struct device, kobj); + sysfs_remove_link(&parent->kobj, "gpu.0"); + } +} + +int nvgpu_create_sysfs(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int error = 0; + + error |= device_create_file(dev, &dev_attr_elcg_enable); + error |= device_create_file(dev, &dev_attr_blcg_enable); + error |= device_create_file(dev, &dev_attr_slcg_enable); + error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); + error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); + error |= device_create_file(dev, &dev_attr_ptimer_src_freq); + error |= device_create_file(dev, &dev_attr_elpg_enable); + error |= device_create_file(dev, &dev_attr_mscg_enable); + error |= device_create_file(dev, &dev_attr_emc3d_ratio); + error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); + + error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); + + error |= device_create_file(dev, &dev_attr_counters); + error |= device_create_file(dev, &dev_attr_counters_reset); + error |= device_create_file(dev, &dev_attr_load); + error |= device_create_file(dev, &dev_attr_railgate_delay); + error |= device_create_file(dev, &dev_attr_is_railgated); +#ifdef CONFIG_PM + error |= device_create_file(dev, &dev_attr_force_idle); + error |= device_create_file(dev, &dev_attr_railgate_enable); +#endif + error |= device_create_file(dev, &dev_attr_aelpg_param); + error |= device_create_file(dev, &dev_attr_aelpg_enable); + error |= device_create_file(dev, &dev_attr_allow_all); + error |= device_create_file(dev, &dev_attr_tpc_fs_mask); + error |= device_create_file(dev, &dev_attr_min_timeslice_us); + error |= device_create_file(dev, &dev_attr_max_timeslice_us); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + error |= nvgpu_nvhost_create_symlink(g); +#endif + + error |= device_create_file(dev, &dev_attr_czf_bypass); + error |= device_create_file(dev, &dev_attr_pd_max_batches); + error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); + error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); + + if (strcmp(dev_name(dev), "gpu.0")) { + struct kobject *kobj = &dev->kobj; + struct device *parent = container_of((kobj->parent), + struct device, kobj); + error |= sysfs_create_link(&parent->kobj, + &dev->kobj, "gpu.0"); + } + + if (error) + nvgpu_err(g, "Failed to create sysfs attributes!\n"); + + return error; +} diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.h b/drivers/gpu/nvgpu/os/linux/sysfs.h new file mode 100644 index 00000000..80925844 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef NVGPU_SYSFS_H +#define NVGPU_SYSFS_H + +struct device; + +int nvgpu_create_sysfs(struct device *dev); +void nvgpu_remove_sysfs(struct device *dev); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c new file mode 100644 index 00000000..92c556f2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/thread.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +int nvgpu_thread_proxy(void *threaddata) +{ + struct nvgpu_thread *thread = threaddata; + int ret = thread->fn(thread->data); + + thread->running = false; + return ret; +} + +int nvgpu_thread_create(struct nvgpu_thread *thread, + void *data, + int (*threadfn)(void *data), const char *name) +{ + struct task_struct *task = kthread_create(nvgpu_thread_proxy, + thread, name); + if (IS_ERR(task)) + return PTR_ERR(task); + + thread->task = task; + thread->fn = threadfn; + thread->data = data; + thread->running = true; + wake_up_process(task); + return 0; +}; + +void nvgpu_thread_stop(struct nvgpu_thread *thread) +{ + if (thread->task) { + kthread_stop(thread->task); + thread->task = NULL; + } +}; + +bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) +{ + return kthread_should_stop(); +}; + +bool nvgpu_thread_is_running(struct nvgpu_thread *thread) +{ + return ACCESS_ONCE(thread->running); +}; diff --git a/drivers/gpu/nvgpu/os/linux/timers.c b/drivers/gpu/nvgpu/os/linux/timers.c new file mode 100644 index 00000000..d1aa641f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/timers.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
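The thread.c wrapper just above exists so OS-independent code can spawn workers without touching kthread directly; the running flag is what keeps nvgpu_thread_is_running() meaningful after the thread function returns on its own. A sketch of the intended call pattern (worker(), poll_thread and start_stop() are hypothetical):

    static struct nvgpu_thread poll_thread;

    static int worker(void *data)
    {
            struct gk20a *g = data;

            /* Cooperative stop: poll the request nvgpu_thread_stop() raises. */
            while (!nvgpu_thread_should_stop(&poll_thread)) {
                    /* ... periodic work on g ... */
                    nvgpu_msleep(100);
            }
            return 0;
    }

    static int start_stop(struct gk20a *g)
    {
            int err = nvgpu_thread_create(&poll_thread, g, worker, "nvgpu_poll");

            if (err)
                    return err;
            /* ... */
            nvgpu_thread_stop(&poll_thread);  /* blocks until worker() returns */
            return 0;
    }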
+ */ + +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" + +/* + * Returns 1 if the platform is pre-Si and should ignore the timeout checking. + * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e do the + * timeout check regardless of platform). + */ +static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) +{ + if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) + return 0; + + return !nvgpu_platform_is_silicon(timeout->g); +} + +/** + * nvgpu_timeout_init - Init timer. + * + * @g - nvgpu device. + * @timeout - The timer. + * @duration - Timeout in milliseconds or number of retries. + * @flags - Flags for timer. + * + * This configures the timeout to start the timeout duration now, i.e: when this + * function is called. Available flags to pass to @flags: + * + * %NVGPU_TIMER_CPU_TIMER + * %NVGPU_TIMER_RETRY_TIMER + * %NVGPU_TIMER_NO_PRE_SI + * %NVGPU_TIMER_SILENT_TIMEOUT + * + * If neither %NVGPU_TIMER_CPU_TIMER or %NVGPU_TIMER_RETRY_TIMER is passed then + * a CPU timer is used by default. + */ +int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, + u32 duration, unsigned long flags) +{ + if (flags & ~NVGPU_TIMER_FLAG_MASK) + return -EINVAL; + + memset(timeout, 0, sizeof(*timeout)); + + timeout->g = g; + timeout->flags = flags; + + if (flags & NVGPU_TIMER_RETRY_TIMER) + timeout->retries.max = duration; + else + timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), + (s64)NSEC_PER_MSEC * duration)); + + return 0; +} + +static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, + void *caller, + const char *fmt, va_list args) +{ + struct gk20a *g = timeout->g; + ktime_t now = ktime_get(); + + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (ktime_after(now, ns_to_ktime(timeout->time))) { + if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { + char buf[128]; + + vsnprintf(buf, sizeof(buf), fmt, args); + + nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); + } + + return -ETIMEDOUT; + } + + return 0; +} + +static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, + void *caller, + const char *fmt, va_list args) +{ + struct gk20a *g = timeout->g; + + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (timeout->retries.attempted >= timeout->retries.max) { + if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { + char buf[128]; + + vsnprintf(buf, sizeof(buf), fmt, args); + + nvgpu_err(g, "No more retries @ %pF %s", caller, buf); + } + + return -ETIMEDOUT; + } + + timeout->retries.attempted++; + + return 0; +} + +/** + * __nvgpu_timeout_expired_msg - Check if a timeout has expired. + * + * @timeout - The timeout to check. + * @caller - Address of the caller of this function. + * @fmt - The fmt string. + * + * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. + * + * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout + * then a message is printed based on %fmt. + */ +int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, + void *caller, const char *fmt, ...) +{ + int ret; + va_list args; + + va_start(args, fmt); + if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) + ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, + args); + else + ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, + args); + va_end(args); + + return ret; +} + +/** + * nvgpu_timeout_peek_expired - Check the status of a timeout. + * + * @timeout - The timeout to check. 
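+ *
+ * Typical use (an editor's sketch: hw_is_ready() is a hypothetical
+ * condition, and the CPU-timer flavor matters here because peeking
+ * never advances a retry timer's attempt count):
+ *
+ *	struct nvgpu_timeout timeout;
+ *
+ *	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_CPU_TIMER);
+ *	while (!nvgpu_timeout_peek_expired(&timeout)) {
+ *		if (hw_is_ready(g))
+ *			return 0;
+ *		nvgpu_udelay(10);
+ *	}
+ *	return hw_is_ready(g) ? 0 : -ETIMEDOUT;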
+ * + * Returns non-zero if the timeout is expired, zero otherwise. In the case of + * retry timers this will not increment the underlying retry count. Also if the + * timer has expired no messages will be printed. + * + * This function honors the pre-Si check as well. + */ +int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) +{ + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) + return timeout->retries.attempted >= timeout->retries.max; + else + return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); +} + +/** + * nvgpu_udelay - Delay for some number of microseconds. + * + * @usecs - Microseconds to wait for. + * + * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly + * accurate. This is normally backed by a busy-loop so this means waits should + * be kept short, below 100us. If longer delays are necessary then + * nvgpu_msleep() should be preferred. + * + * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This + * function will attempt to not use a busy-loop. + */ +void nvgpu_udelay(unsigned int usecs) +{ + udelay(usecs); +} + +/** + * nvgpu_usleep_range - Sleep for a range of microseconds. + * + * @min_us - Minimum wait time. + * @max_us - Maximum wait time. + * + * Wait for some number of microseconds between @min_us and @max_us. This, + * unlike nvgpu_udelay(), will attempt to sleep for the passed number of + * microseconds instead of busy looping. Not all platforms support this, + * and in that case this reduces to nvgpu_udelay(min_us). + * + * Linux note: this is not safe to use in atomic context. If you are in + * atomic context you must use nvgpu_udelay(). + */ +void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) +{ + usleep_range(min_us, max_us); +} + +/** + * nvgpu_msleep - Sleep for some milliseconds. + * + * @msecs - Sleep for at least this many milliseconds. + * + * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms + * or so) the sleep will be significantly longer due to scheduling overhead and + * mechanics. + */ +void nvgpu_msleep(unsigned int msecs) +{ + msleep(msecs); +} + +/** + * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. + * + * Return a clock in millisecond units. The start time of the clock is + * unspecified; the time returned can be compared with older ones to measure + * durations. The source clock does not jump when the system clock is adjusted. + */ +s64 nvgpu_current_time_ms(void) +{ + return ktime_to_ms(ktime_get()); +} + +/** + * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. + * + * Return a clock in nanosecond units. The start time of the clock is + * unspecified; the time returned can be compared with older ones to measure + * durations. The source clock does not jump when the system clock is adjusted. + */ +s64 nvgpu_current_time_ns(void) +{ + return ktime_to_ns(ktime_get()); +} + +/** + * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. + * + * Return a "high resolution" time stamp. It does not really matter exactly what + * it is, so long as it generally returns unique values and monotonically + * increases - wrap around _is_ possible though in a system running for long + * enough. + * + * Note: what high resolution means is system dependent. 
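+ *
+ * An editor's sketch of wrap-tolerant use (do_work() is a hypothetical
+ * workload; unsigned subtraction absorbs a single wrap):
+ *
+ *	u64 t0 = nvgpu_hr_timestamp();
+ *	do_work();
+ *	u64 spent = nvgpu_hr_timestamp() - t0;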
+ */ +u64 nvgpu_hr_timestamp(void) +{ + return get_cycles(); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c new file mode 100644 index 00000000..9f6017d3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c @@ -0,0 +1,168 @@ +/* + * Virtualized GPU Clock Interface + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include "gk20a/gk20a.h" +#include "clk_vgpu.h" +#include "ctrl/ctrlclk.h" +#include "os/linux/platform_gk20a.h" + +static unsigned long +vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; + +static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err; + unsigned long ret = 0; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) + nvgpu_err(g, "%s failed - %d", __func__, err); + else + /* return frequency in Hz */ + ret = p->rate * 1000; + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return ret; +} + +static int vgpu_clk_set_rate(struct gk20a *g, + u32 api_domain, unsigned long rate) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + + /* server dvfs framework requires frequency in kHz */ + p->rate = (u32)(rate / 1000); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) + nvgpu_err(g, "%s failed - %d", __func__, err); + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.max_freq; +} + +void vgpu_init_clk_support(struct gk20a *g) +{ + g->ops.clk.get_rate = vgpu_clk_get_rate; + g->ops.clk.set_rate = vgpu_clk_set_rate; + g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; +} + +long vgpu_clk_round_rate(struct device *dev, unsigned long rate) +{ + /* server will handle frequency rounding */ + return rate; +} + +int vgpu_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_get_gpu_freq_table_params *p = + &msg.params.get_gpu_freq_table; + unsigned int i; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; + msg.handle = vgpu_get_handle(g); + + p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + return err; + } + + /* return frequency in Hz */ + for (i = 0; i < p->num_freqs; i++) + vgpu_freq_table[i] = p->freqs[i] * 1000; + + *freqs = vgpu_freq_table; + *num_freqs = p->num_freqs; + + return 0; +} + +int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = 0; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + p->rate = (u32)rate; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h new file mode 100644 index 00000000..8d477643 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h @@ -0,0 +1,27 @@ +/* + * Virtualized GPU Clock Interface + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _CLK_VIRT_H_ +#define _CLK_VIRT_H_ + +void vgpu_init_clk_support(struct gk20a *g); +long vgpu_clk_round_rate(struct device *dev, unsigned long rate); +int vgpu_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs); +int vgpu_clk_cap_rate(struct device *dev, unsigned long rate); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..6339aef9 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "os/linux/os_linux.h" +#include "vgpu/fecs_trace_vgpu.h" + +struct vgpu_fecs_trace { + struct tegra_hv_ivm_cookie *cookie; + struct nvgpu_ctxsw_ring_header *header; + struct nvgpu_ctxsw_trace_entry *entries; + int num_entries; + bool enabled; + void *buf; +}; + +int vgpu_fecs_trace_init(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + struct device_node *np = dev->of_node; + struct of_phandle_args args; + struct vgpu_fecs_trace *vcst; + u32 mempool; + int err; + + nvgpu_log_fn(g, " "); + + vcst = nvgpu_kzalloc(g, sizeof(*vcst)); + if (!vcst) + return -ENOMEM; + + err = of_parse_phandle_with_fixed_args(np, + "mempool-fecs-trace", 1, 0, &args); + if (err) { + nvgpu_info(g, "does not support fecs trace"); + goto fail; + } + __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); + + mempool = args.args[0]; + vcst->cookie = vgpu_ivm_mempool_reserve(mempool); + if (IS_ERR(vcst->cookie)) { + nvgpu_info(g, + "mempool %u reserve failed", mempool); + vcst->cookie = NULL; + err = -EINVAL; + goto fail; + } + + vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), + vgpu_ivm_get_size(vcst->cookie)); + if (!vcst->buf) { + nvgpu_info(g, "ioremap_cache failed"); + err = -EINVAL; + goto fail; + } + vcst->header = vcst->buf; + vcst->num_entries = vcst->header->num_ents; + if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { + nvgpu_err(g, "entry size mismatch"); + goto fail; + } + vcst->entries = vcst->buf + sizeof(*vcst->header); + g->fecs_trace = (struct gk20a_fecs_trace *)vcst; + + return 0; +fail: + iounmap(vcst->buf); + if (vcst->cookie) + vgpu_ivm_mempool_unreserve(vcst->cookie); + nvgpu_kfree(g, vcst); + return err; +} + +int vgpu_fecs_trace_deinit(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + iounmap(vcst->buf); + vgpu_ivm_mempool_unreserve(vcst->cookie); + nvgpu_kfree(g, vcst); + return 0; +} + +int vgpu_fecs_trace_enable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), 
sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + vcst->enabled = !err; + return err; +} + +int vgpu_fecs_trace_disable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + vcst->enabled = false; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +bool vgpu_fecs_trace_is_enabled(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return (vcst && vcst->enabled); +} + +int vgpu_fecs_trace_poll(struct gk20a *g) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + *buf = vcst->buf; + *size = vgpu_ivm_get_size(vcst->cookie); + return 0; +} + +int vgpu_free_user_buffer(struct gk20a *g) +{ + return 0; +} + +int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + unsigned long size = vgpu_ivm_get_size(vcst->cookie); + unsigned long vsize = vma->vm_end - vma->vm_start; + + size = min(size, vsize); + size = round_up(size, PAGE_SIZE); + + return remap_pfn_range(vma, vma->vm_start, + vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, + size, + vma->vm_page_prot); +} + +#ifdef CONFIG_GK20A_CTXSW_TRACE +int vgpu_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_ctxsw_trace_filter *filter) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return vcst->header->num_ents; +} + +#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE +#error "FECS trace filter size mismatch!" +#endif + +int vgpu_fecs_trace_set_filter(struct gk20a *g, + struct nvgpu_ctxsw_trace_filter *filter) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, + .handle = vgpu_get_handle(g), + }; + struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; + int err; + + memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +void vgpu_fecs_trace_data_update(struct gk20a *g) +{ + gk20a_ctxsw_trace_wake_up(g, 0); +} +#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 00000000..66911626 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +#include "gk20a/gk20a.h" +#include "os/linux/vgpu/clk_vgpu.h" +#include "os/linux/platform_gk20a.h" +#include "os/linux/os_linux.h" + +static int gv11b_vgpu_probe(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct resource *r; + void __iomem *regs; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); + struct gk20a *g = platform->g; + int ret; + + r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); + if (!r) { + nvgpu_err(g, "failed to get usermode regs"); + return -ENXIO; + } + regs = devm_ioremap_resource(dev, r); + if (IS_ERR(regs)) { + nvgpu_err(g, "failed to map usermode regs"); + return PTR_ERR(regs); + } + l->usermode_regs = regs; + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + ret = nvgpu_get_nvhost_dev(g); + if (ret) { + l->usermode_regs = NULL; + return ret; + } + + ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, + &g->syncpt_unit_base, + &g->syncpt_unit_size); + if (ret) { + nvgpu_err(g, "Failed to get syncpt interface"); + return -ENOSYS; + } + g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); + nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", + g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); +#endif + vgpu_init_clk_support(platform->g); + + return 0; +} + +struct gk20a_platform gv11b_vgpu_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .can_railgate_init = false, + .can_elpg_init = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_elcg = false, + .enable_elpg = false, + .enable_aelpg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gv11b_vgpu_probe, + + .clk_round_rate = vgpu_clk_round_rate, + .get_clk_freqs = vgpu_clk_get_freqs, + + /* frequency scaling configuration */ + .devfreq_governor = "userspace", + + .virtual_dev = true, +}; diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 00000000..e4819e7d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c @@ -0,0 +1,69 @@ +/* + * Tegra Virtualized GPU Platform Interface + * + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include "gk20a/gk20a.h" +#include "os/linux/platform_gk20a.h" +#include "clk_vgpu.h" + +static int gk20a_tegra_probe(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret; + + ret = nvgpu_get_nvhost_dev(platform->g); + if (ret) + return ret; + + vgpu_init_clk_support(platform->g); + return 0; +#else + return 0; +#endif +} + +struct gk20a_platform vgpu_tegra_platform = { + .has_syncpoints = true, + .aggressive_sync_destroy_thresh = 64, + + /* power management configuration */ + .can_railgate_init = false, + .can_elpg_init = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_elcg = false, + .enable_elpg = false, + .enable_aelpg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gk20a_tegra_probe, + + .clk_round_rate = vgpu_clk_round_rate, + .get_clk_freqs = vgpu_clk_get_freqs, + + /* frequency scaling configuration */ + .devfreq_governor = "userspace", + + .virtual_dev = true, +}; diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 00000000..57aad4b4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os/linux/platform_gk20a.h" + +static ssize_t vgpu_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; + int err; + + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err) + return err; + + return snprintf(buf, PAGE_SIZE, "%u\n", p->load); +} +static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); + +void vgpu_create_sysfs(struct device *dev) +{ + if (device_create_file(dev, &dev_attr_load)) + dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); +} + +void vgpu_remove_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_load); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c new file mode 100644 index 00000000..950f0d49 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os/linux/os_linux.h" + +int vgpu_ivc_init(struct gk20a *g, u32 elems, + const size_t *queue_sizes, u32 queue_start, u32 num_queues) +{ + struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); + + return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, + num_queues); +} + +void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) +{ + tegra_gr_comm_deinit(queue_start, num_queues); +} + +void vgpu_ivc_release(void *handle) +{ + tegra_gr_comm_release(handle); +} + +u32 vgpu_ivc_get_server_vmid(void) +{ + return tegra_gr_comm_get_server_vmid(); +} + +int vgpu_ivc_recv(u32 index, void **handle, void **data, + size_t *size, u32 *sender) +{ + return tegra_gr_comm_recv(index, handle, data, size, sender); +} + +int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) +{ + return tegra_gr_comm_send(peer, index, data, size); +} + +int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, + void **data, size_t *size) +{ + return tegra_gr_comm_sendrecv(peer, index, handle, data, size); +} + +u32 vgpu_ivc_get_peer_self(void) +{ + return TEGRA_GR_COMM_ID_SELF; +} + +void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, + size_t *size) +{ + return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); +} + +void vgpu_ivc_oob_put_ptr(void *handle) +{ + tegra_gr_comm_oob_put_ptr(handle); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c new file mode 100644 index 00000000..bbd444da --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +#include "os/linux/os_linux.h" + +struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) +{ + return tegra_hv_mempool_reserve(id); +} + +int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) +{ + return tegra_hv_mempool_unreserve(cookie); +} + +u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) +{ + return cookie->ipa; +} + +u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) +{ + return cookie->size; +} + +void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) +{ + return ioremap_cache(vgpu_ivm_get_ipa(cookie), + vgpu_ivm_get_size(cookie)); +} + +void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, + void *addr) +{ + iounmap(addr); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c new file mode 100644 index 00000000..a7612e54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -0,0 +1,475 @@ +/* + * Virtualized GPU for Linux + * + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vgpu_linux.h" +#include "vgpu/fecs_trace_vgpu.h" +#include "clk_vgpu.h" +#include "gk20a/tsg_gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/regops_gk20a.h" +#include "gm20b/hal_gm20b.h" + +#include "os/linux/module.h" +#include "os/linux/os_linux.h" +#include "os/linux/ioctl.h" +#include "os/linux/scale.h" +#include "os/linux/driver_common.h" +#include "os/linux/platform_gk20a.h" + +#include + +struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) +{ + struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); + + return (struct vgpu_priv_data *)plat->vgpu_priv; +} + +static void vgpu_remove_support(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + vgpu_remove_support_common(g); + + /* free mappings to registers, etc*/ + + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } +} + +static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + nvgpu_mutex_init(&g->poweron_lock); + nvgpu_mutex_init(&g->poweroff_lock); + nvgpu_mutex_init(&g->ctxsw_disable_lock); + l->regs_saved = l->regs; + l->bar1_saved = l->bar1; + + g->aggressive_sync_destroy = platform->aggressive_sync_destroy; + g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; + g->has_syncpoints = platform->has_syncpoints; + g->ptimer_src_freq = platform->ptimer_src_freq; + g->can_railgate = platform->can_railgate_init; + g->railgate_delay = platform->railgate_delay_init; + + __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, + platform->unify_address_spaces); +} + +static int vgpu_init_support(struct platform_device *pdev) +{ + struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct gk20a *g = get_gk20a(&pdev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + void __iomem *regs; + int err = 0; + + if (!r) { + nvgpu_err(g, "failed to get gk20a bar1"); + err = -ENXIO; + goto fail; + } + + if (r->name && !strcmp(r->name, "/vgpu")) { + regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(regs)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(regs); + goto fail; + } + l->bar1 = regs; + l->bar1_mem = r; + } + + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + + nvgpu_init_list_node(&g->profiler_objects); + + g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); + if (!g->dbg_regops_tmp_buf) { + nvgpu_err(g, "couldn't allocate regops tmp buf"); + return -ENOMEM; + } + g->dbg_regops_tmp_buf_ops = + SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); + + g->remove_support = vgpu_remove_support; + return 0; + + fail: + vgpu_remove_support(g); + return err; +} + +int vgpu_pm_prepare_poweroff(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int ret = 0; + + 
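+	/*
+	 * Nothing to do if the GPU is already off; otherwise all channels
+	 * are suspended before the device is marked powered down.
+	 */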
nvgpu_log_fn(g, " "); + + if (!g->power_on) + return 0; + + ret = gk20a_channel_suspend(g); + if (ret) + return ret; + + g->power_on = false; + + return ret; +} + +int vgpu_pm_finalize_poweron(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int err; + + nvgpu_log_fn(g, " "); + + if (g->power_on) + return 0; + + g->power_on = true; + + vgpu_detect_chip(g); + err = vgpu_init_hal(g); + if (err) + goto done; + + if (g->ops.ltc.init_fs_state) + g->ops.ltc.init_fs_state(g); + + err = nvgpu_init_ltc_support(g); + if (err) { + nvgpu_err(g, "failed to init ltc"); + goto done; + } + + err = vgpu_init_mm_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a mm"); + goto done; + } + + err = vgpu_init_fifo_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a fifo"); + goto done; + } + + err = vgpu_init_gr_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gr"); + goto done; + } + + err = g->ops.chip_init_gpu_characteristics(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gpu characteristics"); + goto done; + } + + err = nvgpu_finalize_poweron_linux(l); + if (err) + goto done; + +#ifdef CONFIG_GK20A_CTXSW_TRACE + gk20a_ctxsw_trace_init(g); +#endif + gk20a_sched_ctrl_init(g); + gk20a_channel_resume(g); + + g->sw_ready = true; + +done: + return err; +} + +static int vgpu_qos_notify(struct notifier_block *nb, + unsigned long n, void *data) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a *g = get_gk20a(profile->dev); + u32 max_freq; + int err; + + nvgpu_log_fn(g, " "); + + max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); + err = vgpu_clk_cap_rate(profile->dev, max_freq); + if (err) + nvgpu_err(g, "%s failed, err=%d", __func__, err); + + return NOTIFY_OK; /* need notify call further */ +} + +static int vgpu_pm_qos_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { + if (!profile) + return -EINVAL; + } else { + profile = nvgpu_kzalloc(g, sizeof(*profile)); + if (!profile) + return -ENOMEM; + g->scale_profile = profile; + } + + profile->dev = dev; + profile->qos_notify_block.notifier_call = vgpu_qos_notify; + pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, + &profile->qos_notify_block); + return 0; +} + +static void vgpu_pm_qos_remove(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, + &g->scale_profile->qos_notify_block); + nvgpu_kfree(g, g->scale_profile); + g->scale_profile = NULL; +} + +static int vgpu_pm_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + unsigned long *freqs; + int num_freqs; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_platform_is_simulation(g)) + return 0; + + __pm_runtime_disable(dev, false); + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_init(dev); + + if (l->devfreq) { + /* set min/max frequency based on frequency table */ + err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs); + if (err) + return err; + + if (num_freqs < 1) + return -EINVAL; + + l->devfreq->min_freq = freqs[0]; + l->devfreq->max_freq = freqs[num_freqs - 1]; + } + + err = vgpu_pm_qos_init(dev); + if (err) + return err; + + return err; +} + +int vgpu_probe(struct platform_device *pdev) +{ + struct nvgpu_os_linux *l; + struct gk20a *gk20a; + int err; + struct 
device *dev = &pdev->dev; + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct vgpu_priv_data *priv; + + if (!platform) { + dev_err(dev, "no platform data\n"); + return -ENODATA; + } + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + gk20a = &l->g; + + nvgpu_log_fn(gk20a, " "); + + nvgpu_init_gk20a(gk20a); + + nvgpu_kmem_init(gk20a); + + err = nvgpu_init_enabled_flags(gk20a); + if (err) { + kfree(gk20a); + return err; + } + + l->dev = dev; + if (tegra_platform_is_vdk()) + __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); + + gk20a->is_virtual = true; + + priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); + if (!priv) { + kfree(gk20a); + return -ENOMEM; + } + + platform->g = gk20a; + platform->vgpu_priv = priv; + + err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); + if (err) + return err; + + vgpu_init_support(pdev); + + vgpu_init_vars(gk20a, platform); + + init_rwsem(&l->busy_lock); + + nvgpu_spinlock_init(&gk20a->mc_enable_lock); + + gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; + + /* Initialize the platform interface. */ + err = platform->probe(dev); + if (err) { + if (err == -EPROBE_DEFER) + nvgpu_info(gk20a, "platform probe failed"); + else + nvgpu_err(gk20a, "platform probe failed"); + return err; + } + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + nvgpu_err(gk20a, "late probe failed"); + return err; + } + } + + err = vgpu_comm_init(gk20a); + if (err) { + nvgpu_err(gk20a, "failed to init comm interface"); + return -ENOSYS; + } + + priv->virt_handle = vgpu_connect(); + if (!priv->virt_handle) { + nvgpu_err(gk20a, "failed to connect to server node"); + vgpu_comm_deinit(); + return -ENOSYS; + } + + err = vgpu_get_constants(gk20a); + if (err) { + vgpu_comm_deinit(); + return err; + } + + err = vgpu_pm_init(dev); + if (err) { + nvgpu_err(gk20a, "pm init failed"); + return err; + } + + err = nvgpu_thread_create(&priv->intr_handler, gk20a, + vgpu_intr_thread, "gk20a"); + if (err) + return err; + + gk20a_debug_init(gk20a, "gpu.0"); + + /* Set DMA parameters to allow larger sgt lists */ + dev->dma_parms = &l->dma_parms; + dma_set_max_seg_size(dev, UINT_MAX); + + gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; + gk20a->timeouts_disabled_by_user = false; + nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); + + vgpu_create_sysfs(dev); + gk20a_init_gr(gk20a); + + nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); + gk20a->gr.max_comptag_mem = totalram_pages + >> (10 - (PAGE_SHIFT - 10)); + + nvgpu_ref_init(&gk20a->refcount); + + return 0; +} + +int vgpu_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct gk20a *g = get_gk20a(dev); + + nvgpu_log_fn(g, " "); + + vgpu_pm_qos_remove(dev); + if (g->remove_support) + g->remove_support(g); + + vgpu_comm_deinit(); + gk20a_sched_ctrl_cleanup(g); + gk20a_user_deinit(dev, &nvgpu_class); + vgpu_remove_sysfs(dev); + gk20a_get_platform(dev)->g = NULL; + gk20a_put(g); + + return 0; +} + +bool vgpu_is_reduced_bar1(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h new file mode 100644 index 00000000..38379cf2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h @@ -0,0 +1,57 @@ +/* + * Virtualized GPU Linux 
Interfaces
+ *
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VGPU_LINUX_H__
+#define __VGPU_LINUX_H__
+
+struct device;
+struct platform_device;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+
+#include
+
+int vgpu_pm_prepare_poweroff(struct device *dev);
+int vgpu_pm_finalize_poweron(struct device *dev);
+int vgpu_probe(struct platform_device *dev);
+int vgpu_remove(struct platform_device *dev);
+
+void vgpu_create_sysfs(struct device *dev);
+void vgpu_remove_sysfs(struct device *dev);
+#else
+/* define placeholders for functions used outside of vgpu */
+
+static inline int vgpu_pm_prepare_poweroff(struct device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_pm_finalize_poweron(struct device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_probe(struct platform_device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_remove(struct platform_device *dev)
+{
+	return -ENOSYS;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/vidmem.c b/drivers/gpu/nvgpu/os/linux/vidmem.c
new file mode 100644
index 00000000..136d4a10
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/vidmem.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include + +#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) +{ + return !!(addr & 1ULL); +} + +void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) +{ + /* set bit 0 to indicate vidmem allocation */ + sg_dma_address(sgl) = (addr | 1ULL); +} + +struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) +{ + u64 addr; + + addr = sg_dma_address(sgl); + + if (nvgpu_addr_is_vidmem_page_alloc(addr)) + addr = addr & ~1ULL; + else + WARN_ON(1); + + return (struct nvgpu_page_alloc *)(uintptr_t)addr; +} + +static struct sg_table *gk20a_vidbuf_map_dma_buf( + struct dma_buf_attachment *attach, enum dma_data_direction dir) +{ + struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; + + return buf->mem->priv.sgt; +} + +static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ +} + +static void gk20a_vidbuf_release(struct dma_buf *dmabuf) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + struct gk20a *g = buf->g; + + vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", + dmabuf, buf->mem->size >> 10); + + if (linux_buf && linux_buf->dmabuf_priv_delete) + linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); + + nvgpu_kfree(g, linux_buf); + nvgpu_vidmem_buf_free(g, buf); + + gk20a_put(g); +} + +static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) +{ + WARN_ON("Not supported"); + return NULL; +} + +static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, + unsigned long page_num) +{ + WARN_ON("Not supported"); + return NULL; +} + +static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + return -EINVAL; +} + +static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, + struct device *dev, void *priv, void (*delete)(void *priv)) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + + linux_buf->dmabuf_priv = priv; + linux_buf->dmabuf_priv_delete = delete; + + return 0; +} + +static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, + struct device *dev) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + + return linux_buf->dmabuf_priv; +} + +static const struct dma_buf_ops gk20a_vidbuf_ops = { + .map_dma_buf = gk20a_vidbuf_map_dma_buf, + .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, + .release = gk20a_vidbuf_release, + .kmap_atomic = gk20a_vidbuf_kmap_atomic, + .kmap = gk20a_vidbuf_kmap, + .mmap = gk20a_vidbuf_mmap, + .set_drvdata = gk20a_vidbuf_set_private, + .get_drvdata = gk20a_vidbuf_get_private, +}; + +static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.priv = buf; + exp_info.ops = &gk20a_vidbuf_ops; + exp_info.size = buf->mem->size; + exp_info.flags = O_RDWR; + + return dma_buf_export(&exp_info); +} + +struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + + if (dmabuf->ops != &gk20a_vidbuf_ops) + return NULL; + + return buf->g; +} + +int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) +{ + struct nvgpu_vidmem_buf *buf = NULL; + struct nvgpu_vidmem_linux *priv; + int err, fd; + + /* + * This ref is released 
when the dma_buf is closed. + */ + if (!gk20a_get(g)) + return -ENODEV; + + vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto fail; + } + + buf = nvgpu_vidmem_user_alloc(g, bytes); + if (IS_ERR(buf)) { + err = PTR_ERR(buf); + goto fail; + } + + priv->dmabuf = gk20a_vidbuf_export(buf); + if (IS_ERR(priv->dmabuf)) { + err = PTR_ERR(priv->dmabuf); + goto fail; + } + + buf->priv = priv; + +#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD + fd = tegra_alloc_fd(current->files, 1024, O_RDWR); +#else + fd = get_unused_fd_flags(O_RDWR); +#endif + if (fd < 0) { + /* ->release frees what we have done */ + dma_buf_put(priv->dmabuf); + return fd; + } + + /* fclose() on this drops one ref, freeing the dma buf */ + fd_install(fd, priv->dmabuf->file); + + vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", + priv->dmabuf, buf->mem->size >> 10); + + return fd; + +fail: + nvgpu_vidmem_buf_free(g, buf); + nvgpu_kfree(g, priv); + gk20a_put(g); + + vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); + return err; +} + +int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, + void *buffer, u64 offset, u64 size, u32 cmd) +{ + struct nvgpu_vidmem_buf *vidmem_buf; + struct nvgpu_mem *mem; + int err = 0; + + if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) + return -EINVAL; + + vidmem_buf = dmabuf->priv; + mem = vidmem_buf->mem; + + switch (cmd) { + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: + nvgpu_mem_rd_n(g, mem, offset, buffer, size); + break; + + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: + nvgpu_mem_wr_n(g, mem, offset, buffer, size); + break; + + default: + err = -EINVAL; + } + + return err; +} + +void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) +{ + nvgpu_free(vidmem->allocator, + (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); + nvgpu_free_sgtable(g, &vidmem->priv.sgt); +} diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c new file mode 100644 index 00000000..baa77515 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "dmabuf.h" + +static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) + core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) + core_flags |= NVGPU_VM_MAP_CACHEABLE; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) + core_flags |= NVGPU_VM_MAP_IO_COHERENT; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) + core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) + core_flags |= NVGPU_VM_MAP_L3_ALLOC; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) + core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) + nvgpu_warn(g, "Ignoring deprecated flag: " + "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); + + return core_flags; +} + +static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( + struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_enum_start(0, &node, root); + + while (node) { + struct nvgpu_mapped_buf *mapped_buffer = + mapped_buffer_from_rbtree_node(node); + + if (mapped_buffer->os_priv.dmabuf == dmabuf && + mapped_buffer->kind == kind) + return mapped_buffer; + + nvgpu_rbtree_enum_next(&node, node); + } + + return NULL; +} + +int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset) +{ + struct nvgpu_mapped_buf *mapped_buffer; + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); + if (!mapped_buffer) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return -EINVAL; + } + + *dmabuf = mapped_buffer->os_priv.dmabuf; + *offset = gpu_va - mapped_buffer->addr; + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return 0; +} + +u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) +{ + return os_buf->dmabuf->size; +} + +/* + * vm->update_gmmu_lock must be held. This checks to see if we already have + * mapped the passed buffer into this VM. If so, just return the existing + * mapping address. + */ +struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + u64 map_addr, + u32 flags, + int kind) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *mapped_buffer = NULL; + + if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); + if (!mapped_buffer) + return NULL; + + if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || + mapped_buffer->kind != (u32)kind) + return NULL; + } else { + mapped_buffer = + __nvgpu_vm_find_mapped_buf_reverse(vm, + os_buf->dmabuf, + kind); + if (!mapped_buffer) + return NULL; + } + + if (mapped_buffer->flags != flags) + return NULL; + + /* + * If we find the mapping here then that means we have mapped it already + * and the prior pin and get must be undone. 
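+ * (The caller of nvgpu_vm_map() has, by this point, taken its own
+ * dma_buf reference and pin for this mapping attempt; since the existing
+ * mapped_buffer already holds a reference and pin of its own, the
+ * duplicates are dropped below.)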
+ */ + gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, + mapped_buffer->os_priv.sgt); + dma_buf_put(os_buf->dmabuf); + + nvgpu_log(g, gpu_dbg_map, + "gv: 0x%04x_%08x + 0x%-7zu " + "[dma: 0x%010llx, pa: 0x%010llx] " + "pgsz=%-3dKb as=%-2d " + "flags=0x%x apt=%s (reused)", + u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), + os_buf->dmabuf->size, + (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), + (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), + vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, + vm_aspace_id(vm), + mapped_buffer->flags, + nvgpu_aperture_str(g, + gk20a_dmabuf_aperture(g, os_buf->dmabuf))); + + return mapped_buffer; +} + +int nvgpu_vm_map_linux(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags, + s16 compr_kind, + s16 incompr_kind, + int rw_flag, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch, + u64 *gpu_va) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct device *dev = dev_from_gk20a(g); + struct nvgpu_os_buffer os_buf; + struct sg_table *sgt; + struct nvgpu_sgt *nvgpu_sgt = NULL; + struct nvgpu_mapped_buf *mapped_buffer = NULL; + struct dma_buf_attachment *attachment; + u64 map_addr = 0ULL; + int err = 0; + + if (flags & NVGPU_VM_MAP_FIXED_OFFSET) + map_addr = offset_align; + + sgt = gk20a_mm_pin(dev, dmabuf, &attachment); + if (IS_ERR(sgt)) { + nvgpu_warn(g, "Failed to pin dma_buf!"); + return PTR_ERR(sgt); + } + os_buf.dmabuf = dmabuf; + os_buf.attachment = attachment; + os_buf.dev = dev; + + if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { + err = -EINVAL; + goto clean_up; + } + + nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); + if (!nvgpu_sgt) { + err = -ENOMEM; + goto clean_up; + } + + mapped_buffer = nvgpu_vm_map(vm, + &os_buf, + nvgpu_sgt, + map_addr, + mapping_size, + buffer_offset, + rw_flag, + flags, + compr_kind, + incompr_kind, + batch, + gk20a_dmabuf_aperture(g, dmabuf)); + + nvgpu_sgt_free(g, nvgpu_sgt); + + if (IS_ERR(mapped_buffer)) { + err = PTR_ERR(mapped_buffer); + goto clean_up; + } + + mapped_buffer->os_priv.dmabuf = dmabuf; + mapped_buffer->os_priv.attachment = attachment; + mapped_buffer->os_priv.sgt = sgt; + + *gpu_va = mapped_buffer->addr; + return 0; + +clean_up: + gk20a_mm_unpin(dev, dmabuf, attachment, sgt); + + return err; +} + +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ + s16 compr_kind, + s16 incompr_kind, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct dma_buf *dmabuf; + u64 ret_va; + int err = 0; + + /* get ref to the mem handle (released on unmap_locked) */ + dmabuf = dma_buf_get(dmabuf_fd); + if (IS_ERR(dmabuf)) { + nvgpu_warn(g, "%s: fd %d is not a dmabuf", + __func__, dmabuf_fd); + return PTR_ERR(dmabuf); + } + + /* verify that we're not overflowing the buffer, i.e. + * (buffer_offset + mapping_size)> dmabuf->size. + * + * Since buffer_offset + mapping_size could overflow, first check + * that mapping size < dmabuf_size, at which point we can subtract + * mapping_size from both sides for the final comparison. 
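+ *
+ * For example (hypothetical values): with dmabuf->size = 0x1000,
+ * buffer_offset = 0x800 and mapping_size = 0xffffffffffffff00, the naive
+ * sum wraps around to 0x700 and would incorrectly pass, while the
+ * two-step check rejects it at mapping_size > dmabuf->size.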
+ */ + if ((mapping_size > dmabuf->size) || + (buffer_offset > (dmabuf->size - mapping_size))) { + nvgpu_err(g, + "buf size %llx < (offset(%llx) + map_size(%llx))\n", + (u64)dmabuf->size, buffer_offset, mapping_size); + dma_buf_put(dmabuf); + return -EINVAL; + } + + err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); + if (err) { + dma_buf_put(dmabuf); + return err; + } + + err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align, + nvgpu_vm_translate_linux_flags(g, flags), + compr_kind, incompr_kind, + gk20a_mem_flag_none, + buffer_offset, + mapping_size, + batch, + &ret_va); + + if (!err) + *offset_align = ret_va; + else + dma_buf_put(dmabuf); + + return err; +} + +/* + * This is the function call-back for freeing OS specific components of an + * nvgpu_mapped_buf. This should most likely never be called outside of the + * core MM framework! + * + * Note: the VM lock will be held. + */ +void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) +{ + struct vm_gk20a *vm = mapped_buffer->vm; + + gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, + mapped_buffer->os_priv.attachment, + mapped_buffer->os_priv.sgt); + + dma_buf_put(mapped_buffer->os_priv.dmabuf); +} diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.c b/drivers/gpu/nvgpu/pmgr/pmgr.c index 2a9f9673..3d028c98 100644 --- a/drivers/gpu/nvgpu/pmgr/pmgr.c +++ b/drivers/gpu/nvgpu/pmgr/pmgr.c @@ -26,7 +26,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif int pmgr_pwr_devices_get_power(struct gk20a *g, u32 *val) -- cgit v1.2.2