From 2a2c16af5f9f1ccfc93a13e820d5381e5c881e92 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 18 Apr 2018 12:59:00 -0700
Subject: gpu: nvgpu: Move Linux files away from common

Move all Linux source code files to drivers/gpu/nvgpu/os/linux from
drivers/gpu/nvgpu/common/linux. This changes the meaning of common to
be OS independent.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile | 213 ++-
 drivers/gpu/nvgpu/common/linux/cde.c | 1786 -----------------
 drivers/gpu/nvgpu/common/linux/cde.h | 326 ----
 drivers/gpu/nvgpu/common/linux/cde_gm20b.c | 64 -
 drivers/gpu/nvgpu/common/linux/cde_gm20b.h | 32 -
 drivers/gpu/nvgpu/common/linux/cde_gp10b.c | 161 --
 drivers/gpu/nvgpu/common/linux/cde_gp10b.h | 32 -
 drivers/gpu/nvgpu/common/linux/ce2.c | 155 --
 drivers/gpu/nvgpu/common/linux/channel.c | 1021 ----------
 drivers/gpu/nvgpu/common/linux/channel.h | 96 -
 drivers/gpu/nvgpu/common/linux/clk.c | 165 --
 drivers/gpu/nvgpu/common/linux/clk.h | 22 -
 drivers/gpu/nvgpu/common/linux/comptags.c | 140 --
 drivers/gpu/nvgpu/common/linux/cond.c | 73 -
 drivers/gpu/nvgpu/common/linux/ctxsw_trace.c | 730 -------
 drivers/gpu/nvgpu/common/linux/ctxsw_trace.h | 39 -
 drivers/gpu/nvgpu/common/linux/debug.c | 452 -----
 drivers/gpu/nvgpu/common/linux/debug_allocator.c | 69 -
 drivers/gpu/nvgpu/common/linux/debug_allocator.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_cde.c | 53 -
 drivers/gpu/nvgpu/common/linux/debug_cde.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_ce.c | 30 -
 drivers/gpu/nvgpu/common/linux/debug_ce.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_clk.c | 271 ---
 drivers/gpu/nvgpu/common/linux/debug_fifo.c | 378 ----
 drivers/gpu/nvgpu/common/linux/debug_fifo.h | 22 -
 drivers/gpu/nvgpu/common/linux/debug_gr.c | 31 -
 drivers/gpu/nvgpu/common/linux/debug_gr.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_hal.c | 95 -
 drivers/gpu/nvgpu/common/linux/debug_hal.h | 22 -
 drivers/gpu/nvgpu/common/linux/debug_kmem.c | 312 ---
 drivers/gpu/nvgpu/common/linux/debug_kmem.h | 23 -
 drivers/gpu/nvgpu/common/linux/debug_pmu.c | 481 -----
 drivers/gpu/nvgpu/common/linux/debug_pmu.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_sched.c | 80 -
 drivers/gpu/nvgpu/common/linux/debug_sched.h | 21 -
 drivers/gpu/nvgpu/common/linux/debug_xve.c | 176 --
 drivers/gpu/nvgpu/common/linux/debug_xve.h | 21 -
 drivers/gpu/nvgpu/common/linux/dma.c | 694 ------
 drivers/gpu/nvgpu/common/linux/dmabuf.c | 218 ---
 drivers/gpu/nvgpu/common/linux/dmabuf.h | 62 -
 drivers/gpu/nvgpu/common/linux/driver_common.c | 334 ----
 drivers/gpu/nvgpu/common/linux/driver_common.h | 22 -
 drivers/gpu/nvgpu/common/linux/dt.c | 29 -
 drivers/gpu/nvgpu/common/linux/firmware.c | 117 --
 drivers/gpu/nvgpu/common/linux/fuse.c | 55 -
 drivers/gpu/nvgpu/common/linux/intr.c | 122 --
 drivers/gpu/nvgpu/common/linux/intr.h | 22 -
 drivers/gpu/nvgpu/common/linux/io.c | 118 --
 drivers/gpu/nvgpu/common/linux/io_usermode.c | 29 -
 drivers/gpu/nvgpu/common/linux/ioctl.c | 296 ---
 drivers/gpu/nvgpu/common/linux/ioctl.h | 23 -
 drivers/gpu/nvgpu/common/linux/ioctl_as.c | 423 -----
 drivers/gpu/nvgpu/common/linux/ioctl_as.h | 30 -
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 1388 --------------
 drivers/gpu/nvgpu/common/linux/ioctl_channel.h | 50 -
 drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | 562 ------
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 1962 -------------------
drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h | 23 - drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | 2003 -------------------- drivers/gpu/nvgpu/common/linux/ioctl_dbg.h | 54 - drivers/gpu/nvgpu/common/linux/ioctl_tsg.c | 677 ------- drivers/gpu/nvgpu/common/linux/ioctl_tsg.h | 28 - drivers/gpu/nvgpu/common/linux/kmem.c | 654 ------- drivers/gpu/nvgpu/common/linux/kmem_priv.h | 105 - drivers/gpu/nvgpu/common/linux/log.c | 132 -- drivers/gpu/nvgpu/common/linux/module.c | 1365 ------------- drivers/gpu/nvgpu/common/linux/module.h | 32 - drivers/gpu/nvgpu/common/linux/module_usermode.c | 62 - drivers/gpu/nvgpu/common/linux/module_usermode.h | 27 - drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 613 ------ drivers/gpu/nvgpu/common/linux/nvhost.c | 294 --- drivers/gpu/nvgpu/common/linux/nvhost_priv.h | 24 - drivers/gpu/nvgpu/common/linux/nvlink.c | 106 -- drivers/gpu/nvgpu/common/linux/os_fence_android.c | 79 - .../gpu/nvgpu/common/linux/os_fence_android_sema.c | 111 -- .../nvgpu/common/linux/os_fence_android_syncpt.c | 121 -- drivers/gpu/nvgpu/common/linux/os_linux.h | 166 -- drivers/gpu/nvgpu/common/linux/os_sched.c | 26 - drivers/gpu/nvgpu/common/linux/pci.c | 861 --------- drivers/gpu/nvgpu/common/linux/pci.h | 27 - drivers/gpu/nvgpu/common/linux/pci_usermode.c | 24 - drivers/gpu/nvgpu/common/linux/pci_usermode.h | 23 - .../gpu/nvgpu/common/linux/platform_ecc_sysfs.c | 269 --- .../gpu/nvgpu/common/linux/platform_ecc_sysfs.h | 37 - drivers/gpu/nvgpu/common/linux/platform_gk20a.h | 317 ---- .../gpu/nvgpu/common/linux/platform_gk20a_tegra.c | 957 ---------- .../gpu/nvgpu/common/linux/platform_gk20a_tegra.h | 23 - .../gpu/nvgpu/common/linux/platform_gp10b_tegra.c | 607 ------ .../gpu/nvgpu/common/linux/platform_gp10b_tegra.h | 23 - .../gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 588 ------ drivers/gpu/nvgpu/common/linux/rwsem.c | 39 - drivers/gpu/nvgpu/common/linux/scale.c | 428 ----- drivers/gpu/nvgpu/common/linux/scale.h | 66 - drivers/gpu/nvgpu/common/linux/sched.c | 676 ------- drivers/gpu/nvgpu/common/linux/sched.h | 55 - drivers/gpu/nvgpu/common/linux/sim.c | 95 - drivers/gpu/nvgpu/common/linux/sim_pci.c | 91 - drivers/gpu/nvgpu/common/linux/soc.c | 122 -- drivers/gpu/nvgpu/common/linux/sync_sema_android.c | 419 ---- drivers/gpu/nvgpu/common/linux/sync_sema_android.h | 51 - drivers/gpu/nvgpu/common/linux/sysfs.c | 1205 ------------ drivers/gpu/nvgpu/common/linux/sysfs.h | 24 - drivers/gpu/nvgpu/common/linux/thread.c | 63 - drivers/gpu/nvgpu/common/linux/timers.c | 270 --- drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c | 168 -- drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h | 27 - .../gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c | 224 --- .../linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 97 - .../nvgpu/common/linux/vgpu/platform_vgpu_tegra.c | 69 - drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c | 50 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c | 77 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c | 53 - drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c | 475 ----- drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h | 57 - drivers/gpu/nvgpu/common/linux/vidmem.c | 262 --- drivers/gpu/nvgpu/common/linux/vm.c | 332 ---- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 2 +- drivers/gpu/nvgpu/gp106/clk_gp106.c | 2 +- drivers/gpu/nvgpu/gp106/mclk_gp106.c | 2 +- drivers/gpu/nvgpu/gp106/therm_gp106.c | 2 +- drivers/gpu/nvgpu/gp10b/platform_gp10b.h | 39 - drivers/gpu/nvgpu/include/nvgpu/nvhost.h | 2 + drivers/gpu/nvgpu/os/linux/cde.c | 1786 +++++++++++++++++ drivers/gpu/nvgpu/os/linux/cde.h | 326 ++++ 
drivers/gpu/nvgpu/os/linux/cde_gm20b.c | 64 + drivers/gpu/nvgpu/os/linux/cde_gm20b.h | 32 + drivers/gpu/nvgpu/os/linux/cde_gp10b.c | 161 ++ drivers/gpu/nvgpu/os/linux/cde_gp10b.h | 32 + drivers/gpu/nvgpu/os/linux/ce2.c | 155 ++ drivers/gpu/nvgpu/os/linux/channel.c | 1021 ++++++++++ drivers/gpu/nvgpu/os/linux/channel.h | 96 + drivers/gpu/nvgpu/os/linux/clk.c | 165 ++ drivers/gpu/nvgpu/os/linux/clk.h | 22 + drivers/gpu/nvgpu/os/linux/comptags.c | 140 ++ drivers/gpu/nvgpu/os/linux/cond.c | 73 + drivers/gpu/nvgpu/os/linux/ctxsw_trace.c | 730 +++++++ drivers/gpu/nvgpu/os/linux/ctxsw_trace.h | 39 + drivers/gpu/nvgpu/os/linux/debug.c | 452 +++++ drivers/gpu/nvgpu/os/linux/debug_allocator.c | 69 + drivers/gpu/nvgpu/os/linux/debug_allocator.h | 21 + drivers/gpu/nvgpu/os/linux/debug_cde.c | 53 + drivers/gpu/nvgpu/os/linux/debug_cde.h | 21 + drivers/gpu/nvgpu/os/linux/debug_ce.c | 30 + drivers/gpu/nvgpu/os/linux/debug_ce.h | 21 + drivers/gpu/nvgpu/os/linux/debug_clk.c | 271 +++ drivers/gpu/nvgpu/os/linux/debug_fifo.c | 378 ++++ drivers/gpu/nvgpu/os/linux/debug_fifo.h | 22 + drivers/gpu/nvgpu/os/linux/debug_gr.c | 31 + drivers/gpu/nvgpu/os/linux/debug_gr.h | 21 + drivers/gpu/nvgpu/os/linux/debug_hal.c | 95 + drivers/gpu/nvgpu/os/linux/debug_hal.h | 22 + drivers/gpu/nvgpu/os/linux/debug_kmem.c | 312 +++ drivers/gpu/nvgpu/os/linux/debug_kmem.h | 23 + drivers/gpu/nvgpu/os/linux/debug_pmu.c | 481 +++++ drivers/gpu/nvgpu/os/linux/debug_pmu.h | 21 + drivers/gpu/nvgpu/os/linux/debug_sched.c | 80 + drivers/gpu/nvgpu/os/linux/debug_sched.h | 21 + drivers/gpu/nvgpu/os/linux/debug_xve.c | 176 ++ drivers/gpu/nvgpu/os/linux/debug_xve.h | 21 + drivers/gpu/nvgpu/os/linux/dma.c | 694 +++++++ drivers/gpu/nvgpu/os/linux/dmabuf.c | 218 +++ drivers/gpu/nvgpu/os/linux/dmabuf.h | 62 + drivers/gpu/nvgpu/os/linux/driver_common.c | 334 ++++ drivers/gpu/nvgpu/os/linux/driver_common.h | 22 + drivers/gpu/nvgpu/os/linux/dt.c | 29 + drivers/gpu/nvgpu/os/linux/firmware.c | 117 ++ drivers/gpu/nvgpu/os/linux/fuse.c | 55 + drivers/gpu/nvgpu/os/linux/intr.c | 122 ++ drivers/gpu/nvgpu/os/linux/intr.h | 22 + drivers/gpu/nvgpu/os/linux/io.c | 118 ++ drivers/gpu/nvgpu/os/linux/io_usermode.c | 29 + drivers/gpu/nvgpu/os/linux/ioctl.c | 296 +++ drivers/gpu/nvgpu/os/linux/ioctl.h | 23 + drivers/gpu/nvgpu/os/linux/ioctl_as.c | 423 +++++ drivers/gpu/nvgpu/os/linux/ioctl_as.h | 30 + drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 1388 ++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_channel.h | 50 + drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | 562 ++++++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 1962 +++++++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h | 23 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 2003 ++++++++++++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_dbg.h | 54 + drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 677 +++++++ drivers/gpu/nvgpu/os/linux/ioctl_tsg.h | 28 + drivers/gpu/nvgpu/os/linux/kmem.c | 654 +++++++ drivers/gpu/nvgpu/os/linux/kmem_priv.h | 105 + drivers/gpu/nvgpu/os/linux/log.c | 132 ++ drivers/gpu/nvgpu/os/linux/module.c | 1365 +++++++++++++ drivers/gpu/nvgpu/os/linux/module.h | 32 + drivers/gpu/nvgpu/os/linux/module_usermode.c | 62 + drivers/gpu/nvgpu/os/linux/module_usermode.h | 27 + drivers/gpu/nvgpu/os/linux/nvgpu_mem.c | 613 ++++++ drivers/gpu/nvgpu/os/linux/nvhost.c | 294 +++ drivers/gpu/nvgpu/os/linux/nvhost_priv.h | 24 + drivers/gpu/nvgpu/os/linux/nvlink.c | 106 ++ drivers/gpu/nvgpu/os/linux/os_fence_android.c | 79 + drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c | 111 ++ 
.../gpu/nvgpu/os/linux/os_fence_android_syncpt.c | 121 ++ drivers/gpu/nvgpu/os/linux/os_linux.h | 166 ++ drivers/gpu/nvgpu/os/linux/os_sched.c | 26 + drivers/gpu/nvgpu/os/linux/pci.c | 861 +++++++++ drivers/gpu/nvgpu/os/linux/pci.h | 27 + drivers/gpu/nvgpu/os/linux/pci_usermode.c | 24 + drivers/gpu/nvgpu/os/linux/pci_usermode.h | 23 + drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c | 269 +++ drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h | 37 + drivers/gpu/nvgpu/os/linux/platform_gk20a.h | 317 ++++ drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c | 957 ++++++++++ drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h | 23 + drivers/gpu/nvgpu/os/linux/platform_gp10b.h | 39 + drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | 607 ++++++ drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h | 23 + drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 588 ++++++ drivers/gpu/nvgpu/os/linux/rwsem.c | 39 + drivers/gpu/nvgpu/os/linux/scale.c | 428 +++++ drivers/gpu/nvgpu/os/linux/scale.h | 66 + drivers/gpu/nvgpu/os/linux/sched.c | 676 +++++++ drivers/gpu/nvgpu/os/linux/sched.h | 55 + drivers/gpu/nvgpu/os/linux/sim.c | 95 + drivers/gpu/nvgpu/os/linux/sim_pci.c | 91 + drivers/gpu/nvgpu/os/linux/soc.c | 122 ++ drivers/gpu/nvgpu/os/linux/sync_sema_android.c | 419 ++++ drivers/gpu/nvgpu/os/linux/sync_sema_android.h | 51 + drivers/gpu/nvgpu/os/linux/sysfs.c | 1205 ++++++++++++ drivers/gpu/nvgpu/os/linux/sysfs.h | 24 + drivers/gpu/nvgpu/os/linux/thread.c | 63 + drivers/gpu/nvgpu/os/linux/timers.c | 270 +++ drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c | 168 ++ drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h | 27 + drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c | 224 +++ .../linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | 97 + .../gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c | 69 + drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c | 50 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c | 77 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c | 53 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c | 475 +++++ drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h | 57 + drivers/gpu/nvgpu/os/linux/vidmem.c | 262 +++ drivers/gpu/nvgpu/os/linux/vm.c | 332 ++++ drivers/gpu/nvgpu/pmgr/pmgr.c | 2 +- 241 files changed, 30151 insertions(+), 30138 deletions(-) delete mode 100644 drivers/gpu/nvgpu/common/linux/cde.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde.h delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gm20b.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gm20b.h delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gp10b.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cde_gp10b.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ce2.c delete mode 100644 drivers/gpu/nvgpu/common/linux/channel.c delete mode 100644 drivers/gpu/nvgpu/common/linux/channel.h delete mode 100644 drivers/gpu/nvgpu/common/linux/clk.c delete mode 100644 drivers/gpu/nvgpu/common/linux/clk.h delete mode 100644 drivers/gpu/nvgpu/common/linux/comptags.c delete mode 100644 drivers/gpu/nvgpu/common/linux/cond.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ctxsw_trace.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ctxsw_trace.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.c delete mode 100644 
drivers/gpu/nvgpu/common/linux/debug_ce.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_clk.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_hal.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_hal.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.h delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_xve.c delete mode 100644 drivers/gpu/nvgpu/common/linux/debug_xve.h delete mode 100644 drivers/gpu/nvgpu/common/linux/dma.c delete mode 100644 drivers/gpu/nvgpu/common/linux/dmabuf.c delete mode 100644 drivers/gpu/nvgpu/common/linux/dmabuf.h delete mode 100644 drivers/gpu/nvgpu/common/linux/driver_common.c delete mode 100644 drivers/gpu/nvgpu/common/linux/driver_common.h delete mode 100644 drivers/gpu/nvgpu/common/linux/dt.c delete mode 100644 drivers/gpu/nvgpu/common/linux/firmware.c delete mode 100644 drivers/gpu/nvgpu/common/linux/fuse.c delete mode 100644 drivers/gpu/nvgpu/common/linux/intr.c delete mode 100644 drivers/gpu/nvgpu/common/linux/intr.h delete mode 100644 drivers/gpu/nvgpu/common/linux/io.c delete mode 100644 drivers/gpu/nvgpu/common/linux/io_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_as.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_as.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_channel.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_channel.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_dbg.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_dbg.h delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg.c delete mode 100644 drivers/gpu/nvgpu/common/linux/ioctl_tsg.h delete mode 100644 drivers/gpu/nvgpu/common/linux/kmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/kmem_priv.h delete mode 100644 drivers/gpu/nvgpu/common/linux/log.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module.h delete mode 100644 drivers/gpu/nvgpu/common/linux/module_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/module_usermode.h delete mode 100644 drivers/gpu/nvgpu/common/linux/nvgpu_mem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/nvhost.c delete mode 100644 drivers/gpu/nvgpu/common/linux/nvhost_priv.h delete mode 100644 drivers/gpu/nvgpu/common/linux/nvlink.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c delete mode 100644 drivers/gpu/nvgpu/common/linux/os_linux.h delete mode 100644 drivers/gpu/nvgpu/common/linux/os_sched.c delete mode 100644 
drivers/gpu/nvgpu/common/linux/pci.c delete mode 100644 drivers/gpu/nvgpu/common/linux/pci.h delete mode 100644 drivers/gpu/nvgpu/common/linux/pci_usermode.c delete mode 100644 drivers/gpu/nvgpu/common/linux/pci_usermode.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h delete mode 100644 drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/rwsem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/scale.c delete mode 100644 drivers/gpu/nvgpu/common/linux/scale.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sched.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sched.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sim.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sim_pci.c delete mode 100644 drivers/gpu/nvgpu/common/linux/soc.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.h delete mode 100644 drivers/gpu/nvgpu/common/linux/sysfs.c delete mode 100644 drivers/gpu/nvgpu/common/linux/sysfs.h delete mode 100644 drivers/gpu/nvgpu/common/linux/thread.c delete mode 100644 drivers/gpu/nvgpu/common/linux/timers.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h delete mode 100644 drivers/gpu/nvgpu/common/linux/vidmem.c delete mode 100644 drivers/gpu/nvgpu/common/linux/vm.c delete mode 100644 drivers/gpu/nvgpu/gp10b/platform_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gm20b.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gm20b.h create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gp10b.c create mode 100644 drivers/gpu/nvgpu/os/linux/cde_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/ce2.c create mode 100644 drivers/gpu/nvgpu/os/linux/channel.c create mode 100644 drivers/gpu/nvgpu/os/linux/channel.h create mode 100644 drivers/gpu/nvgpu/os/linux/clk.c create mode 100644 drivers/gpu/nvgpu/os/linux/clk.h create mode 100644 drivers/gpu/nvgpu/os/linux/comptags.c create mode 100644 drivers/gpu/nvgpu/os/linux/cond.c create mode 100644 drivers/gpu/nvgpu/os/linux/ctxsw_trace.c create mode 100644 drivers/gpu/nvgpu/os/linux/ctxsw_trace.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_allocator.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_allocator.h create mode 100644 
drivers/gpu/nvgpu/os/linux/debug_cde.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_cde.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_ce.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_ce.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_clk.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_fifo.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_fifo.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_gr.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_gr.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_hal.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_hal.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_kmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_kmem.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_pmu.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_pmu.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_sched.h create mode 100644 drivers/gpu/nvgpu/os/linux/debug_xve.c create mode 100644 drivers/gpu/nvgpu/os/linux/debug_xve.h create mode 100644 drivers/gpu/nvgpu/os/linux/dma.c create mode 100644 drivers/gpu/nvgpu/os/linux/dmabuf.c create mode 100644 drivers/gpu/nvgpu/os/linux/dmabuf.h create mode 100644 drivers/gpu/nvgpu/os/linux/driver_common.c create mode 100644 drivers/gpu/nvgpu/os/linux/driver_common.h create mode 100644 drivers/gpu/nvgpu/os/linux/dt.c create mode 100644 drivers/gpu/nvgpu/os/linux/firmware.c create mode 100644 drivers/gpu/nvgpu/os/linux/fuse.c create mode 100644 drivers/gpu/nvgpu/os/linux/intr.c create mode 100644 drivers/gpu/nvgpu/os/linux/intr.h create mode 100644 drivers/gpu/nvgpu/os/linux/io.c create mode 100644 drivers/gpu/nvgpu/os/linux/io_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_as.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_as.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_channel.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_channel.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_dbg.h create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_tsg.c create mode 100644 drivers/gpu/nvgpu/os/linux/ioctl_tsg.h create mode 100644 drivers/gpu/nvgpu/os/linux/kmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/kmem_priv.h create mode 100644 drivers/gpu/nvgpu/os/linux/log.c create mode 100644 drivers/gpu/nvgpu/os/linux/module.c create mode 100644 drivers/gpu/nvgpu/os/linux/module.h create mode 100644 drivers/gpu/nvgpu/os/linux/module_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/module_usermode.h create mode 100644 drivers/gpu/nvgpu/os/linux/nvgpu_mem.c create mode 100644 drivers/gpu/nvgpu/os/linux/nvhost.c create mode 100644 drivers/gpu/nvgpu/os/linux/nvhost_priv.h create mode 100644 drivers/gpu/nvgpu/os/linux/nvlink.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c create mode 100644 drivers/gpu/nvgpu/os/linux/os_linux.h create mode 100644 drivers/gpu/nvgpu/os/linux/os_sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/pci.c create mode 100644 
drivers/gpu/nvgpu/os/linux/pci.h create mode 100644 drivers/gpu/nvgpu/os/linux/pci_usermode.c create mode 100644 drivers/gpu/nvgpu/os/linux/pci_usermode.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h create mode 100644 drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/rwsem.c create mode 100644 drivers/gpu/nvgpu/os/linux/scale.c create mode 100644 drivers/gpu/nvgpu/os/linux/scale.h create mode 100644 drivers/gpu/nvgpu/os/linux/sched.c create mode 100644 drivers/gpu/nvgpu/os/linux/sched.h create mode 100644 drivers/gpu/nvgpu/os/linux/sim.c create mode 100644 drivers/gpu/nvgpu/os/linux/sim_pci.c create mode 100644 drivers/gpu/nvgpu/os/linux/soc.c create mode 100644 drivers/gpu/nvgpu/os/linux/sync_sema_android.c create mode 100644 drivers/gpu/nvgpu/os/linux/sync_sema_android.h create mode 100644 drivers/gpu/nvgpu/os/linux/sysfs.c create mode 100644 drivers/gpu/nvgpu/os/linux/sysfs.h create mode 100644 drivers/gpu/nvgpu/os/linux/thread.c create mode 100644 drivers/gpu/nvgpu/os/linux/timers.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c create mode 100644 drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h create mode 100644 drivers/gpu/nvgpu/os/linux/vidmem.c create mode 100644 drivers/gpu/nvgpu/os/linux/vm.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 5c7bb767..42d9855f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -30,39 +30,118 @@ nvgpu-y += common/bus/bus_gk20a.o \ common/ptimer/ptimer.o \ common/ptimer/ptimer_gk20a.o +# Linux specific parts of nvgpu. 
+nvgpu-y += \ + os/linux/kmem.o \ + os/linux/timers.o \ + os/linux/ioctl.o \ + os/linux/ioctl_ctrl.o \ + os/linux/ioctl_as.o \ + os/linux/ioctl_channel.o \ + os/linux/ioctl_tsg.o \ + os/linux/ioctl_dbg.o \ + os/linux/ioctl_clk_arb.o \ + os/linux/log.o \ + os/linux/cond.o \ + os/linux/nvgpu_mem.o \ + os/linux/dma.o \ + os/linux/driver_common.o \ + os/linux/firmware.o \ + os/linux/thread.o \ + os/linux/vm.o \ + os/linux/intr.o \ + os/linux/sysfs.o \ + os/linux/io.o \ + os/linux/io_usermode.o \ + os/linux/rwsem.o \ + os/linux/comptags.o \ + os/linux/dmabuf.o \ + os/linux/sched.o \ + os/linux/channel.o \ + os/linux/ce2.o \ + os/linux/sim.o \ + os/linux/sim_pci.o \ + os/linux/os_sched.o \ + os/linux/nvlink.o \ + os/linux/dt.o + +nvgpu-$(CONFIG_GK20A_VIDMEM) += \ + os/linux/vidmem.o + +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug.o \ + os/linux/debug_gr.o \ + os/linux/debug_fifo.o \ + os/linux/debug_ce.o \ + os/linux/debug_pmu.o \ + os/linux/debug_sched.o \ + os/linux/debug_allocator.o \ + os/linux/debug_hal.o \ + os/linux/debug_clk.o \ + os/linux/debug_xve.o + +ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y) +nvgpu-$(CONFIG_DEBUG_FS) += \ + os/linux/debug_kmem.o +endif + +nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \ + os/linux/ctxsw_trace.o + +nvgpu-$(CONFIG_TEGRA_GK20A) += \ + os/linux/module.o \ + os/linux/module_usermode.o \ + os/linux/soc.o \ + os/linux/fuse.o \ + os/linux/platform_ecc_sysfs.o \ + os/linux/platform_gk20a_tegra.o \ + os/linux/platform_gp10b_tegra.o \ + os/linux/platform_gv11b_tegra.o + +nvgpu-$(CONFIG_SYNC) += \ + os/linux/sync_sema_android.o \ + os/linux/os_fence_android.o \ + os/linux/os_fence_android_sema.o + +ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y) +nvgpu-$(CONFIG_SYNC) += \ + os/linux/os_fence_android_syncpt.o +endif + +nvgpu-$(CONFIG_GK20A_PCI) += \ + os/linux/pci.o \ + os/linux/pci_usermode.o + +nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \ + os/linux/nvhost.o + +nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ + os/linux/vgpu/platform_vgpu_tegra.o \ + os/linux/vgpu/fecs_trace_vgpu.o \ + os/linux/vgpu/clk_vgpu.o \ + os/linux/vgpu/sysfs_vgpu.o \ + os/linux/vgpu/vgpu_ivc.o \ + os/linux/vgpu/vgpu_ivm.o \ + os/linux/vgpu/vgpu_linux.o \ + os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o + +nvgpu-$(CONFIG_COMMON_CLK) += \ + os/linux/clk.o + +nvgpu-$(CONFIG_GK20A_DEVFREQ) += \ + os/linux/scale.o + +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \ + os/linux/cde.o \ + os/linux/cde_gm20b.o \ + os/linux/cde_gp10b.o + +ifeq ($(CONFIG_DEBUG_FS),y) +nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \ + os/linux/debug_cde.o +endif + nvgpu-y += \ - common/linux/kmem.o \ - common/linux/timers.o \ - common/linux/ioctl.o \ - common/linux/ioctl_ctrl.o \ - common/linux/ioctl_as.o \ - common/linux/ioctl_channel.o \ - common/linux/ioctl_tsg.o \ - common/linux/ioctl_dbg.o \ - common/linux/ioctl_clk_arb.o \ - common/linux/log.o \ - common/linux/cond.o \ - common/linux/nvgpu_mem.o \ - common/linux/dma.o \ - common/linux/driver_common.o \ - common/linux/firmware.o \ - common/linux/thread.o \ - common/linux/vm.o \ - common/linux/intr.o \ - common/linux/sysfs.o \ - common/linux/io.o \ - common/linux/io_usermode.o \ - common/linux/rwsem.o \ - common/linux/comptags.o \ - common/linux/dmabuf.o \ - common/linux/sched.o \ - common/linux/channel.o \ - common/linux/ce2.o \ - common/linux/sim.o \ - common/linux/sim_pci.o \ - common/linux/os_sched.o \ - common/linux/nvlink.o \ - common/linux/dt.o \ common/mm/nvgpu_allocator.o \ common/mm/bitmap_allocator.o \ common/mm/buddy_allocator.o \ @@ -138,51 +217,7 @@ nvgpu-y += \ boardobj/boardobjgrp_e32.o 
 
 nvgpu-$(CONFIG_GK20A_VIDMEM) += \
-	common/mm/vidmem.o \
-	common/linux/vidmem.o
-
-nvgpu-$(CONFIG_DEBUG_FS) += \
-	common/linux/debug.o \
-	common/linux/debug_gr.o \
-	common/linux/debug_fifo.o \
-	common/linux/debug_ce.o \
-	common/linux/debug_pmu.o \
-	common/linux/debug_sched.o \
-	common/linux/debug_allocator.o \
-	common/linux/debug_hal.o \
-	common/linux/debug_clk.o \
-	common/linux/debug_xve.o
-
-ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y)
-nvgpu-$(CONFIG_DEBUG_FS) += \
-	common/linux/debug_kmem.o
-endif
-
-nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \
-	common/linux/ctxsw_trace.o
-
-nvgpu-$(CONFIG_TEGRA_GK20A) += \
-	common/linux/module.o \
-	common/linux/module_usermode.o \
-	common/linux/soc.o \
-	common/linux/fuse.o \
-	common/linux/platform_ecc_sysfs.o \
-	common/linux/platform_gk20a_tegra.o \
-	common/linux/platform_gp10b_tegra.o \
-	common/linux/platform_gv11b_tegra.o
-
-nvgpu-$(CONFIG_SYNC) += common/linux/sync_sema_android.o \
-	common/linux/os_fence_android.o \
-	common/linux/os_fence_android_sema.o
-
-ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y)
-nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o
-endif
-
-nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
-	common/linux/pci_usermode.o \
-
-nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += common/linux/nvhost.o
+	common/mm/vidmem.o
 
 nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	vgpu/ltc_vgpu.o \
@@ -205,20 +240,6 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	vgpu/gv11b/vgpu_fifo_gv11b.o \
 	vgpu/gv11b/vgpu_subctx_gv11b.o \
 	vgpu/gv11b/vgpu_tsg_gv11b.o \
-	common/linux/vgpu/platform_vgpu_tegra.o \
-	common/linux/vgpu/fecs_trace_vgpu.o \
-	common/linux/vgpu/clk_vgpu.o \
-	common/linux/vgpu/sysfs_vgpu.o \
-	common/linux/vgpu/vgpu_ivc.o \
-	common/linux/vgpu/vgpu_ivm.o \
-	common/linux/vgpu/vgpu_linux.o \
-	common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o
-
-nvgpu-$(CONFIG_COMMON_CLK) += \
-	common/linux/clk.o
-
-nvgpu-$(CONFIG_GK20A_DEVFREQ) += \
-	common/linux/scale.o
 
 nvgpu-$(CONFIG_GK20A_CYCLE_STATS) += \
 	gk20a/css_gr_gk20a.o
@@ -316,13 +337,3 @@ nvgpu-y += \
 	therm/thrmpmu.o \
 	lpwr/rppg.o \
 	lpwr/lpwr.o
-
-nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
-	common/linux/cde.o \
-	common/linux/cde_gm20b.o \
-	common/linux/cde_gp10b.o
-
-ifeq ($(CONFIG_DEBUG_FS),y)
-nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
-	common/linux/debug_cde.o
-endif
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
deleted file mode 100644
index 32b333f1..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-/*
- * Color decompression engine support
- *
- * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/dma-buf.h>
-#include <linux/dma-mapping.h>
-#include <linux/fs.h>
-#include <linux/delay.h>
-
-#include <trace/events/gk20a.h>
-
-#include <nvgpu/dma.h>
-#include <nvgpu/gmmu.h>
-#include <nvgpu/timers.h>
-#include <nvgpu/nvgpu_common.h>
-#include <nvgpu/kmem.h>
-#include <nvgpu/log.h>
-#include <nvgpu/bug.h>
-#include <nvgpu/firmware.h>
-#include <nvgpu/os_sched.h>
-
-#include <nvgpu/linux/vm.h>
-
-#include "gk20a/gk20a.h"
-#include "gk20a/channel_gk20a.h"
-#include "gk20a/mm_gk20a.h"
-#include "gk20a/fence_gk20a.h"
-#include "gk20a/gr_gk20a.h"
-
-#include "cde.h"
-#include "os_linux.h"
-#include "dmabuf.h"
-#include "channel.h"
-#include "cde_gm20b.h"
-#include "cde_gp10b.h"
-
-#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
-
-static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
-static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
-
-#define CTX_DELETE_TIME 1000
-
-#define MAX_CTX_USE_COUNT 42
-#define MAX_CTX_RETRY_TIME 2000
-
-static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
-{
-	struct nvgpu_mapped_buf *buffer;
-	dma_addr_t addr = 0;
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
-	if (buffer)
-		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	return addr;
-}
-
-static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
-{
-	unsigned int i;
-
-	for (i = 0; i < cde_ctx->num_bufs; i++) {
-		struct nvgpu_mem *mem = cde_ctx->mem + i;
-		nvgpu_dma_unmap_free(cde_ctx->vm, mem);
-	}
-
-	nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
-
-	cde_ctx->convert_cmd = NULL;
-	cde_ctx->init_convert_cmd = NULL;
-	cde_ctx->num_bufs = 0;
-	cde_ctx->num_params = 0;
-	cde_ctx->init_cmd_num_entries = 0;
-	cde_ctx->convert_cmd_num_entries = 0;
-	cde_ctx->init_cmd_executed = false;
-}
-
-static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
-__must_hold(&cde_app->mutex)
-{
-	struct nvgpu_os_linux *l = cde_ctx->l;
-	struct gk20a *g = &l->g;
-	struct channel_gk20a *ch = cde_ctx->ch;
-	struct vm_gk20a *vm = ch->vm;
-
-	trace_gk20a_cde_remove_ctx(cde_ctx);
-
-	/* release mapped memory */
-	gk20a_deinit_cde_img(cde_ctx);
-	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
-			cde_ctx->backing_store_vaddr);
-
-	/*
-	 * free the channel
-	 * gk20a_channel_close() will also unbind the channel from TSG
-	 */
-	gk20a_channel_close(ch);
-	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
-
-	/* housekeeping on app */
-	nvgpu_list_del(&cde_ctx->list);
-	l->cde_app.ctx_count--;
-	nvgpu_kfree(g, cde_ctx);
-}
-
-static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
-		bool wait_finish)
-__releases(&cde_app->mutex)
-__acquires(&cde_app->mutex)
-{
-	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
-
-	/* permanent contexts do not have deleter works */
-	if (!cde_ctx->is_temporary)
-		return;
-
-	if (wait_finish) {
-		nvgpu_mutex_release(&cde_app->mutex);
-		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
-		nvgpu_mutex_acquire(&cde_app->mutex);
-	} else {
-		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
-	}
-}
-
-static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
-__must_hold(&l->cde_app->mutex)
-{
-	struct gk20a_cde_app *cde_app = &l->cde_app;
-	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
-
-	/* safe to go off the mutex in cancel_deleter since app is
-	 * deinitialised; no new jobs are started.
deleter works may be only at - * waiting for the mutex or before, going to abort */ - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->free_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, true); - gk20a_cde_remove_ctx(cde_ctx); - } - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->used_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, true); - gk20a_cde_remove_ctx(cde_ctx); - } -} - -static void gk20a_cde_stop(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - - /* prevent further conversions and delayed works from working */ - cde_app->initialised = false; - /* free all data, empty the list */ - gk20a_cde_remove_contexts(l); -} - -void gk20a_cde_destroy(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - - if (!cde_app->initialised) - return; - - nvgpu_mutex_acquire(&cde_app->mutex); - gk20a_cde_stop(l); - nvgpu_mutex_release(&cde_app->mutex); - - nvgpu_mutex_destroy(&cde_app->mutex); -} - -void gk20a_cde_suspend(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; - - if (!cde_app->initialised) - return; - - nvgpu_mutex_acquire(&cde_app->mutex); - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->free_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, false); - } - - nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, - &cde_app->used_contexts, gk20a_cde_ctx, list) { - gk20a_cde_cancel_deleter(cde_ctx, false); - } - - nvgpu_mutex_release(&cde_app->mutex); - -} - -static int gk20a_cde_create_context(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx; - - cde_ctx = gk20a_cde_allocate_context(l); - if (IS_ERR(cde_ctx)) - return PTR_ERR(cde_ctx); - - nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); - cde_app->ctx_count++; - if (cde_app->ctx_count > cde_app->ctx_count_top) - cde_app->ctx_count_top = cde_app->ctx_count; - - return 0; -} - -static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) -__must_hold(&l->cde_app->mutex) -{ - int err; - int i; - - for (i = 0; i < NUM_CDE_CONTEXTS; i++) { - err = gk20a_cde_create_context(l); - if (err) - goto out; - } - - return 0; -out: - gk20a_cde_remove_contexts(l); - return err; -} - -static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_buf *buf) -{ - struct nvgpu_mem *mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* check that the file can hold the buf */ - if (buf->data_byte_offset != 0 && - buf->data_byte_offset + buf->num_bytes > img->size) { - nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", - cde_ctx->num_bufs); - return -EINVAL; - } - - /* check that we have enough buf elems available */ - if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { - nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", - cde_ctx->num_bufs); - return -ENOMEM; - } - - /* allocate buf */ - mem = cde_ctx->mem + cde_ctx->num_bufs; - err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); - if (err) { - nvgpu_warn(g, "cde: could not allocate device memory. 
buffer idx = %d", - cde_ctx->num_bufs); - return -ENOMEM; - } - - /* copy the content */ - if (buf->data_byte_offset != 0) - memcpy(mem->cpu_va, img->data + buf->data_byte_offset, - buf->num_bytes); - - cde_ctx->num_bufs++; - - return 0; -} - -static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, - int type, s32 shift, u64 mask, u64 value) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - u32 *target_mem_ptr = target; - u64 *target_mem_ptr_u64 = target; - u64 current_value, new_value; - - value = (shift >= 0) ? value << shift : value >> -shift; - value &= mask; - - /* read current data from the location */ - current_value = 0; - if (type == TYPE_PARAM_TYPE_U32) { - if (mask != 0xfffffffful) - current_value = *target_mem_ptr; - } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { - if (mask != ~0ul) - current_value = *target_mem_ptr_u64; - } else if (type == TYPE_PARAM_TYPE_U64_BIG) { - current_value = *target_mem_ptr_u64; - current_value = (u64)(current_value >> 32) | - (u64)(current_value << 32); - } else { - nvgpu_warn(g, "cde: unknown type. type=%d", - type); - return -EINVAL; - } - - current_value &= ~mask; - new_value = current_value | value; - - /* store the element data back */ - if (type == TYPE_PARAM_TYPE_U32) - *target_mem_ptr = (u32)new_value; - else if (type == TYPE_PARAM_TYPE_U64_LITTLE) - *target_mem_ptr_u64 = new_value; - else { - new_value = (u64)(new_value >> 32) | - (u64)(new_value << 32); - *target_mem_ptr_u64 = new_value; - } - - return 0; -} - -static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_replace *replace) -{ - struct nvgpu_mem *source_mem; - struct nvgpu_mem *target_mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - u32 *target_mem_ptr; - u64 vaddr; - int err; - - if (replace->target_buf >= cde_ctx->num_bufs || - replace->source_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", - replace->target_buf, replace->source_buf, - cde_ctx->num_bufs); - return -EINVAL; - } - - source_mem = cde_ctx->mem + replace->source_buf; - target_mem = cde_ctx->mem + replace->target_buf; - target_mem_ptr = target_mem->cpu_va; - - if (source_mem->size < (replace->source_byte_offset + 3) || - target_mem->size < (replace->target_byte_offset + 3)) { - nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", - replace->target_byte_offset, - replace->source_byte_offset, - source_mem->size, - target_mem->size); - return -EINVAL; - } - - /* calculate the target pointer */ - target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); - - /* determine patch value */ - vaddr = source_mem->gpu_va + replace->source_byte_offset; - err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, - replace->shift, replace->mask, - vaddr); - if (err) { - nvgpu_warn(g, "cde: replace failed. 
err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", - err, replace->target_buf, - replace->target_byte_offset, - replace->source_buf, - replace->source_byte_offset); - } - - return err; -} - -static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_mem *target_mem; - u32 *target_mem_ptr; - u64 new_data; - int user_id = 0, err; - unsigned int i; - - for (i = 0; i < cde_ctx->num_params; i++) { - struct gk20a_cde_hdr_param *param = cde_ctx->params + i; - target_mem = cde_ctx->mem + param->target_buf; - target_mem_ptr = target_mem->cpu_va; - target_mem_ptr += (param->target_byte_offset / sizeof(u32)); - - switch (param->id) { - case TYPE_PARAM_COMPTAGS_PER_CACHELINE: - new_data = g->gr.comptags_per_cacheline; - break; - case TYPE_PARAM_GPU_CONFIGURATION: - new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * - g->gr.cacheline_size; - break; - case TYPE_PARAM_FIRSTPAGEOFFSET: - new_data = cde_ctx->surf_param_offset; - break; - case TYPE_PARAM_NUMPAGES: - new_data = cde_ctx->surf_param_lines; - break; - case TYPE_PARAM_BACKINGSTORE: - new_data = cde_ctx->backing_store_vaddr; - break; - case TYPE_PARAM_DESTINATION: - new_data = cde_ctx->compbit_vaddr; - break; - case TYPE_PARAM_DESTINATION_SIZE: - new_data = cde_ctx->compbit_size; - break; - case TYPE_PARAM_BACKINGSTORE_SIZE: - new_data = g->gr.compbit_store.mem.size; - break; - case TYPE_PARAM_SOURCE_SMMU_ADDR: - new_data = gpuva_to_iova_base(cde_ctx->vm, - cde_ctx->surf_vaddr); - if (new_data == 0) { - nvgpu_warn(g, "cde: failed to find 0x%llx", - cde_ctx->surf_vaddr); - return -EINVAL; - } - break; - case TYPE_PARAM_BACKINGSTORE_BASE_HW: - new_data = g->gr.compbit_store.base_hw; - break; - case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: - new_data = g->gr.gobs_per_comptagline_per_slice; - break; - case TYPE_PARAM_SCATTERBUFFER: - new_data = cde_ctx->scatterbuffer_vaddr; - break; - case TYPE_PARAM_SCATTERBUFFER_SIZE: - new_data = cde_ctx->scatterbuffer_size; - break; - default: - user_id = param->id - NUM_RESERVED_PARAMS; - if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) - continue; - new_data = cde_ctx->user_param_values[user_id]; - } - - nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", - i, param->id, param->target_buf, - param->target_byte_offset, new_data, - param->data_offset, param->type, param->shift, - param->mask); - - new_data += param->data_offset; - - err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, - param->shift, param->mask, new_data); - - if (err) { - nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", - err, i, param->id, param->target_buf, - param->target_byte_offset, new_data); - return err; - } - } - - return 0; -} - -static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - struct gk20a_cde_hdr_param *param) -{ - struct nvgpu_mem *target_mem; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - - if (param->target_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: invalid buffer parameter. 
param idx = %d, target_buf=%u, num_bufs=%u", - cde_ctx->num_params, param->target_buf, - cde_ctx->num_bufs); - return -EINVAL; - } - - target_mem = cde_ctx->mem + param->target_buf; - if (target_mem->size < (param->target_byte_offset + 3)) { - nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", - cde_ctx->num_params, param->target_byte_offset, - target_mem->size); - return -EINVAL; - } - - /* does this parameter fit into our parameter structure */ - if (cde_ctx->num_params >= MAX_CDE_PARAMS) { - nvgpu_warn(g, "cde: no room for new parameters param idx = %d", - cde_ctx->num_params); - return -ENOMEM; - } - - /* is the given id valid? */ - if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { - nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", - param->id, cde_ctx->num_params, - NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); - return -EINVAL; - } - - cde_ctx->params[cde_ctx->num_params] = *param; - cde_ctx->num_params++; - - return 0; -} - -static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - u32 required_class) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* CDE enabled */ - cde_ctx->ch->cde = true; - - err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); - if (err) { - nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", - err); - return err; - } - - return 0; -} - -static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img, - u32 op, - struct gk20a_cde_cmd_elem *cmd_elem, - u32 num_elems) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; - u32 *num_entries; - unsigned int i; - - /* check command type */ - if (op == TYPE_BUF_COMMAND_INIT) { - gpfifo = &cde_ctx->init_convert_cmd; - num_entries = &cde_ctx->init_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_CONVERT) { - gpfifo = &cde_ctx->convert_cmd; - num_entries = &cde_ctx->convert_cmd_num_entries; - } else { - nvgpu_warn(g, "cde: unknown command. 
op=%u", - op); - return -EINVAL; - } - - /* allocate gpfifo entries to be pushed */ - *gpfifo = nvgpu_kzalloc(g, - sizeof(struct nvgpu_gpfifo_entry) * num_elems); - if (!*gpfifo) { - nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); - return -ENOMEM; - } - - gpfifo_elem = *gpfifo; - for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { - struct nvgpu_mem *target_mem; - - /* validate the current entry */ - if (cmd_elem->target_buf >= cde_ctx->num_bufs) { - nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", - cmd_elem->target_buf, cde_ctx->num_bufs); - return -EINVAL; - } - - target_mem = cde_ctx->mem + cmd_elem->target_buf; - if (target_mem->size< - cmd_elem->target_byte_offset + cmd_elem->num_bytes) { - nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", - target_mem->size, - cmd_elem->target_byte_offset, - cmd_elem->num_bytes); - return -EINVAL; - } - - /* store the element into gpfifo */ - gpfifo_elem->entry0 = - u64_lo32(target_mem->gpu_va + - cmd_elem->target_byte_offset); - gpfifo_elem->entry1 = - u64_hi32(target_mem->gpu_va + - cmd_elem->target_byte_offset) | - pbdma_gp_entry1_length_f(cmd_elem->num_bytes / - sizeof(u32)); - } - - *num_entries = num_elems; - return 0; -} - -static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - unsigned long init_bytes = cde_ctx->init_cmd_num_entries * - sizeof(struct nvgpu_gpfifo_entry); - unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * - sizeof(struct nvgpu_gpfifo_entry); - unsigned long total_bytes = init_bytes + conv_bytes; - struct nvgpu_gpfifo_entry *combined_cmd; - - /* allocate buffer that has space for both */ - combined_cmd = nvgpu_kzalloc(g, total_bytes); - if (!combined_cmd) { - nvgpu_warn(g, - "cde: could not allocate memory for gpfifo entries"); - return -ENOMEM; - } - - /* move the original init here and append convert */ - memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); - memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, - cde_ctx->convert_cmd, conv_bytes); - - nvgpu_kfree(g, cde_ctx->init_convert_cmd); - nvgpu_kfree(g, cde_ctx->convert_cmd); - - cde_ctx->init_convert_cmd = combined_cmd; - cde_ctx->convert_cmd = combined_cmd - + cde_ctx->init_cmd_num_entries; - - return 0; -} - -static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, - struct nvgpu_firmware *img) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - u32 *data = (u32 *)img->data; - u32 num_of_elems; - struct gk20a_cde_hdr_elem *elem; - u32 min_size = 0; - int err = 0; - unsigned int i; - - min_size += 2 * sizeof(u32); - if (img->size < min_size) { - nvgpu_warn(g, "cde: invalid image header"); - return -EINVAL; - } - - cde_app->firmware_version = data[0]; - num_of_elems = data[1]; - - min_size += num_of_elems * sizeof(*elem); - if (img->size < min_size) { - nvgpu_warn(g, "cde: bad image"); - return -EINVAL; - } - - elem = (struct gk20a_cde_hdr_elem *)&data[2]; - for (i = 0; i < num_of_elems; i++) { - int err = 0; - switch (elem->type) { - case TYPE_BUF: - err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); - break; - case TYPE_REPLACE: - err = gk20a_init_cde_replace(cde_ctx, img, - &elem->replace); - break; - case TYPE_PARAM: - err = gk20a_init_cde_param(cde_ctx, img, &elem->param); - break; - case TYPE_REQUIRED_CLASS: - err = gk20a_init_cde_required_class(cde_ctx, img, - 
elem->required_class); - break; - case TYPE_COMMAND: - { - struct gk20a_cde_cmd_elem *cmd = (void *) - &img->data[elem->command.data_byte_offset]; - err = gk20a_init_cde_command(cde_ctx, img, - elem->command.op, cmd, - elem->command.num_entries); - break; - } - case TYPE_ARRAY: - memcpy(&cde_app->arrays[elem->array.id][0], - elem->array.data, - MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); - break; - default: - nvgpu_warn(g, "cde: unknown header element"); - err = -EINVAL; - } - - if (err) - goto deinit_image; - - elem++; - } - - if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { - nvgpu_warn(g, "cde: convert command not defined"); - err = -EINVAL; - goto deinit_image; - } - - if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { - nvgpu_warn(g, "cde: convert command not defined"); - err = -EINVAL; - goto deinit_image; - } - - err = gk20a_cde_pack_cmdbufs(cde_ctx); - if (err) - goto deinit_image; - - return 0; - -deinit_image: - gk20a_deinit_cde_img(cde_ctx); - return err; -} - -static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, - u32 op, struct nvgpu_channel_fence *fence, - u32 flags, struct gk20a_fence **fence_out) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_gpfifo_entry *gpfifo = NULL; - int num_entries = 0; - - /* check command type */ - if (op == TYPE_BUF_COMMAND_INIT) { - /* both init and convert combined */ - gpfifo = cde_ctx->init_convert_cmd; - num_entries = cde_ctx->init_cmd_num_entries - + cde_ctx->convert_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_CONVERT) { - gpfifo = cde_ctx->convert_cmd; - num_entries = cde_ctx->convert_cmd_num_entries; - } else if (op == TYPE_BUF_COMMAND_NOOP) { - /* Any non-null gpfifo will suffice with 0 num_entries */ - gpfifo = cde_ctx->init_convert_cmd; - num_entries = 0; - } else { - nvgpu_warn(g, "cde: unknown buffer"); - return -EINVAL; - } - - if (gpfifo == NULL) { - nvgpu_warn(g, "cde: buffer not available"); - return -ENOSYS; - } - - return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, - num_entries, flags, fence, fence_out, - NULL); -} - -static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; - struct gk20a *g = &cde_ctx->l->g; - - nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); - trace_gk20a_cde_release(cde_ctx); - - nvgpu_mutex_acquire(&cde_app->mutex); - - if (cde_ctx->in_use) { - cde_ctx->in_use = false; - nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); - cde_app->ctx_usecount--; - } else { - nvgpu_log_info(g, "double release cde context %p", cde_ctx); - } - - nvgpu_mutex_release(&cde_app->mutex); -} - -static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct delayed_work *delay_work = to_delayed_work(work); - struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, - struct gk20a_cde_ctx, ctx_deleter_work); - struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - int err; - - /* someone has just taken it? engine deletion started? 
*/ - if (cde_ctx->in_use || !cde_app->initialised) - return; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: attempting to delete temporary %p", cde_ctx); - - err = gk20a_busy(g); - if (err) { - /* this context would find new use anyway later, so not freeing - * here does not leak anything */ - nvgpu_warn(g, "cde: cannot set gk20a on, postponing" - " temp ctx deletion"); - return; - } - - nvgpu_mutex_acquire(&cde_app->mutex); - if (cde_ctx->in_use || !cde_app->initialised) { - nvgpu_log(g, gpu_dbg_cde_ctx, - "cde: context use raced, not deleting %p", - cde_ctx); - goto out; - } - - WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work), - "double pending %p", cde_ctx); - - gk20a_cde_remove_ctx(cde_ctx); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: destroyed %p count=%d use=%d max=%d", - cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount, - cde_app->ctx_count_top); - -out: - nvgpu_mutex_release(&cde_app->mutex); - gk20a_idle(g); -} - -static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l) -__must_hold(&cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx; - - /* exhausted? */ - - if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT) - return ERR_PTR(-EAGAIN); - - /* idle context available? */ - - if (!nvgpu_list_empty(&cde_app->free_contexts)) { - cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts, - gk20a_cde_ctx, list); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: got free %p count=%d use=%d max=%d", - cde_ctx, cde_app->ctx_count, - cde_app->ctx_usecount, - cde_app->ctx_count_top); - trace_gk20a_cde_get_context(cde_ctx); - - /* deleter work may be scheduled, but in_use prevents it */ - cde_ctx->in_use = true; - nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts); - cde_app->ctx_usecount++; - - /* cancel any deletions now that ctx is in use */ - gk20a_cde_cancel_deleter(cde_ctx, true); - return cde_ctx; - } - - /* no free contexts, get a temporary one */ - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, - "cde: no free contexts, count=%d", - cde_app->ctx_count); - - cde_ctx = gk20a_cde_allocate_context(l); - if (IS_ERR(cde_ctx)) { - nvgpu_warn(g, "cde: cannot allocate context: %ld", - PTR_ERR(cde_ctx)); - return cde_ctx; - } - - trace_gk20a_cde_get_context(cde_ctx); - cde_ctx->in_use = true; - cde_ctx->is_temporary = true; - cde_app->ctx_usecount++; - cde_app->ctx_count++; - if (cde_app->ctx_count > cde_app->ctx_count_top) - cde_app->ctx_count_top = cde_app->ctx_count; - nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts); - - return cde_ctx; -} - -static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) -__releases(&cde_app->mutex) -__acquires(&cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a_cde_ctx *cde_ctx = NULL; - struct nvgpu_timeout timeout; - - nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, - NVGPU_TIMER_CPU_TIMER); - - do { - cde_ctx = gk20a_cde_do_get_context(l); - if (PTR_ERR(cde_ctx) != -EAGAIN) - break; - - /* exhausted, retry */ - nvgpu_mutex_release(&cde_app->mutex); - cond_resched(); - nvgpu_mutex_acquire(&cde_app->mutex); - } while (!nvgpu_timeout_expired(&timeout)); - - return cde_ctx; -} - -static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_ctx *cde_ctx; - int ret; - - cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); - if (!cde_ctx) - return ERR_PTR(-ENOMEM); - - cde_ctx->l = l; - 
cde_ctx->dev = dev_from_gk20a(g); - - ret = gk20a_cde_load(cde_ctx); - if (ret) { - nvgpu_kfree(g, cde_ctx); - return ERR_PTR(ret); - } - - nvgpu_init_list_node(&cde_ctx->list); - cde_ctx->is_temporary = false; - cde_ctx->in_use = false; - INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, - gk20a_cde_ctx_deleter_fn); - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); - trace_gk20a_cde_allocate_context(cde_ctx); - return cde_ctx; -} - -int gk20a_cde_convert(struct nvgpu_os_linux *l, - struct dma_buf *compbits_scatter_buf, - u64 compbits_byte_offset, - u64 scatterbuffer_byte_offset, - struct nvgpu_channel_fence *fence, - u32 __flags, struct gk20a_cde_param *params, - int num_params, struct gk20a_fence **fence_out) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_ctx *cde_ctx = NULL; - struct gk20a_comptags comptags; - struct nvgpu_os_buffer os_buf = { - compbits_scatter_buf, - NULL, - dev_from_gk20a(g) - }; - u64 mapped_compbits_offset = 0; - u64 compbits_size = 0; - u64 mapped_scatterbuffer_offset = 0; - u64 scatterbuffer_size = 0; - u64 map_vaddr = 0; - u64 map_offset = 0; - u64 map_size = 0; - u8 *surface = NULL; - u64 big_page_mask = 0; - u32 flags; - int err, i; - const s16 compbits_kind = 0; - u32 submit_op; - struct dma_buf_attachment *attachment; - - nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", - compbits_byte_offset, scatterbuffer_byte_offset); - - /* scatter buffer must be after compbits buffer */ - if (scatterbuffer_byte_offset && - scatterbuffer_byte_offset < compbits_byte_offset) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&l->cde_app.mutex); - cde_ctx = gk20a_cde_get_context(l); - nvgpu_mutex_release(&l->cde_app.mutex); - if (IS_ERR(cde_ctx)) { - err = PTR_ERR(cde_ctx); - goto exit_idle; - } - - /* First, map the buffer to local va */ - - /* ensure that the compbits buffer has drvdata */ - err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, - dev_from_gk20a(g)); - if (err) - goto exit_idle; - - /* compbits don't start at page aligned offset, so we need to align - the region to be mapped */ - big_page_mask = cde_ctx->vm->big_page_size - 1; - map_offset = compbits_byte_offset & ~big_page_mask; - map_size = compbits_scatter_buf->size - map_offset; - - - /* compute compbit start offset from the beginning of the mapped - area */ - mapped_compbits_offset = compbits_byte_offset - map_offset; - if (scatterbuffer_byte_offset) { - compbits_size = scatterbuffer_byte_offset - - compbits_byte_offset; - mapped_scatterbuffer_offset = scatterbuffer_byte_offset - - map_offset; - scatterbuffer_size = compbits_scatter_buf->size - - scatterbuffer_byte_offset; - } else { - compbits_size = compbits_scatter_buf->size - - compbits_byte_offset; - } - - nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", - map_offset, map_size); - nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", - mapped_compbits_offset, compbits_size); - nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", - mapped_scatterbuffer_offset, scatterbuffer_size); - - - /* map the destination buffer */ - get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ - err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, - NVGPU_VM_MAP_CACHEABLE | - NVGPU_VM_MAP_DIRECT_KIND_CTRL, - NVGPU_KIND_INVALID, - compbits_kind, /* incompressible kind */ - gk20a_mem_flag_none, - map_offset, 
map_size,
-				NULL,
-				&map_vaddr);
-	if (err) {
-		dma_buf_put(compbits_scatter_buf);
-		err = -EINVAL;
-		goto exit_idle;
-	}
-
-	if (scatterbuffer_byte_offset &&
-	    l->ops.cde.need_scatter_buffer &&
-	    l->ops.cde.need_scatter_buffer(g)) {
-		struct sg_table *sgt;
-		void *scatter_buffer;
-
-		/* dma_buf_vmap() returns NULL on failure, not an ERR_PTR */
-		surface = dma_buf_vmap(compbits_scatter_buf);
-		if (!surface) {
-			nvgpu_warn(g,
-				   "dma_buf_vmap failed");
-			err = -EINVAL;
-			goto exit_unmap_vaddr;
-		}
-
-		scatter_buffer = surface + scatterbuffer_byte_offset;
-
-		nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
-			  surface, scatter_buffer);
-		sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
-				   &attachment);
-		if (IS_ERR(sgt)) {
-			nvgpu_warn(g,
-				   "mm_pin failed");
-			err = -EINVAL;
-			goto exit_unmap_surface;
-		} else {
-			err = l->ops.cde.populate_scatter_buffer(g, sgt,
-					compbits_byte_offset, scatter_buffer,
-					scatterbuffer_size);
-			WARN_ON(err);
-
-			gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
-				       attachment, sgt);
-			if (err)
-				goto exit_unmap_surface;
-		}
-
-		__cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
-		dma_buf_vunmap(compbits_scatter_buf, surface);
-		surface = NULL;
-	}
-
-	/* store source buffer compression tags */
-	gk20a_get_comptags(&os_buf, &comptags);
-	cde_ctx->surf_param_offset = comptags.offset;
-	cde_ctx->surf_param_lines = comptags.lines;
-
-	/* store the surface vaddr. This is actually the compbit vaddr, but
-	   since the compbits live in the same surface, and we can get the
-	   alloc base address with gpuva_to_iova_base, this will do */
-	cde_ctx->surf_vaddr = map_vaddr;
-
-	/* store information about the destination */
-	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
-	cde_ctx->compbit_size = compbits_size;
-
-	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
-	cde_ctx->scatterbuffer_size = scatterbuffer_size;
-
-	/* remove existing argument data */
-	memset(cde_ctx->user_param_values, 0,
-	       sizeof(cde_ctx->user_param_values));
-
-	/* read user space arguments for the conversion */
-	for (i = 0; i < num_params; i++) {
-		struct gk20a_cde_param *param = params + i;
-		int id = param->id - NUM_RESERVED_PARAMS;
-
-		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
-			nvgpu_warn(g, "cde: unknown user parameter");
-			err = -EINVAL;
-			goto exit_unmap_surface;
-		}
-		cde_ctx->user_param_values[id] = param->value;
-	}
-
-	/* patch data */
-	err = gk20a_cde_patch_params(cde_ctx);
-	if (err) {
-		nvgpu_warn(g, "cde: failed to patch parameters");
-		goto exit_unmap_surface;
-	}
-
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
-		  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
-		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
-	nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
-		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
-
-	/* always take the postfence, as it is needed for protecting the
-	 * cde context */
-	flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
-
-	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
-	gk20a_idle(g);
-
-	if (comptags.lines == 0) {
-		/*
-		 * Nothing to do on the buffer, but do a null kickoff for
-		 * managing the pre and post fences.
-		 */
-		submit_op = TYPE_BUF_COMMAND_NOOP;
-	} else if (!cde_ctx->init_cmd_executed) {
-		/*
-		 * First time, so include the init pushbuf too in addition to
-		 * the conversion code.
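-		 * (TYPE_BUF_COMMAND_INIT makes gk20a_cde_execute_buffer()
-		 * kick the packed init and convert pushbuffers back to back
-		 * in a single gpfifo submission.)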
- */ - submit_op = TYPE_BUF_COMMAND_INIT; - } else { - /* - * The usual condition: execute just the conversion. - */ - submit_op = TYPE_BUF_COMMAND_CONVERT; - } - err = gk20a_cde_execute_buffer(cde_ctx, submit_op, - fence, flags, fence_out); - - if (comptags.lines != 0 && !err) - cde_ctx->init_cmd_executed = true; - - /* unmap the buffers - channel holds references to them now */ - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); - - return err; - -exit_unmap_surface: - if (surface) - dma_buf_vunmap(compbits_scatter_buf, surface); -exit_unmap_vaddr: - nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); -exit_idle: - gk20a_idle(g); - return err; -} - -static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_ctx *cde_ctx = data; - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - bool channel_idle; - - channel_gk20a_joblist_lock(ch); - channel_idle = channel_gk20a_joblist_is_empty(ch); - channel_gk20a_joblist_unlock(ch); - - if (!channel_idle) - return; - - trace_gk20a_cde_finished_ctx_cb(cde_ctx); - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx); - if (!cde_ctx->in_use) - nvgpu_log_info(g, "double finish cde context %p on channel %p", - cde_ctx, ch); - - if (ch->has_timedout) { - if (cde_ctx->is_temporary) { - nvgpu_warn(g, - "cde: channel had timed out" - " (temporary channel)"); - /* going to be deleted anyway */ - } else { - nvgpu_warn(g, - "cde: channel had timed out" - ", reloading"); - /* mark it to be deleted, replace with a new one */ - nvgpu_mutex_acquire(&cde_app->mutex); - cde_ctx->is_temporary = true; - if (gk20a_cde_create_context(l)) { - nvgpu_err(g, "cde: can't replace context"); - } - nvgpu_mutex_release(&cde_app->mutex); - } - } - - /* delete temporary contexts later (watch for doubles) */ - if (cde_ctx->is_temporary && cde_ctx->in_use) { - WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work)); - schedule_delayed_work(&cde_ctx->ctx_deleter_work, - msecs_to_jiffies(CTX_DELETE_TIME)); - } - - if (!ch->has_timedout) - gk20a_cde_ctx_release(cde_ctx); -} - -static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) -{ - struct nvgpu_os_linux *l = cde_ctx->l; - struct gk20a *g = &l->g; - struct nvgpu_firmware *img; - struct channel_gk20a *ch; - struct tsg_gk20a *tsg; - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gpfifo_args gpfifo_args; - int err = 0; - u64 vaddr; - - img = nvgpu_request_firmware(g, "gpu2cde.bin", 0); - if (!img) { - nvgpu_err(g, "cde: could not fetch the firmware"); - return -ENOSYS; - } - - tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); - if (!tsg) { - nvgpu_err(g, "cde: could not create TSG"); - err = -ENOMEM; - goto err_get_gk20a_channel; - } - - ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, - cde_ctx, - -1, - false); - if (!ch) { - nvgpu_warn(g, "cde: gk20a channel not available"); - err = -ENOMEM; - goto err_get_gk20a_channel; - } - - ch->timeout.enabled = false; - - /* bind the channel to the vm */ - err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch); - if (err) { - nvgpu_warn(g, "cde: could not bind vm"); - goto err_commit_va; - } - - err = gk20a_tsg_bind_channel(tsg, ch); - if (err) { - nvgpu_err(g, "cde: unable to bind to tsg"); - goto err_alloc_gpfifo; - } - - gpfifo_args.num_entries = 1024; - gpfifo_args.num_inflight_jobs = 0; - gpfifo_args.flags = 0; - /* allocate gpfifo (1024 should be more than enough) */ - err = gk20a_channel_alloc_gpfifo(ch, 
&gpfifo_args); - if (err) { - nvgpu_warn(g, "cde: unable to allocate gpfifo"); - goto err_alloc_gpfifo; - } - - /* map backing store to gpu virtual space */ - vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, - g->gr.compbit_store.mem.size, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_read_only, - false, - gr->compbit_store.mem.aperture); - - if (!vaddr) { - nvgpu_warn(g, "cde: cannot map compression bit backing store"); - err = -ENOMEM; - goto err_map_backingstore; - } - - /* store initialisation data */ - cde_ctx->ch = ch; - cde_ctx->tsg = tsg; - cde_ctx->vm = ch->vm; - cde_ctx->backing_store_vaddr = vaddr; - - /* initialise the firmware */ - err = gk20a_init_cde_img(cde_ctx, img); - if (err) { - nvgpu_warn(g, "cde: image initialisation failed"); - goto err_init_cde_img; - } - - /* initialisation done */ - nvgpu_release_firmware(g, img); - - return 0; - -err_init_cde_img: - nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); -err_map_backingstore: -err_alloc_gpfifo: - nvgpu_vm_put(ch->vm); -err_commit_va: -err_get_gk20a_channel: - nvgpu_release_firmware(g, img); - nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err); - return err; -} - -int gk20a_cde_reload(struct nvgpu_os_linux *l) -__acquires(&l->cde_app->mutex) -__releases(&l->cde_app->mutex) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_app *cde_app = &l->cde_app; - int err; - - if (!cde_app->initialised) - return -ENOSYS; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&cde_app->mutex); - - gk20a_cde_stop(l); - - err = gk20a_cde_create_contexts(l); - if (!err) - cde_app->initialised = true; - - nvgpu_mutex_release(&cde_app->mutex); - - gk20a_idle(g); - return err; -} - -int gk20a_init_cde_support(struct nvgpu_os_linux *l) -__acquires(&cde_app->mutex) -__releases(&cde_app->mutex) -{ - struct gk20a_cde_app *cde_app = &l->cde_app; - struct gk20a *g = &l->g; - int err; - - if (cde_app->initialised) - return 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); - - err = nvgpu_mutex_init(&cde_app->mutex); - if (err) - return err; - - nvgpu_mutex_acquire(&cde_app->mutex); - - nvgpu_init_list_node(&cde_app->free_contexts); - nvgpu_init_list_node(&cde_app->used_contexts); - cde_app->ctx_count = 0; - cde_app->ctx_count_top = 0; - cde_app->ctx_usecount = 0; - - err = gk20a_cde_create_contexts(l); - if (!err) - cde_app->initialised = true; - - nvgpu_mutex_release(&cde_app->mutex); - nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err); - - if (err) - nvgpu_mutex_destroy(&cde_app->mutex); - - return err; -} - -enum cde_launch_patch_id { - PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, - PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, - PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ - PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, - PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, - PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ - PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ - PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ - PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, - PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ - PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ - PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, - PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, - PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, - PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, - PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, - PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, - PATCH_USER_CONST_XBLOCKS_ID = 1041, - 
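-	/*
-	 * Ids from 1042 on parameterize the horizontal (H_*) and vertical
-	 * (V_*) conversion passes separately. Each id is patched into its
-	 * firmware-defined target buffer with the gk20a_cde_hdr_param
-	 * mask/shift rule from cde.h; e.g. with a mask of 0xffff and a
-	 * shift of 0 the write reduces to
-	 *   *target = (*target & ~0xffff) | (value & 0xffff);
-	 * (the mask/shift values here are illustrative, not taken from any
-	 * actual firmware header)
-	 */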
PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, - PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, - PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, - PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, - PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, - PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, - PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, - PATCH_H_LAUNCH_WORD1_ID = 1049, - PATCH_H_LAUNCH_WORD2_ID = 1050, - PATCH_V_LAUNCH_WORD1_ID = 1051, - PATCH_V_LAUNCH_WORD2_ID = 1052, - PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, - PATCH_H_QMD_REGISTER_COUNT_ID = 1054, - PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, - PATCH_V_QMD_REGISTER_COUNT_ID = 1056, -}; - -/* maximum number of WRITE_PATCHes in the below function */ -#define MAX_CDE_LAUNCH_PATCHES 32 - -static int gk20a_buffer_convert_gpu_to_cde_v1( - struct nvgpu_os_linux *l, - struct dma_buf *dmabuf, u32 consumer, - u64 offset, u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence_in, - struct gk20a_buffer_state *state) -{ - struct gk20a *g = &l->g; - struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; - int param = 0; - int err = 0; - struct gk20a_fence *new_fence = NULL; - const int wgx = 8; - const int wgy = 8; - const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ - const int xalign = compbits_per_byte * wgx; - const int yalign = wgy; - - /* Compute per launch parameters */ - const int xtiles = (width + 7) >> 3; - const int ytiles = (height + 7) >> 3; - const int gridw_h = roundup(xtiles, xalign) / xalign; - const int gridh_h = roundup(ytiles, yalign) / yalign; - const int gridw_v = roundup(ytiles, xalign) / xalign; - const int gridh_v = roundup(xtiles, yalign) / yalign; - const int xblocks = (xtiles + 1) >> 1; - const int voffset = compbits_voffset - compbits_hoffset; - - int hprog = -1; - int vprog = -1; - - if (l->ops.cde.get_program_numbers) - l->ops.cde.get_program_numbers(g, block_height_log2, - l->cde_app.shader_parameter, - &hprog, &vprog); - else { - nvgpu_warn(g, "cde: chip not supported"); - return -ENOSYS; - } - - if (hprog < 0 || vprog < 0) { - nvgpu_warn(g, "cde: could not determine programs"); - return -ENOSYS; - } - - if (xtiles > 8192 / 8 || ytiles > 8192 / 8) - nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", - xtiles, ytiles); - - nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", - width, height, block_height_log2, - compbits_hoffset, compbits_voffset, scatterbuffer_offset); - nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", - width, height, xtiles, ytiles); - nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", - wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); - nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", - hprog, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], - l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], - vprog, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], - l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); - - /* Write parameters */ -#define WRITE_PATCH(NAME, VALUE) \ - params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} - WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); - WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, - block_height_log2); - WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); - WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); - WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); - 
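-	/*
-	 * Worked example, assuming a hypothetical 1920x1080 surface with
-	 * the 8x8 workgroup above: xtiles = 240, ytiles = 135, xalign = 32,
-	 * yalign = 8, so the H pass launches an 8x17 grid, the V pass a
-	 * 5x30 grid, and xblocks = 120.
-	 */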
WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); - WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); - - WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); - WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); - WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); - WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); - - WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); - WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); - WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); - WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); - - WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); - WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, - l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); - WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, - l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); - WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, - l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); - - if (consumer & NVGPU_GPU_COMPBITS_CDEH) { - WRITE_PATCH(PATCH_H_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); - WRITE_PATCH(PATCH_H_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); - } else { - WRITE_PATCH(PATCH_H_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); - WRITE_PATCH(PATCH_H_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); - } - - if (consumer & NVGPU_GPU_COMPBITS_CDEV) { - WRITE_PATCH(PATCH_V_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); - WRITE_PATCH(PATCH_V_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); - } else { - WRITE_PATCH(PATCH_V_LAUNCH_WORD1, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); - WRITE_PATCH(PATCH_V_LAUNCH_WORD2, - l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); - } -#undef WRITE_PATCH - - err = gk20a_cde_convert(l, dmabuf, - compbits_hoffset, - scatterbuffer_offset, - fence_in, submit_flags, - params, param, &new_fence); - if (err) - goto out; - - /* compbits generated, update state & fence */ - gk20a_fence_put(state->fence); - state->fence = new_fence; - state->valid_compbits |= consumer & - (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); -out: - return err; -} - -static int gk20a_buffer_convert_gpu_to_cde( - struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, - u64 offset, u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence_in, - struct gk20a_buffer_state *state) -{ - struct gk20a *g = &l->g; - int err = 0; - - if (!l->cde_app.initialised) - return -ENOSYS; - - nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", - l->cde_app.firmware_version); - - if (l->cde_app.firmware_version == 1) { - err = gk20a_buffer_convert_gpu_to_cde_v1( - l, dmabuf, consumer, offset, compbits_hoffset, - compbits_voffset, scatterbuffer_offset, - width, height, block_height_log2, - submit_flags, fence_in, state); - } else { - nvgpu_err(g, "unsupported CDE firmware version %d", - l->cde_app.firmware_version); - err = -EINVAL; - } - - return err; -} - -int gk20a_prepare_compressible_read( - struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, - u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence, - u32 *valid_compbits, u32 
*zbc_color, - struct gk20a_fence **fence_out) -{ - struct gk20a *g = &l->g; - int err = 0; - struct gk20a_buffer_state *state; - struct dma_buf *dmabuf; - u32 missing_bits; - - dmabuf = dma_buf_get(buffer_fd); - if (IS_ERR(dmabuf)) - return -EINVAL; - - err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); - if (err) { - dma_buf_put(dmabuf); - return err; - } - - missing_bits = (state->valid_compbits ^ request) & request; - - nvgpu_mutex_acquire(&state->lock); - - if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { - - gk20a_fence_put(state->fence); - state->fence = NULL; - /* state->fence = decompress(); - state->valid_compbits = 0; */ - err = -EINVAL; - goto out; - } else if (missing_bits) { - u32 missing_cde_bits = missing_bits & - (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); - if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && - missing_cde_bits) { - err = gk20a_buffer_convert_gpu_to_cde( - l, dmabuf, - missing_cde_bits, - offset, compbits_hoffset, - compbits_voffset, scatterbuffer_offset, - width, height, block_height_log2, - submit_flags, fence, - state); - if (err) - goto out; - } - } - - if (state->fence && fence_out) - *fence_out = gk20a_fence_get(state->fence); - - if (valid_compbits) - *valid_compbits = state->valid_compbits; - - if (zbc_color) - *zbc_color = state->zbc_color; - -out: - nvgpu_mutex_release(&state->lock); - dma_buf_put(dmabuf); - return err; -} - -int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, - u32 valid_compbits, u64 offset, u32 zbc_color) -{ - int err; - struct gk20a_buffer_state *state; - struct dma_buf *dmabuf; - - dmabuf = dma_buf_get(buffer_fd); - if (IS_ERR(dmabuf)) { - nvgpu_err(g, "invalid dmabuf"); - return -EINVAL; - } - - err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); - if (err) { - nvgpu_err(g, "could not get state from dmabuf"); - dma_buf_put(dmabuf); - return err; - } - - nvgpu_mutex_acquire(&state->lock); - - /* Update the compbits state. */ - state->valid_compbits = valid_compbits; - state->zbc_color = zbc_color; - - /* Discard previous compbit job fence. */ - gk20a_fence_put(state->fence); - state->fence = NULL; - - nvgpu_mutex_release(&state->lock); - dma_buf_put(dmabuf); - return 0; -} - -int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - u32 ver = g->params.gpu_arch + g->params.gpu_impl; - - switch (ver) { - case GK20A_GPUID_GM20B: - case GK20A_GPUID_GM20B_B: - l->ops.cde = gm20b_cde_ops.cde; - break; - case NVGPU_GPUID_GP10B: - l->ops.cde = gp10b_cde_ops.cde; - break; - default: - /* CDE is optional, so today ignoring unknown chip is fine */ - break; - } - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h deleted file mode 100644 index 5928b624..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde.h +++ /dev/null @@ -1,326 +0,0 @@ -/* - * GK20A color decompression engine support - * - * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _CDE_GK20A_H_
-#define _CDE_GK20A_H_
-
-#include
-#include
-#include
-
-#include
-#include
-
-#define MAX_CDE_BUFS 10
-#define MAX_CDE_PARAMS 64
-#define MAX_CDE_USER_PARAMS 40
-#define MAX_CDE_ARRAY_ENTRIES 9
-
-/*
- * The size of the context ring buffer that is dedicated for handling cde
- * jobs. Re-using a context (=channel) for a different cde job forces a cpu
- * wait on the previous job to that channel, so increasing this value
- * reduces the likelihood of stalls.
- */
-#define NUM_CDE_CONTEXTS 4
-
-struct dma_buf;
-struct device;
-struct nvgpu_os_linux;
-struct gk20a;
-struct gk20a_fence;
-struct nvgpu_channel_fence;
-struct channel_gk20a;
-struct vm_gk20a;
-struct nvgpu_gpfifo_entry;
-
-/*
- * this element defines a buffer that is allocated and mapped into gpu address
- * space. data_byte_offset defines the beginning of the buffer inside the
- * firmware. num_bytes defines how many bytes the buffer contains.
- *
- * If data_byte_offset is zero, we allocate an empty buffer.
- */
-
-struct gk20a_cde_hdr_buf {
-	u64 data_byte_offset;
-	u64 num_bytes;
-};
-
-/*
- * this element defines a constant patching in buffers. It basically
- * computes the physical address of source_buf + source_byte_offset. The
- * address is then modified into the patch value as per:
- *   value = (current_value & ~mask) | (address << shift) & mask.
- *
- * The type field defines the register size as:
- *  0=u32,
- *  1=u64 (little endian),
- *  2=u64 (big endian)
- */
-
-struct gk20a_cde_hdr_replace {
-	u32 target_buf;
-	u32 source_buf;
-	s32 shift;
-	u32 type;
-	u64 target_byte_offset;
-	u64 source_byte_offset;
-	u64 mask;
-};
-
-enum {
-	TYPE_PARAM_TYPE_U32 = 0,
-	TYPE_PARAM_TYPE_U64_LITTLE,
-	TYPE_PARAM_TYPE_U64_BIG
-};
-
-/*
- * this element defines a runtime patching in buffers. Parameters with ids
- * from 0 to 1023 are reserved for special usage as follows:
- *  0 = comptags_per_cacheline,
- *  1 = slices_per_fbp,
- *  2 = num_fbps
- *  3 = source buffer first page offset
- *  4 = source buffer block height log2
- *  5 = backing store memory address
- *  6 = destination memory address
- *  7 = destination size (bytes)
- *  8 = backing store size (bytes)
- *  9 = cache line size
- *
- * Parameters with id 1024 and above are user-specified, i.e. they determine
- * where parameters from user space should be placed in buffers, what their
- * type is, etc.
- *
- * Once the value is available, we add data_offset to the value.
- *
- * The value address is then modified into the patch value as per:
- *   value = (current_value & ~mask) | (address << shift) & mask.
- *
- * The type field defines the register size as:
- *  0=u32,
- *  1=u64 (little endian),
- *  2=u64 (big endian)
- */
-
-struct gk20a_cde_hdr_param {
-	u32 id;
-	u32 target_buf;
-	s32 shift;
-	u32 type;
-	s64 data_offset;
-	u64 target_byte_offset;
-	u64 mask;
-};
-
-enum {
-	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
-	TYPE_PARAM_GPU_CONFIGURATION,
-	TYPE_PARAM_FIRSTPAGEOFFSET,
-	TYPE_PARAM_NUMPAGES,
-	TYPE_PARAM_BACKINGSTORE,
-	TYPE_PARAM_DESTINATION,
-	TYPE_PARAM_DESTINATION_SIZE,
-	TYPE_PARAM_BACKINGSTORE_SIZE,
-	TYPE_PARAM_SOURCE_SMMU_ADDR,
-	TYPE_PARAM_BACKINGSTORE_BASE_HW,
-	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
-	TYPE_PARAM_SCATTERBUFFER,
-	TYPE_PARAM_SCATTERBUFFER_SIZE,
-	NUM_RESERVED_PARAMS = 1024,
-};
-
-/*
- * This header element defines a command. The op field determines whether the
- * element is defining an init (0) or convert command (1). data_byte_offset
- * denotes the beginning address of command elements in the file.
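- * num_entries gives the number of gk20a_cde_cmd_elem records found at
- * that offset (this is what gk20a_init_cde_command() consumes).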
- */
-
-struct gk20a_cde_hdr_command {
-	u32 op;
-	u32 num_entries;
-	u64 data_byte_offset;
-};
-
-enum {
-	TYPE_BUF_COMMAND_INIT = 0,
-	TYPE_BUF_COMMAND_CONVERT,
-	TYPE_BUF_COMMAND_NOOP
-};
-
-/*
- * This command element defines one entry inside a push buffer. target_buf
- * defines the buffer holding the pushbuffer entries, target_byte_offset the
- * offset inside the buffer, and num_bytes the size of the command data.
- */
-
-struct gk20a_cde_cmd_elem {
-	u32 target_buf;
-	u32 padding;
-	u64 target_byte_offset;
-	u64 num_bytes;
-};
-
-/*
- * This element is used for storing a small array of data.
- */
-
-enum {
-	ARRAY_PROGRAM_OFFSET = 0,
-	ARRAY_REGISTER_COUNT,
-	ARRAY_LAUNCH_COMMAND,
-	NUM_CDE_ARRAYS
-};
-
-struct gk20a_cde_hdr_array {
-	u32 id;
-	u32 data[MAX_CDE_ARRAY_ENTRIES];
-};
-
-/*
- * The following defines a single header element. Each element has a type
- * and carries one of the data structures in the union.
- */
-
-struct gk20a_cde_hdr_elem {
-	u32 type;
-	u32 padding;
-	union {
-		struct gk20a_cde_hdr_buf buf;
-		struct gk20a_cde_hdr_replace replace;
-		struct gk20a_cde_hdr_param param;
-		u32 required_class;
-		struct gk20a_cde_hdr_command command;
-		struct gk20a_cde_hdr_array array;
-	};
-};
-
-enum {
-	TYPE_BUF = 0,
-	TYPE_REPLACE,
-	TYPE_PARAM,
-	TYPE_REQUIRED_CLASS,
-	TYPE_COMMAND,
-	TYPE_ARRAY
-};
-
-struct gk20a_cde_param {
-	u32 id;
-	u32 padding;
-	u64 value;
-};
-
-struct gk20a_cde_ctx {
-	struct nvgpu_os_linux *l;
-	struct device *dev;
-
-	/* channel related data */
-	struct channel_gk20a *ch;
-	struct tsg_gk20a *tsg;
-	struct vm_gk20a *vm;
-
-	/* buf converter configuration */
-	struct nvgpu_mem mem[MAX_CDE_BUFS];
-	unsigned int num_bufs;
-
-	/* buffer patching params (where should patching be done) */
-	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
-	unsigned int num_params;
-
-	/* storage for user space parameter values */
-	u32 user_param_values[MAX_CDE_USER_PARAMS];
-
-	u32 surf_param_offset;
-	u32 surf_param_lines;
-	u64 surf_vaddr;
-
-	u64 compbit_vaddr;
-	u64 compbit_size;
-
-	u64 scatterbuffer_vaddr;
-	u64 scatterbuffer_size;
-
-	u64 backing_store_vaddr;
-
-	struct nvgpu_gpfifo_entry *init_convert_cmd;
-	int init_cmd_num_entries;
-
-	struct nvgpu_gpfifo_entry *convert_cmd;
-	int convert_cmd_num_entries;
-
-	struct kobj_attribute attr;
-
-	bool init_cmd_executed;
-
-	struct nvgpu_list_node list;
-	bool is_temporary;
-	bool in_use;
-	struct delayed_work ctx_deleter_work;
-};
-
-static inline struct gk20a_cde_ctx *
-gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
-{
-	return (struct gk20a_cde_ctx *)
-		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
-}
-
-struct gk20a_cde_app {
-	bool initialised;
-	struct nvgpu_mutex mutex;
-
-	struct nvgpu_list_node free_contexts;
-	struct nvgpu_list_node used_contexts;
-	unsigned int ctx_count;
-	unsigned int ctx_usecount;
-	unsigned int ctx_count_top;
-
-	u32 firmware_version;
-
-	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
-
-	u32 shader_parameter;
-};
-
-void gk20a_cde_destroy(struct nvgpu_os_linux *l);
-void gk20a_cde_suspend(struct nvgpu_os_linux *l);
-int gk20a_init_cde_support(struct nvgpu_os_linux *l);
-int gk20a_cde_reload(struct nvgpu_os_linux *l);
-int gk20a_cde_convert(struct nvgpu_os_linux *l,
-		struct dma_buf *compbits_buf,
-		u64 compbits_byte_offset,
-		u64 scatterbuffer_byte_offset,
-		struct nvgpu_channel_fence *fence,
-		u32 __flags, struct gk20a_cde_param *params,
-		int num_params, struct gk20a_fence **fence_out);
-
-int gk20a_prepare_compressible_read(
-		struct nvgpu_os_linux *l, u32 buffer_fd,
u32 request, u64 offset, - u64 compbits_hoffset, u64 compbits_voffset, - u64 scatterbuffer_offset, - u32 width, u32 height, u32 block_height_log2, - u32 submit_flags, struct nvgpu_channel_fence *fence, - u32 *valid_compbits, u32 *zbc_color, - struct gk20a_fence **fence_out); -int gk20a_mark_compressible_write( - struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, - u32 zbc_color); -int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c b/drivers/gpu/nvgpu/common/linux/cde_gm20b.c deleted file mode 100644 index 1cd15c54..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * GM20B CDE - * - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "gk20a/gk20a.h" -#include "cde_gm20b.h" - -enum programs { - PROG_HPASS = 0, - PROG_VPASS_LARGE = 1, - PROG_VPASS_SMALL = 2, - PROG_HPASS_DEBUG = 3, - PROG_VPASS_LARGE_DEBUG = 4, - PROG_VPASS_SMALL_DEBUG = 5, - PROG_PASSTHROUGH = 6, -}; - -static void gm20b_cde_get_program_numbers(struct gk20a *g, - u32 block_height_log2, - u32 shader_parameter, - int *hprog_out, int *vprog_out) -{ - int hprog = PROG_HPASS; - int vprog = (block_height_log2 >= 2) ? - PROG_VPASS_LARGE : PROG_VPASS_SMALL; - if (shader_parameter == 1) { - hprog = PROG_PASSTHROUGH; - vprog = PROG_PASSTHROUGH; - } else if (shader_parameter == 2) { - hprog = PROG_HPASS_DEBUG; - vprog = (block_height_log2 >= 2) ? - PROG_VPASS_LARGE_DEBUG : - PROG_VPASS_SMALL_DEBUG; - } - - *hprog_out = hprog; - *vprog_out = vprog; -} - -struct nvgpu_os_linux_ops gm20b_cde_ops = { - .cde = { - .get_program_numbers = gm20b_cde_get_program_numbers, - }, -}; diff --git a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h b/drivers/gpu/nvgpu/common/linux/cde_gm20b.h deleted file mode 100644 index 640d6ab6..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gm20b.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * GM20B CDE - * - * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _NVHOST_GM20B_CDE -#define _NVHOST_GM20B_CDE - -#include "os_linux.h" - -extern struct nvgpu_os_linux_ops gm20b_cde_ops; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c deleted file mode 100644 index 5c0e79a7..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * GP10B CDE - * - * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-
-#include "gk20a/gk20a.h"
-#include "cde_gp10b.h"
-
-#include
-#include
-
-enum gp10b_programs {
-	GP10B_PROG_HPASS = 0,
-	GP10B_PROG_HPASS_4K = 1,
-	GP10B_PROG_VPASS = 2,
-	GP10B_PROG_VPASS_4K = 3,
-	GP10B_PROG_HPASS_DEBUG = 4,
-	GP10B_PROG_HPASS_4K_DEBUG = 5,
-	GP10B_PROG_VPASS_DEBUG = 6,
-	GP10B_PROG_VPASS_4K_DEBUG = 7,
-	GP10B_PROG_PASSTHROUGH = 8,
-};
-
-void gp10b_cde_get_program_numbers(struct gk20a *g,
-				   u32 block_height_log2,
-				   u32 shader_parameter,
-				   int *hprog_out, int *vprog_out)
-{
-	int hprog, vprog;
-
-	if (shader_parameter == 1) {
-		hprog = GP10B_PROG_PASSTHROUGH;
-		vprog = GP10B_PROG_PASSTHROUGH;
-	} else {
-		hprog = GP10B_PROG_HPASS;
-		vprog = GP10B_PROG_VPASS;
-		if (shader_parameter == 2) {
-			hprog = GP10B_PROG_HPASS_DEBUG;
-			vprog = GP10B_PROG_VPASS_DEBUG;
-		}
-		if (!nvgpu_iommuable(g)) {
-			if (!g->mm.disable_bigpage) {
-				nvgpu_warn(g,
-					   "Without an IOMMU, big pages cannot be used");
-			}
-			/* the 4K (small page) variant of each program
-			 * directly follows the big page one in the enum */
-			hprog |= 1;
-			vprog |= 1;
-		}
-	}
-
-	*hprog_out = hprog;
-	*vprog_out = vprog;
-}
-
-bool gp10b_need_scatter_buffer(struct gk20a *g)
-{
-	return !nvgpu_iommuable(g);
-}
-
-static u8 parity(u32 a)
-{
-	/* fold to a 4-bit value, then look the answer up in the
-	 * 16-entry parity table 0x6996 */
-	a ^= a >> 16u;
-	a ^= a >> 8u;
-	a ^= a >> 4u;
-	a &= 0xfu;
-	return (0x6996u >> a) & 1u;
-}
-
-int gp10b_populate_scatter_buffer(struct gk20a *g,
-		struct sg_table *sgt,
-		size_t surface_size,
-		void *scatter_buffer_ptr,
-		size_t scatter_buffer_size)
-{
-	/* map scatter buffer to CPU VA and fill it */
-	const u32 page_size_log2 = 12;
-	const u32 page_size = 1 << page_size_log2;
-	const u32 page_size_shift = page_size_log2 - 7u;
-
-	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
-	const u32 getSliceMaskGP10B = 0x3ffffe48;
-	u8 *scatter_buffer = scatter_buffer_ptr;
-
-	size_t i;
-	struct scatterlist *sg = NULL;
-	u8 d = 0;
-	size_t page = 0;
-	size_t pages_left;
-
-	surface_size = round_up(surface_size, page_size);
-
-	pages_left = surface_size >> page_size_log2;
-	/* one scatter bit per page; round up so that a partial last byte
-	 * still fits in the buffer */
-	if (((pages_left + 7) >> 3) > scatter_buffer_size)
-		return -ENOMEM;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		unsigned int j;
-		u64 surf_pa = sg_phys(sg);
-		unsigned int n = (int)(sg->length >> page_size_log2);
-
-		nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
-
-		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
-			u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B) >> page_size_shift;
-			u8 scatter_bit = parity(addr);
-			u8 bit = page & 7;
-
-			d |= scatter_bit << bit;
-			if (bit == 7) {
-				scatter_buffer[page >> 3] = d;
-				d = 0;
-			}
-
-			++page;
-			--pages_left;
-		}
-
-		if (pages_left == 0)
-			break;
-	}
-
-	/* write the last byte in case the number of pages is not divisible by 8 */
-	if ((page & 7) != 0)
-		scatter_buffer[page >> 3] = d;
-
-	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
-		nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
-		for (i = 0; i < page >> 3; i++) {
-			nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
-		}
-	}
-
-	return 0;
-}
-
-struct nvgpu_os_linux_ops gp10b_cde_ops = {
-	.cde = {
-		.get_program_numbers = gp10b_cde_get_program_numbers,
-		.need_scatter_buffer = gp10b_need_scatter_buffer,
-		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
-	},
-};
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h b/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
deleted file mode 100644
index 52e9f292..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde_gp10b.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * GP10B CDE
- *
- * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _NVHOST_GP10B_CDE -#define _NVHOST_GP10B_CDE - -#include "os_linux.h" - -extern struct nvgpu_os_linux_ops gp10b_cde_ops; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c deleted file mode 100644 index 165f33db..00000000 --- a/drivers/gpu/nvgpu/common/linux/ce2.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include - -#include "gk20a/ce2_gk20a.h" -#include "gk20a/gk20a.h" -#include "channel.h" - -static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) -{ - /* there is no local memory available, - don't allow local memory related CE flags */ - if (!g->mm.vidmem.size) { - launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | - NVGPU_CE_DST_LOCATION_LOCAL_FB); - } - return launch_flags; -} - -int gk20a_ce_execute_ops(struct gk20a *g, - u32 ce_ctx_id, - u64 src_buf, - u64 dst_buf, - u64 size, - unsigned int payload, - int launch_flags, - int request_operation, - u32 submit_flags, - struct gk20a_fence **gk20a_fence_out) -{ - int ret = -EPERM; - struct gk20a_ce_app *ce_app = &g->ce_app; - struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; - bool found = false; - u32 *cmd_buf_cpu_va; - u64 cmd_buf_gpu_va = 0; - u32 methodSize; - u32 cmd_buf_read_offset; - u32 dma_copy_class; - struct nvgpu_gpfifo_entry gpfifo; - struct nvgpu_channel_fence fence = {0, 0}; - struct gk20a_fence *ce_cmd_buf_fence_out = NULL; - - if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) - goto end; - - nvgpu_mutex_acquire(&ce_app->app_mutex); - - nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, - &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { - if (ce_ctx->ctx_id == ce_ctx_id) { - found = true; - break; - } - } - - nvgpu_mutex_release(&ce_app->app_mutex); - - if (!found) { - ret = -EINVAL; - goto end; - } - - if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { - ret = -ENODEV; - goto end; - } - - nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); - - ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; - - cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * - (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); - - cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; - - if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { - struct gk20a_fence **prev_post_fence = - &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; - - ret = gk20a_fence_wait(g, *prev_post_fence, - gk20a_get_gr_idle_timeout(g)); - - gk20a_fence_put(*prev_post_fence); - *prev_post_fence = NULL; - if (ret) - goto noop; - } - - cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); - - dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - methodSize = gk20a_ce_prepare_submit(src_buf, - dst_buf, - size, - &cmd_buf_cpu_va[cmd_buf_read_offset], - NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, - payload, - gk20a_get_valid_launch_flags(g, launch_flags), - request_operation, - dma_copy_class); - - if (methodSize) { - /* store the element into gpfifo */ - gpfifo.entry0 = - u64_lo32(cmd_buf_gpu_va); - gpfifo.entry1 = - (u64_hi32(cmd_buf_gpu_va) | - pbdma_gp_entry1_length_f(methodSize)); - - /* take always the postfence as it is needed for protecting the ce context */ - submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - nvgpu_smp_wmb(); - - ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, - 1, submit_flags, &fence, - &ce_cmd_buf_fence_out, NULL); - - if (!ret) { - ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = - ce_cmd_buf_fence_out; - if (gk20a_fence_out) { - gk20a_fence_get(ce_cmd_buf_fence_out); - *gk20a_fence_out = ce_cmd_buf_fence_out; - } - - /* Next available command buffer queue Index */ - ++ce_ctx->cmd_buf_read_queue_offset; - } - } else { - ret = -ENOMEM; - } -noop: - nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); -end: - return ret; -} diff --git a/drivers/gpu/nvgpu/common/linux/channel.c 
b/drivers/gpu/nvgpu/common/linux/channel.c deleted file mode 100644 index 7810bc21..00000000 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ /dev/null @@ -1,1021 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -/* - * This is required for nvgpu_vm_find_buf() which is used in the tracing - * code. Once we can get and access userspace buffers without requiring - * direct dma_buf usage this can be removed. - */ -#include - -#include "gk20a/gk20a.h" - -#include "channel.h" -#include "ioctl_channel.h" -#include "os_linux.h" - -#include - -#include -#include -#include -#include - -#include "sync_sema_android.h" - -u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) - flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) - flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; - - if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) - flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; - - return flags; -} - -/* - * API to convert error_notifiers in common code and of the form - * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user - * space and of the form NVGPU_CHANNEL_* - */ -static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) -{ - switch (error_notifier) { - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: - return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: - return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; - case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: - return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; - case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: - return NVGPU_CHANNEL_GR_EXCEPTION; - case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: - return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; - case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: - return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; - case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: - return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; - case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: - return NVGPU_CHANNEL_PBDMA_ERROR; - case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: - return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; - case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: - return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; - case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: - return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; - } - - pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); - - return error_notifier; -} - -/** - * nvgpu_set_error_notifier_locked() - * Should be called with 
ch->error_notifier_mutex held - * - * error should be of the form NVGPU_ERR_NOTIFIER_* - */ -void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - error = nvgpu_error_notifier_to_channel_notifier(error); - - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - struct timespec time_data; - u64 nsec; - - getnstimeofday(&time_data); - nsec = ((u64)time_data.tv_sec) * 1000000000u + - (u64)time_data.tv_nsec; - notification->time_stamp.nanoseconds[0] = - (u32)nsec; - notification->time_stamp.nanoseconds[1] = - (u32)(nsec >> 32); - notification->info32 = error; - notification->status = 0xffff; - - nvgpu_err(ch->g, - "error notifier set to %d for ch %d", error, ch->chid); - } -} - -/* error should be of the form NVGPU_ERR_NOTIFIER_* */ -void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - nvgpu_set_error_notifier_locked(ch, error); - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - - /* Don't overwrite error flag if it is already set */ - if (notification->status != 0xffff) - nvgpu_set_error_notifier_locked(ch, error); - } - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ -bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - bool notifier_set = false; - - error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - struct nvgpu_notification *notification = - priv->error_notifier.notification; - u32 err = notification->info32; - - if (err == error_notifier) - notifier_set = true; - } - nvgpu_mutex_release(&priv->error_notifier.mutex); - - return notifier_set; -} - -static void gk20a_channel_update_runcb_fn(struct work_struct *work) -{ - struct nvgpu_channel_completion_cb *completion_cb = - container_of(work, struct nvgpu_channel_completion_cb, work); - struct nvgpu_channel_linux *priv = - container_of(completion_cb, - struct nvgpu_channel_linux, completion_cb); - struct channel_gk20a *ch = priv->ch; - void (*fn)(struct channel_gk20a *, void *); - void *user_data; - - nvgpu_spinlock_acquire(&completion_cb->lock); - fn = completion_cb->fn; - user_data = completion_cb->user_data; - nvgpu_spinlock_release(&completion_cb->lock); - - if (fn) - fn(ch, user_data); -} - -static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - nvgpu_spinlock_init(&priv->completion_cb.lock); - INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); -} - -static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = NULL; - priv->completion_cb.user_data = NULL; - 
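-	/*
-	 * Clearing fn under the lock makes a concurrently running worker
-	 * (which re-reads fn/user_data under the same lock) a no-op; the
-	 * cancel_work_sync() below then flushes any instance already
-	 * queued by work_completion_signal().
-	 */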
nvgpu_spinlock_release(&priv->completion_cb.lock); - cancel_work_sync(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - schedule_work(&priv->completion_cb.work); -} - -static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (priv->completion_cb.fn) - cancel_work_sync(&priv->completion_cb.work); -} - -struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, - void (*update_fn)(struct channel_gk20a *, void *), - void *update_fn_data, - int runlist_id, - bool is_privileged_channel) -{ - struct channel_gk20a *ch; - struct nvgpu_channel_linux *priv; - - ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, - nvgpu_current_pid(g), nvgpu_current_tid(g)); - - if (ch) { - priv = ch->os_priv; - nvgpu_spinlock_acquire(&priv->completion_cb.lock); - priv->completion_cb.fn = update_fn; - priv->completion_cb.user_data = update_fn_data; - nvgpu_spinlock_release(&priv->completion_cb.lock); - } - - return ch; -} - -static void nvgpu_channel_open_linux(struct channel_gk20a *ch) -{ -} - -static void nvgpu_channel_close_linux(struct channel_gk20a *ch) -{ - nvgpu_channel_work_completion_clear(ch); - -#if defined(CONFIG_GK20A_CYCLE_STATS) - gk20a_channel_free_cycle_stats_buffer(ch); - gk20a_channel_free_cycle_stats_snapshot(ch); -#endif -} - -static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv; - int err; - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) - return -ENOMEM; - - ch->os_priv = priv; - priv->ch = ch; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = true; -#endif - - err = nvgpu_mutex_init(&priv->error_notifier.mutex); - if (err) { - nvgpu_kfree(g, priv); - return err; - } - - nvgpu_channel_work_completion_init(ch); - - return 0; -} - -static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_destroy(&priv->error_notifier.mutex); - nvgpu_kfree(g, priv); - - ch->os_priv = NULL; - -#ifdef CONFIG_SYNC - ch->has_os_fence_framework_support = false; -#endif -} - -static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, - const char *fmt, ...) 
-{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - char name[30]; - va_list args; - - fence_framework = &priv->fence_framework; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); - - fence_framework->timeline = gk20a_sync_timeline_create(name); - - if (!fence_framework->timeline) - return -EINVAL; - - return 0; -} -static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_signal(fence_framework->timeline); -} - -static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - gk20a_sync_timeline_destroy(fence_framework->timeline); - fence_framework->timeline = NULL; -} - -static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - struct nvgpu_os_fence_framework *fence_framework; - - fence_framework = &priv->fence_framework; - - return (fence_framework->timeline != NULL); -} - -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - int chid; - int err; - - for (chid = 0; chid < (int)f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - err = nvgpu_channel_alloc_linux(g, ch); - if (err) - goto err_clean; - } - - g->os_channel.open = nvgpu_channel_open_linux; - g->os_channel.close = nvgpu_channel_close_linux; - g->os_channel.work_completion_signal = - nvgpu_channel_work_completion_signal; - g->os_channel.work_completion_cancel_sync = - nvgpu_channel_work_completion_cancel_sync; - - g->os_channel.os_fence_framework_inst_exists = - nvgpu_channel_fence_framework_exists; - g->os_channel.init_os_fence_framework = - nvgpu_channel_init_os_fence_framework; - g->os_channel.signal_os_fence_framework = - nvgpu_channel_signal_os_fence_framework; - g->os_channel.destroy_os_fence_framework = - nvgpu_channel_destroy_os_fence_framework; - - return 0; - -err_clean: - for (; chid >= 0; chid--) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - return err; -} - -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - struct fifo_gk20a *f = &g->fifo; - unsigned int chid; - - for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - - nvgpu_channel_free_linux(g, ch); - } - - g->os_channel.os_fence_framework_inst_exists = NULL; - g->os_channel.init_os_fence_framework = NULL; - g->os_channel.signal_os_fence_framework = NULL; - g->os_channel.destroy_os_fence_framework = NULL; -} - -u32 nvgpu_get_gpfifo_entry_size(void) -{ - return sizeof(struct nvgpu_gpfifo_entry); -} - -#ifdef CONFIG_DEBUG_FS -static void trace_write_pushbuffer(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *g) -{ - void *mem = NULL; - unsigned int words; - u64 offset; - struct dma_buf *dmabuf = NULL; - - if (gk20a_debug_trace_cmdbuf) { - u64 gpu_va = (u64)g->entry0 | - (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); - int err; - - words = pbdma_gp_entry1_length_v(g->entry1); - err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); - if (!err) - mem = dma_buf_vmap(dmabuf); - } - - if (mem) { - u32 
i; - /* - * Write in batches of 128 as there seems to be a limit - * of how much you can output to ftrace at once. - */ - for (i = 0; i < words; i += 128U) { - trace_gk20a_push_cmdbuf( - c->g->name, - 0, - min(words - i, 128U), - offset + i * sizeof(u32), - mem); - } - dma_buf_vunmap(dmabuf, mem); - } -} -#endif - -static void trace_write_pushbuffer_range(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *g, - struct nvgpu_gpfifo_entry __user *user_gpfifo, - int offset, - int count) -{ -#ifdef CONFIG_DEBUG_FS - u32 size; - int i; - struct nvgpu_gpfifo_entry *gp; - bool gpfifo_allocated = false; - - if (!gk20a_debug_trace_cmdbuf) - return; - - if (!g && !user_gpfifo) - return; - - if (!g) { - size = count * sizeof(struct nvgpu_gpfifo_entry); - if (size) { - g = nvgpu_big_malloc(c->g, size); - if (!g) - return; - - if (copy_from_user(g, user_gpfifo, size)) { - nvgpu_big_free(c->g, g); - return; - } - } - gpfifo_allocated = true; - } - - gp = g + offset; - for (i = 0; i < count; i++, gp++) - trace_write_pushbuffer(c, gp); - - if (gpfifo_allocated) - nvgpu_big_free(c->g, g); -#endif -} - -/* - * Handle the submit synchronization - pre-fences and post-fences. - */ -static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, - struct nvgpu_channel_fence *fence, - struct channel_gk20a_job *job, - struct priv_cmd_entry **wait_cmd, - struct priv_cmd_entry **incr_cmd, - struct gk20a_fence **post_fence, - bool register_irq, - u32 flags) -{ - struct gk20a *g = c->g; - bool need_sync_fence = false; - bool new_sync_created = false; - int wait_fence_fd = -1; - int err = 0; - bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); - bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); - - if (g->aggressive_sync_destroy_thresh) { - nvgpu_mutex_acquire(&c->sync_lock); - if (!c->sync) { - c->sync = gk20a_channel_sync_create(c, false); - if (!c->sync) { - err = -ENOMEM; - nvgpu_mutex_release(&c->sync_lock); - goto fail; - } - new_sync_created = true; - } - nvgpu_atomic_inc(&c->sync->refcount); - nvgpu_mutex_release(&c->sync_lock); - } - - if (g->ops.fifo.resetup_ramfc && new_sync_created) { - err = g->ops.fifo.resetup_ramfc(c); - if (err) - goto fail; - } - - /* - * Optionally insert syncpt/semaphore wait in the beginning of gpfifo - * submission when user requested and the wait hasn't expired. - */ - if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { - int max_wait_cmds = c->deterministic ? 1 : 0; - - if (!pre_alloc_enabled) - job->wait_cmd = nvgpu_kzalloc(g, - sizeof(struct priv_cmd_entry)); - - if (!job->wait_cmd) { - err = -ENOMEM; - goto fail; - } - - if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { - wait_fence_fd = fence->id; - err = c->sync->wait_fd(c->sync, wait_fence_fd, - job->wait_cmd, max_wait_cmds); - } else { - err = c->sync->wait_syncpt(c->sync, fence->id, - fence->value, - job->wait_cmd); - } - - if (err) - goto clean_up_wait_cmd; - - if (job->wait_cmd->valid) - *wait_cmd = job->wait_cmd; - } - - if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && - (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) - need_sync_fence = true; - - /* - * Always generate an increment at the end of a GPFIFO submission. This - * is used to keep track of method completion for idle railgating. The - * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 
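
For readers following the submit path: the code above and below establishes a fixed bracketing of each submission — an optional wait command first, then the caller's GPFIFO entries, then an increment command that is always appended. A minimal standalone sketch of that ordering (plain C; all names here are hypothetical, push() stands in for appending one GPFIFO entry):

    /* Sketch: entry order for one submit (hypothetical types/names). */
    enum entry_kind { WAIT_CMD, USER_ENTRY, INCR_CMD };

    static void submit_order(int have_wait, int num_user,
                             void (*push)(enum entry_kind))
    {
            int i;

            if (have_wait)
                    push(WAIT_CMD);         /* pre-fence: wait on input fence */
            for (i = 0; i < num_user; i++)
                    push(USER_ENTRY);       /* caller's GPFIFO entries */
            push(INCR_CMD);                 /* post-fence: always appended */
    }
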
- */ - job->post_fence = gk20a_alloc_fence(c); - if (!job->post_fence) { - err = -ENOMEM; - goto clean_up_wait_cmd; - } - if (!pre_alloc_enabled) - job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); - - if (!job->incr_cmd) { - err = -ENOMEM; - goto clean_up_post_fence; - } - - if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) - err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, - job->post_fence, need_wfi, need_sync_fence, - register_irq); - else - err = c->sync->incr(c->sync, job->incr_cmd, - job->post_fence, need_sync_fence, - register_irq); - if (!err) { - *incr_cmd = job->incr_cmd; - *post_fence = job->post_fence; - } else - goto clean_up_incr_cmd; - - return 0; - -clean_up_incr_cmd: - free_priv_cmdbuf(c, job->incr_cmd); - if (!pre_alloc_enabled) - job->incr_cmd = NULL; -clean_up_post_fence: - gk20a_fence_put(job->post_fence); - job->post_fence = NULL; -clean_up_wait_cmd: - free_priv_cmdbuf(c, job->wait_cmd); - if (!pre_alloc_enabled) - job->wait_cmd = NULL; -fail: - *wait_cmd = NULL; - return err; -} - -static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, - struct priv_cmd_entry *cmd) -{ - struct gk20a *g = c->g; - struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; - struct nvgpu_gpfifo_entry x = { - .entry0 = u64_lo32(cmd->gva), - .entry1 = u64_hi32(cmd->gva) | - pbdma_gp_entry1_length_f(cmd->size) - }; - - nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), - &x, sizeof(x)); - - if (cmd->mem->aperture == APERTURE_SYSMEM) - trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, - cmd->mem->cpu_va + cmd->off * sizeof(u32)); - - c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); -} - -/* - * Copy source gpfifo entries into the gpfifo ring buffer, potentially - * splitting into two memcpys to handle wrap-around. - */ -static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *kern_gpfifo, - struct nvgpu_gpfifo_entry __user *user_gpfifo, - u32 num_entries) -{ - /* byte offsets */ - u32 gpfifo_size = - c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); - u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); - u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); - u32 end = start + len; /* exclusive */ - struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; - struct nvgpu_gpfifo_entry *cpu_src; - int err; - - if (user_gpfifo && !c->gpfifo.pipe) { - /* - * This path (from userspace to sysmem) is special in order to - * avoid two copies unnecessarily (from user to pipe, then from - * pipe to gpu sysmem buffer). 
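
The point of this special case is copy count: staging through the pipe buffer costs two copies (user to pipe, then pipe to the GPU buffer), while a sysmem-backed gpfifo can be filled directly from userspace in one. A hedged sketch of the two strategies (standalone C; memcpy stands in for copy_from_user, and mem_wr for the driver's vidmem write path):

    #include <string.h>

    /* One copy: user data lands directly in the sysmem gpfifo. */
    void copy_direct(void *gpfifo_sysmem, const void *user, size_t len)
    {
            memcpy(gpfifo_sysmem, user, len);
    }

    /* Two copies: user -> pipe staging buffer -> vidmem gpfifo. */
    void copy_staged(void *gpfifo_vidmem, void *pipe,
                     const void *user, size_t len,
                     void (*mem_wr)(void *dst, const void *src, size_t n))
    {
            memcpy(pipe, user, len);            /* copy 1 */
            mem_wr(gpfifo_vidmem, pipe, len);   /* copy 2 */
    }
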
- */ - if (end > gpfifo_size) { - /* wrap-around */ - int length0 = gpfifo_size - start; - int length1 = len - length0; - void __user *user2 = (u8 __user *)user_gpfifo + length0; - - err = copy_from_user(gpfifo_mem->cpu_va + start, - user_gpfifo, length0); - if (err) - return err; - - err = copy_from_user(gpfifo_mem->cpu_va, - user2, length1); - if (err) - return err; - } else { - err = copy_from_user(gpfifo_mem->cpu_va + start, - user_gpfifo, len); - if (err) - return err; - } - - trace_write_pushbuffer_range(c, NULL, user_gpfifo, - 0, num_entries); - goto out; - } else if (user_gpfifo) { - /* from userspace to vidmem, use the common copy path below */ - err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len); - if (err) - return err; - - cpu_src = c->gpfifo.pipe; - } else { - /* from kernel to either sysmem or vidmem, don't need - * copy_from_user so use the common path below */ - cpu_src = kern_gpfifo; - } - - if (end > gpfifo_size) { - /* wrap-around */ - int length0 = gpfifo_size - start; - int length1 = len - length0; - void *src2 = (u8 *)cpu_src + length0; - - nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0); - nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1); - } else { - nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len); - - } - - trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); - -out: - c->gpfifo.put = (c->gpfifo.put + num_entries) & - (c->gpfifo.entry_num - 1); - - return 0; -} - -int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *gpfifo, - struct nvgpu_submit_gpfifo_args *args, - u32 num_entries, - u32 flags, - struct nvgpu_channel_fence *fence, - struct gk20a_fence **fence_out, - struct fifo_profile_gk20a *profile) -{ - struct gk20a *g = c->g; - struct priv_cmd_entry *wait_cmd = NULL; - struct priv_cmd_entry *incr_cmd = NULL; - struct gk20a_fence *post_fence = NULL; - struct channel_gk20a_job *job = NULL; - /* we might need two extra gpfifo entries - one for pre fence - * and one for post fence. */ - const int extra_entries = 2; - bool skip_buffer_refcounting = (flags & - NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); - int err = 0; - bool need_job_tracking; - bool need_deferred_cleanup = false; - struct nvgpu_gpfifo_entry __user *user_gpfifo = args ? - (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL; - - if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) - return -ENODEV; - - if (c->has_timedout) - return -ETIMEDOUT; - - if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) - return -ENOMEM; - - /* fifo not large enough for request. Return error immediately. - * Kernel can insert gpfifo entries before and after user gpfifos. - * So, add extra_entries in user request. Also, HW with fifo size N - * can accept only N-1 entreis and so the below condition */ - if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { - nvgpu_err(g, "not enough gpfifo space allocated"); - return -ENOMEM; - } - - if (!gpfifo && !args) - return -EINVAL; - - if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | - NVGPU_SUBMIT_FLAGS_FENCE_GET)) && - !fence) - return -EINVAL; - - /* an address space needs to have been bound at this point. 
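
The capacity check above is worth spelling out: a hardware ring of N slots can only ever hold N-1 entries (one slot must stay empty so that full and empty states are distinguishable), and the kernel may add up to two priv cmd entries of its own per submit. A quick standalone check mirroring that arithmetic (entry counts hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    /* Usable capacity of an entry_num-slot ring is entry_num - 1, and
     * the kernel may bracket the submit with 2 extra entries. */
    static bool submit_fits(uint32_t entry_num, uint32_t num_entries)
    {
            const uint32_t extra_entries = 2;

            return num_entries + extra_entries <= entry_num - 1;
    }
    /* e.g. entry_num = 1024: at most 1021 user entries per submit. */
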
*/ - if (!gk20a_channel_as_bound(c)) { - nvgpu_err(g, - "not bound to an address space at time of gpfifo" - " submission."); - return -EINVAL; - } - - gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); - - /* update debug settings */ - nvgpu_ltc_sync_enabled(g); - - nvgpu_log_info(g, "channel %d", c->chid); - - /* - * Job tracking is necessary for any of the following conditions: - * - pre- or post-fence functionality - * - channel wdt - * - GPU rail-gating with non-deterministic channels - * - buffer refcounting - * - * If none of the conditions are met, then job tracking is not - * required and a fast submit can be done (ie. only need to write - * out userspace GPFIFO entries and update GP_PUT). - */ - need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || - (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || - c->timeout.enabled || - (g->can_railgate && !c->deterministic) || - !skip_buffer_refcounting; - - if (need_job_tracking) { - bool need_sync_framework = false; - - /* - * If the channel is to have deterministic latency and - * job tracking is required, the channel must have - * pre-allocated resources. Otherwise, we fail the submit here - */ - if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) - return -EINVAL; - - need_sync_framework = - gk20a_channel_sync_needs_sync_framework(g) || - (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && - flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); - - /* - * Deferred clean-up is necessary for any of the following - * conditions: - * - channel's deterministic flag is not set - * - dependency on sync framework, which could make the - * behavior of the clean-up operation non-deterministic - * (should not be performed in the submit path) - * - channel wdt - * - GPU rail-gating with non-deterministic channels - * - buffer refcounting - * - * If none of the conditions are met, then deferred clean-up - * is not required, and we clean-up one job-tracking - * resource in the submit path. - */ - need_deferred_cleanup = !c->deterministic || - need_sync_framework || - c->timeout.enabled || - (g->can_railgate && - !c->deterministic) || - !skip_buffer_refcounting; - - /* - * For deterministic channels, we don't allow deferred clean_up - * processing to occur. In cases we hit this, we fail the submit - */ - if (c->deterministic && need_deferred_cleanup) - return -EINVAL; - - if (!c->deterministic) { - /* - * Get a power ref unless this is a deterministic - * channel that holds them during the channel lifetime. - * This one is released by gk20a_channel_clean_up_jobs, - * via syncpt or sema interrupt, whichever is used. - */ - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a to submit gpfifo, process %s", - current->comm); - return err; - } - } - - if (!need_deferred_cleanup) { - /* clean up a single job */ - gk20a_channel_clean_up_jobs(c, false); - } - } - - - /* Grab access to HW to deal with do_idle */ - if (c->deterministic) - nvgpu_rwsem_down_read(&g->deterministic_busy); - - if (c->deterministic && c->deterministic_railgate_allowed) { - /* - * Nope - this channel has dropped its own power ref. As - * deterministic submits don't hold power on per each submitted - * job like normal ones do, the GPU might railgate any time now - * and thus submit is disallowed. - */ - err = -EINVAL; - goto clean_up; - } - - trace_gk20a_channel_submit_gpfifo(g->name, - c->chid, - num_entries, - flags, - fence ? fence->id : 0, - fence ? 
fence->value : 0); - - nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", - c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); - - /* - * Make sure we have enough space for gpfifo entries. Check cached - * values first and then read from HW. If no space, return EAGAIN - * and let userpace decide to re-try request or not. - */ - if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { - if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { - err = -EAGAIN; - goto clean_up; - } - } - - if (c->has_timedout) { - err = -ETIMEDOUT; - goto clean_up; - } - - if (need_job_tracking) { - err = channel_gk20a_alloc_job(c, &job); - if (err) - goto clean_up; - - err = gk20a_submit_prepare_syncs(c, fence, job, - &wait_cmd, &incr_cmd, - &post_fence, - need_deferred_cleanup, - flags); - if (err) - goto clean_up_job; - } - - gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); - - if (wait_cmd) - gk20a_submit_append_priv_cmdbuf(c, wait_cmd); - - if (gpfifo || user_gpfifo) - err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, - num_entries); - if (err) - goto clean_up_job; - - /* - * And here's where we add the incr_cmd we generated earlier. It should - * always run! - */ - if (incr_cmd) - gk20a_submit_append_priv_cmdbuf(c, incr_cmd); - - if (fence_out) - *fence_out = gk20a_fence_get(post_fence); - - if (need_job_tracking) - /* TODO! Check for errors... */ - gk20a_channel_add_job(c, job, skip_buffer_refcounting); - gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); - - g->ops.fifo.userd_gp_put(g, c); - - /* No hw access beyond this point */ - if (c->deterministic) - nvgpu_rwsem_up_read(&g->deterministic_busy); - - trace_gk20a_channel_submitted_gpfifo(g->name, - c->chid, - num_entries, - flags, - post_fence ? post_fence->syncpt_id : 0, - post_fence ? post_fence->syncpt_value : 0); - - nvgpu_log_info(g, "post-submit put %d, get %d, size %d", - c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); - - gk20a_fifo_profile_snapshot(profile, PROFILE_END); - - nvgpu_log_fn(g, "done"); - return err; - -clean_up_job: - channel_gk20a_free_job(c, job); -clean_up: - nvgpu_log_fn(g, "fail"); - gk20a_fence_put(post_fence); - if (c->deterministic) - nvgpu_rwsem_up_read(&g->deterministic_busy); - else if (need_deferred_cleanup) - gk20a_idle(g); - - return err; -} - diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h deleted file mode 100644 index 4a58b10c..00000000 --- a/drivers/gpu/nvgpu/common/linux/channel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
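
The deterministic_busy rwsem used in the submit path follows a standard reader/writer pattern: every deterministic submit takes the lock for read, so many submits proceed concurrently, while the power path that wants to railgate takes it for write and thereby excludes all in-flight submits. A generic sketch of that pattern using POSIX rwlocks (userspace analogue; names hypothetical):

    #include <pthread.h>

    static pthread_rwlock_t deterministic_busy = PTHREAD_RWLOCK_INITIALIZER;

    void submit_job(void (*do_submit)(void))
    {
            pthread_rwlock_rdlock(&deterministic_busy); /* many readers */
            do_submit();                    /* HW access allowed here */
            pthread_rwlock_unlock(&deterministic_busy); /* no HW access after */
    }

    void railgate(void (*power_down)(void))
    {
            pthread_rwlock_wrlock(&deterministic_busy); /* waits for submits */
            power_down();
            pthread_rwlock_unlock(&deterministic_busy);
    }
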
- */ -#ifndef __NVGPU_CHANNEL_H__ -#define __NVGPU_CHANNEL_H__ - -#include -#include - -#include - -struct channel_gk20a; -struct nvgpu_gpfifo; -struct nvgpu_submit_gpfifo_args; -struct nvgpu_channel_fence; -struct gk20a_fence; -struct fifo_profile_gk20a; -struct nvgpu_os_linux; - -struct sync_fence; -struct sync_timeline; - -struct nvgpu_channel_completion_cb { - /* - * Signal channel owner via a callback, if set, in job cleanup with - * schedule_work. Means that something finished on the channel (perhaps - * more than one job). - */ - void (*fn)(struct channel_gk20a *, void *); - void *user_data; - /* Make access to the two above atomic */ - struct nvgpu_spinlock lock; - /* Per-channel async work task, cannot reschedule itself */ - struct work_struct work; -}; - -struct nvgpu_error_notifier { - struct dma_buf *dmabuf; - void *vaddr; - - struct nvgpu_notification *notification; - - struct nvgpu_mutex mutex; -}; - -/* - * This struct contains fence_related data. - * e.g. sync_timeline for sync_fences. - */ -struct nvgpu_os_fence_framework { - struct sync_timeline *timeline; -}; - -struct nvgpu_channel_linux { - struct channel_gk20a *ch; - - struct nvgpu_os_fence_framework fence_framework; - - struct nvgpu_channel_completion_cb completion_cb; - struct nvgpu_error_notifier error_notifier; - - struct dma_buf *cyclestate_buffer_handler; -}; - -u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); - -struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, - void (*update_fn)(struct channel_gk20a *, void *), - void *update_fn_data, - int runlist_id, - bool is_privileged_channel); - -int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, - struct nvgpu_gpfifo_entry *gpfifo, - struct nvgpu_submit_gpfifo_args *args, - u32 num_entries, - u32 flags, - struct nvgpu_channel_fence *fence, - struct gk20a_fence **fence_out, - struct fifo_profile_gk20a *profile); - -#endif /* __NVGPU_CHANNEL_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/clk.c b/drivers/gpu/nvgpu/common/linux/clk.c deleted file mode 100644 index 414b17c4..00000000 --- a/drivers/gpu/nvgpu/common/linux/clk.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Linux clock support - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -#include "clk.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -#include "gk20a/gk20a.h" - -static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - unsigned long ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - if (g->clk.tegra_clk) - ret = g->clk.cached_rate ? - g->clk.cached_rate : - clk_get_rate(g->clk.tegra_clk); - else - ret = platform->cached_rate ? 
- platform->cached_rate : - clk_get_rate(platform->clk[0]); - break; - case CTRL_CLK_DOMAIN_PWRCLK: - ret = clk_get_rate(platform->clk[1]); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = 0; - break; - } - - return ret; -} - -static int nvgpu_linux_clk_set_rate(struct gk20a *g, - u32 api_domain, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - int ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - if (g->clk.tegra_clk) { - ret = clk_set_rate(g->clk.tegra_clk, rate); - if (!ret) - g->clk.cached_rate = rate; - } else { - ret = clk_set_rate(platform->clk[0], rate); - if (!ret) - platform->cached_rate = rate; - } - break; - case CTRL_CLK_DOMAIN_PWRCLK: - ret = clk_set_rate(platform->clk[1], rate); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = -EINVAL; - break; - } - - return ret; -} - -static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - - /* - * On Tegra platforms with GPCPLL bus (gbus) GPU tegra_clk clock exposed - * to frequency governor is a shared user on the gbus. The latter can be - * accessed as GPU clock parent, and incorporate DVFS related data. - */ - if (g->clk.tegra_clk) - return tegra_dvfs_get_fmax_at_vmin_safe_t( - clk_get_parent(g->clk.tegra_clk)); - - if (platform->maxmin_clk_id) - return tegra_bpmp_dvfs_get_fmax_at_vmin( - platform->maxmin_clk_id); - - return 0; -} - -static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) -{ - struct clk *c; - - c = clk_get_sys("gpu_ref", "gpu_ref"); - if (IS_ERR(c)) { - nvgpu_err(g, "failed to get GPCPLL reference clock"); - return 0; - } - - return clk_get_rate(c); -} - -static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, - unsigned long rate) -{ - return tegra_dvfs_predict_mv_at_hz_cur_tfloor( - clk_get_parent(clk->tegra_clk), rate); -} - -static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) -{ - int ret; - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk)); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - ret = 0; - break; - } - - return ret; -} - -static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) -{ - return clk_prepare_enable(clk->tegra_clk); -} - -static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) -{ - clk_disable_unprepare(clk->tegra_clk); -} - -void nvgpu_linux_init_clk_support(struct gk20a *g) -{ - g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; - g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; - g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; - g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; - g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; - g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; - g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; - g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; -} diff --git a/drivers/gpu/nvgpu/common/linux/clk.h b/drivers/gpu/nvgpu/common/linux/clk.h deleted file mode 100644 index 614a7fd7..00000000 --- a/drivers/gpu/nvgpu/common/linux/clk.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef NVGPU_COMMON_LINUX_CLK_H - -struct gk20a; -void nvgpu_linux_init_clk_support(struct gk20a *g); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c deleted file mode 100644 index 353f6363..00000000 --- a/drivers/gpu/nvgpu/common/linux/comptags.c +++ /dev/null @@ -1,140 +0,0 @@ -/* -* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include - -#include "gk20a/gk20a.h" -#include "dmabuf.h" - -void gk20a_get_comptags(struct nvgpu_os_buffer *buf, - struct gk20a_comptags *comptags) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - - if (!comptags) - return; - - if (!priv) { - memset(comptags, 0, sizeof(*comptags)); - return; - } - - nvgpu_mutex_acquire(&priv->lock); - *comptags = priv->comptags; - nvgpu_mutex_release(&priv->lock); -} - -int gk20a_alloc_or_get_comptags(struct gk20a *g, - struct nvgpu_os_buffer *buf, - struct gk20a_comptag_allocator *allocator, - struct gk20a_comptags *comptags) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - u32 offset; - int err; - unsigned int ctag_granularity; - u32 lines; - - if (!priv) - return -ENOSYS; - - nvgpu_mutex_acquire(&priv->lock); - - if (priv->comptags.allocated) { - /* - * already allocated - */ - *comptags = priv->comptags; - - err = 0; - goto exit_locked; - } - - ctag_granularity = g->ops.fb.compression_page_size(g); - lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); - - /* 0-sized buffer? Shouldn't occur, but let's check anyways. */ - if (lines < 1) { - err = -EINVAL; - goto exit_locked; - } - - /* store the allocator so we can use it when we free the ctags */ - priv->comptag_allocator = allocator; - err = gk20a_comptaglines_alloc(allocator, &offset, lines); - if (!err) { - priv->comptags.offset = offset; - priv->comptags.lines = lines; - priv->comptags.needs_clear = true; - } else { - priv->comptags.offset = 0; - priv->comptags.lines = 0; - priv->comptags.needs_clear = false; - } - - /* - * We don't report an error here if comptag alloc failed. The - * caller will simply fallback to incompressible kinds. It - * would not be safe to re-allocate comptags anyways on - * successive calls, as that would break map aliasing. 
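
The allocation above sizes the comptag request by rounding the buffer size up to whole compression pages, exactly what DIV_ROUND_UP_ULL computes. A standalone version of that computation (values hypothetical; 128 KiB granularity is only an example):

    #include <stdint.h>

    /* lines = ceil(buf_size / ctag_granularity) */
    static uint32_t comptag_lines(uint64_t buf_size, uint64_t granularity)
    {
            return (uint32_t)((buf_size + granularity - 1) / granularity);
    }
    /* e.g. comptag_lines(1 << 20, 128 << 10) == 8;
     * a 1-byte buffer still needs 1 line. */
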
- */ - err = 0; - priv->comptags.allocated = true; - - *comptags = priv->comptags; - -exit_locked: - nvgpu_mutex_release(&priv->lock); - - return err; -} - -bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - bool clear_started = false; - - if (priv) { - nvgpu_mutex_acquire(&priv->lock); - - clear_started = priv->comptags.needs_clear; - - if (!clear_started) - nvgpu_mutex_release(&priv->lock); - } - - return clear_started; -} - -void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, - bool clear_successful) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, - buf->dev); - if (priv) { - if (clear_successful) - priv->comptags.needs_clear = false; - - nvgpu_mutex_release(&priv->lock); - } -} diff --git a/drivers/gpu/nvgpu/common/linux/cond.c b/drivers/gpu/nvgpu/common/linux/cond.c deleted file mode 100644 index 633c34fd..00000000 --- a/drivers/gpu/nvgpu/common/linux/cond.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include - -int nvgpu_cond_init(struct nvgpu_cond *cond) -{ - init_waitqueue_head(&cond->wq); - cond->initialized = true; - - return 0; -} - -void nvgpu_cond_destroy(struct nvgpu_cond *cond) -{ - cond->initialized = false; -} - -int nvgpu_cond_signal(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up(&cond->wq); - - return 0; -} - -int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_interruptible(&cond->wq); - - return 0; -} - -int nvgpu_cond_broadcast(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_all(&cond->wq); - - return 0; -} - -int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) -{ - if (!cond->initialized) - return -EINVAL; - - wake_up_interruptible_all(&cond->wq); - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c deleted file mode 100644 index a335988a..00000000 --- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
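
The nvgpu_cond implementation above is a thin wrapper over a Linux waitqueue plus an initialized flag: signal/broadcast map to wake_up variants, and waiting is done against a predicate that is re-checked after every wakeup. A userspace analogue of the same producer/consumer pattern, using POSIX condition variables (sketch only; the kernel code uses waitqueues, not pthreads):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool data_ready;

    void producer_signal(void)
    {
            pthread_mutex_lock(&lock);
            data_ready = true;
            pthread_cond_signal(&cond);     /* ~ nvgpu_cond_signal() */
            pthread_mutex_unlock(&lock);
    }

    void consumer_wait(void)
    {
            pthread_mutex_lock(&lock);
            while (!data_ready)             /* predicate re-checked on wakeup */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
    }
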
- */ - -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" - -#include -#include -#include -#include - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "ctxsw_trace.h" - -#include -#include - -#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) - -/* Userland-facing FIFO (one global + eventually one per VM) */ -struct gk20a_ctxsw_dev { - struct gk20a *g; - - struct nvgpu_ctxsw_ring_header *hdr; - struct nvgpu_ctxsw_trace_entry *ents; - struct nvgpu_ctxsw_trace_filter filter; - bool write_enabled; - struct nvgpu_cond readout_wq; - size_t size; - u32 num_ents; - - nvgpu_atomic_t vma_ref; - - struct nvgpu_mutex write_lock; -}; - - -struct gk20a_ctxsw_trace { - struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; -}; - -static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) -{ - return (hdr->write_idx == hdr->read_idx); -} - -static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) -{ - return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; -} - -static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) -{ - return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; -} - -ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, - loff_t *off) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; - struct nvgpu_ctxsw_trace_entry __user *entry = - (struct nvgpu_ctxsw_trace_entry *) buf; - size_t copied = 0; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, - "filp=%p buf=%p size=%zu", filp, buf, size); - - nvgpu_mutex_acquire(&dev->write_lock); - while (ring_is_empty(hdr)) { - nvgpu_mutex_release(&dev->write_lock); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, - !ring_is_empty(hdr), 0); - if (err) - return err; - nvgpu_mutex_acquire(&dev->write_lock); - } - - while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { - if (ring_is_empty(hdr)) - break; - - if (copy_to_user(entry, &dev->ents[hdr->read_idx], - sizeof(*entry))) { - nvgpu_mutex_release(&dev->write_lock); - return -EFAULT; - } - - hdr->read_idx++; - if (hdr->read_idx >= hdr->num_ents) - hdr->read_idx = 0; - - entry++; - copied += sizeof(*entry); - size -= sizeof(*entry); - } - - nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, - hdr->read_idx); - - *off = hdr->read_idx; - nvgpu_mutex_release(&dev->write_lock); - - return copied; -} - -static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = true; - nvgpu_mutex_release(&dev->write_lock); - dev->g->ops.fecs_trace.enable(dev->g); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); - dev->g->ops.fecs_trace.disable(dev->g); - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = false; - nvgpu_mutex_release(&dev->write_lock); - return 0; -} - -static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, - size_t size) -{ - struct gk20a *g = dev->g; - void *buf; - int err; - - if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) - return -EBUSY; - - err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); - if (err) - return err; - - - dev->hdr = buf; - 
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); - dev->size = size; - dev->num_ents = dev->hdr->num_ents; - - nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", - dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); - return 0; -} - -int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, - void **buf, size_t *size) -{ - struct nvgpu_ctxsw_ring_header *hdr; - - *size = roundup(*size, PAGE_SIZE); - hdr = vmalloc_user(*size); - if (!hdr) - return -ENOMEM; - - hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; - hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; - hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) - / sizeof(struct nvgpu_ctxsw_trace_entry); - hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); - hdr->drop_count = 0; - hdr->read_idx = 0; - hdr->write_idx = 0; - hdr->write_seqno = 0; - - *buf = hdr; - return 0; -} - -int gk20a_ctxsw_dev_ring_free(struct gk20a *g) -{ - struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; - - nvgpu_vfree(g, dev->hdr); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_ring_setup_args *args) -{ - struct gk20a *g = dev->g; - size_t size = args->size; - int ret; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); - - if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) - return -EINVAL; - - nvgpu_mutex_acquire(&dev->write_lock); - ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); - nvgpu_mutex_release(&dev->write_lock); - - return ret; -} - -static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_trace_filter_args *args) -{ - struct gk20a *g = dev->g; - - nvgpu_mutex_acquire(&dev->write_lock); - dev->filter = args->filter; - nvgpu_mutex_release(&dev->write_lock); - - if (g->ops.fecs_trace.set_filter) - g->ops.fecs_trace.set_filter(g, &dev->filter); - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, - struct nvgpu_ctxsw_trace_filter_args *args) -{ - nvgpu_mutex_acquire(&dev->write_lock); - args->filter = dev->filter; - nvgpu_mutex_release(&dev->write_lock); - - return 0; -} - -static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) -{ - struct gk20a *g = dev->g; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.fecs_trace.flush) - err = g->ops.fecs_trace.flush(g); - - if (likely(!err)) - err = g->ops.fecs_trace.poll(g); - - gk20a_idle(g); - return err; -} - -int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - struct gk20a_ctxsw_trace *trace; - struct gk20a_ctxsw_dev *dev; - int err; - size_t size; - u32 n; - - /* only one VM for now */ - const int vmid = 0; - - l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); - - if (!capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto free_ref; - } - - err = gk20a_busy(g); - if (err) - goto free_ref; - - trace = g->ctxsw_trace; - if (!trace) { - err = -ENODEV; - goto idle; - } - - /* Allow only one user for this device */ - dev = &trace->devs[vmid]; - nvgpu_mutex_acquire(&dev->write_lock); - if (dev->hdr) { - err = -EBUSY; - goto done; - } - - /* By default, allocate ring buffer big enough to accommodate - * FECS records with default event filter */ - - /* enable all traces by default */ - NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); - - /* compute max number of entries generated 
with this filter */ - n = g->ops.fecs_trace.max_entries(g, &dev->filter); - - size = sizeof(struct nvgpu_ctxsw_ring_header) + - n * sizeof(struct nvgpu_ctxsw_trace_entry); - nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", - size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); - - err = gk20a_ctxsw_dev_alloc_buffer(dev, size); - if (!err) { - filp->private_data = dev; - nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", - filp, dev, size); - } - -done: - nvgpu_mutex_release(&dev->write_lock); - -idle: - gk20a_idle(g); -free_ref: - if (err) - gk20a_put(g); - return err; -} - -int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); - - g->ops.fecs_trace.disable(g); - - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = false; - nvgpu_mutex_release(&dev->write_lock); - - if (dev->hdr) { - dev->g->ops.fecs_trace.free_user_buffer(dev->g); - dev->hdr = NULL; - } - gk20a_put(g); - return 0; -} - -long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: - err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); - break; - case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: - err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); - break; - case NVGPU_CTXSW_IOCTL_RING_SETUP: - err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, - (struct nvgpu_ctxsw_ring_setup_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_SET_FILTER: - err = gk20a_ctxsw_dev_ioctl_set_filter(dev, - (struct nvgpu_ctxsw_trace_filter_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_GET_FILTER: - err = gk20a_ctxsw_dev_ioctl_get_filter(dev, - (struct nvgpu_ctxsw_trace_filter_args *) buf); - break; - case NVGPU_CTXSW_IOCTL_POLL: - err = gk20a_ctxsw_dev_ioctl_poll(dev); - break; - default: - dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); - - return err; -} - -unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; - unsigned int mask = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - nvgpu_mutex_acquire(&dev->write_lock); - poll_wait(filp, &dev->readout_wq.wq, wait); - if (!ring_is_empty(hdr)) - mask |= POLLIN | POLLRDNORM; - nvgpu_mutex_release(&dev->write_lock); - - return mask; -} - -static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev *dev = vma->vm_private_data; - struct gk20a *g = dev->g; - - nvgpu_atomic_inc(&dev->vma_ref); - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - nvgpu_atomic_read(&dev->vma_ref)); -} - -static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev 
*dev = vma->vm_private_data; - struct gk20a *g = dev->g; - - nvgpu_atomic_dec(&dev->vma_ref); - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - nvgpu_atomic_read(&dev->vma_ref)); -} - -static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { - .open = gk20a_ctxsw_dev_vma_open, - .close = gk20a_ctxsw_dev_vma_close, -}; - -int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, - struct vm_area_struct *vma) -{ - return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); -} - -int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct gk20a_ctxsw_dev *dev = filp->private_data; - struct gk20a *g = dev->g; - int ret; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - - ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); - if (likely(!ret)) { - vma->vm_private_data = dev; - vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; - vma->vm_ops->open(vma); - } - - return ret; -} - -#ifdef CONFIG_GK20A_CTXSW_TRACE -static int gk20a_ctxsw_init_devs(struct gk20a *g) -{ - struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; - struct gk20a_ctxsw_dev *dev = trace->devs; - int err; - int i; - - for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { - dev->g = g; - dev->hdr = NULL; - dev->write_enabled = false; - nvgpu_cond_init(&dev->readout_wq); - err = nvgpu_mutex_init(&dev->write_lock); - if (err) - return err; - nvgpu_atomic_set(&dev->vma_ref, 0); - dev++; - } - return 0; -} -#endif - -int gk20a_ctxsw_trace_init(struct gk20a *g) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); - - /* if tracing is not supported, skip this */ - if (!g->ops.fecs_trace.init) - return 0; - - if (likely(trace)) - return 0; - - trace = nvgpu_kzalloc(g, sizeof(*trace)); - if (unlikely(!trace)) - return -ENOMEM; - g->ctxsw_trace = trace; - - err = gk20a_ctxsw_init_devs(g); - if (err) - goto fail; - - err = g->ops.fecs_trace.init(g); - if (unlikely(err)) - goto fail; - - return 0; - -fail: - memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); - nvgpu_kfree(g, trace); - g->ctxsw_trace = NULL; - return err; -#else - return 0; -#endif -} - -void gk20a_ctxsw_trace_cleanup(struct gk20a *g) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct gk20a_ctxsw_trace *trace; - struct gk20a_ctxsw_dev *dev; - int i; - - if (!g->ctxsw_trace) - return; - - trace = g->ctxsw_trace; - dev = trace->devs; - - for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { - nvgpu_mutex_destroy(&dev->write_lock); - dev++; - } - - nvgpu_kfree(g, g->ctxsw_trace); - g->ctxsw_trace = NULL; - - g->ops.fecs_trace.deinit(g); -#endif -} - -int gk20a_ctxsw_trace_write(struct gk20a *g, - struct nvgpu_ctxsw_trace_entry *entry) -{ - struct nvgpu_ctxsw_ring_header *hdr; - struct gk20a_ctxsw_dev *dev; - int ret = 0; - const char *reason; - u32 write_idx; - - if (!g->ctxsw_trace) - return 0; - - if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) - return -ENODEV; - - dev = &g->ctxsw_trace->devs[entry->vmid]; - hdr = dev->hdr; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, - "dev=%p hdr=%p", dev, hdr); - - nvgpu_mutex_acquire(&dev->write_lock); - - if (unlikely(!hdr)) { - /* device has been released */ - ret = -ENODEV; - goto done; - } - - write_idx = hdr->write_idx; - if (write_idx >= dev->num_ents) { - nvgpu_err(dev->g, - "write_idx=%u out of range [0..%u]", - write_idx, dev->num_ents); - ret = -ENOSPC; - reason = "write_idx out of range"; - goto disable; - } - - entry->seqno = 
hdr->write_seqno++; - - if (!dev->write_enabled) { - ret = -EBUSY; - reason = "write disabled"; - goto drop; - } - - if (unlikely(ring_is_full(hdr))) { - ret = -ENOSPC; - reason = "user fifo full"; - goto drop; - } - - if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { - reason = "filtered out"; - goto filter; - } - - nvgpu_log(g, gpu_dbg_ctxsw, - "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", - entry->seqno, entry->context_id, entry->pid, - entry->tag, entry->timestamp); - - dev->ents[write_idx] = *entry; - - /* ensure record is written before updating write index */ - nvgpu_smp_wmb(); - - write_idx++; - if (unlikely(write_idx >= hdr->num_ents)) - write_idx = 0; - hdr->write_idx = write_idx; - nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", - hdr->read_idx, hdr->write_idx, ring_len(hdr)); - - nvgpu_mutex_release(&dev->write_lock); - return ret; - -disable: - g->ops.fecs_trace.disable(g); - -drop: - hdr->drop_count++; - -filter: - nvgpu_log(g, gpu_dbg_ctxsw, - "dropping seqno=%d context_id=%08x pid=%lld " - "tag=%x time=%llx (%s)", - entry->seqno, entry->context_id, entry->pid, - entry->tag, entry->timestamp, reason); - -done: - nvgpu_mutex_release(&dev->write_lock); - return ret; -} - -void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) -{ - struct gk20a_ctxsw_dev *dev; - - if (!g->ctxsw_trace) - return; - - dev = &g->ctxsw_trace->devs[vmid]; - nvgpu_cond_signal_interruptible(&dev->readout_wq); -} - -void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct nvgpu_ctxsw_trace_entry entry = { - .vmid = 0, - .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, - .context_id = 0, - .pid = ch->tgid, - }; - - if (!g->ctxsw_trace) - return; - - g->ops.ptimer.read_ptimer(g, &entry.timestamp); - gk20a_ctxsw_trace_write(g, &entry); - gk20a_ctxsw_trace_wake_up(g, 0); -#endif - trace_gk20a_channel_reset(ch->chid, ch->tsgid); -} - -void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) -{ -#ifdef CONFIG_GK20A_CTXSW_TRACE - struct nvgpu_ctxsw_trace_entry entry = { - .vmid = 0, - .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, - .context_id = 0, - .pid = tsg->tgid, - }; - - if (!g->ctxsw_trace) - return; - - g->ops.ptimer.read_ptimer(g, &entry.timestamp); - gk20a_ctxsw_trace_write(g, &entry); - gk20a_ctxsw_trace_wake_up(g, 0); -#endif - trace_gk20a_channel_reset(~0, tsg->tsgid); -} diff --git a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h deleted file mode 100644 index 88ca7f25..00000000 --- a/drivers/gpu/nvgpu/common/linux/ctxsw_trace.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
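
The barrier in the trace write path above is the classic single-producer publish pattern: fill the ring slot first, then make the index visible, with a write barrier in between so any reader that observes the new write_idx is guaranteed to observe the record it covers. A C11 analogue of the same idea (standalone sketch; types and ring size simplified):

    #include <stdatomic.h>
    #include <stdint.h>

    #define RING_ENTS 16

    struct entry { uint64_t payload; };

    static struct entry ents[RING_ENTS];
    static _Atomic uint32_t write_idx;

    void ring_publish(struct entry e)
    {
            uint32_t w = atomic_load_explicit(&write_idx,
                                              memory_order_relaxed);

            ents[w % RING_ENTS] = e;        /* 1: write the record */
            /* 2: release ~ nvgpu_smp_wmb(): record visible before index */
            atomic_store_explicit(&write_idx, w + 1, memory_order_release);
    }
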
- */ - -#ifndef __CTXSW_TRACE_H__ -#define __CTXSW_TRACE_H__ - -#include - -#define GK20A_CTXSW_TRACE_NUM_DEVS 1 - -struct file; -struct inode; -struct poll_table_struct; - -struct gk20a; - -int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); -int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); -long gk20a_ctxsw_dev_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, - size_t size, loff_t *offs); -unsigned int gk20a_ctxsw_dev_poll(struct file *filp, - struct poll_table_struct *pts); - -#endif /* __CTXSW_TRACE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c deleted file mode 100644 index 8738f3e7..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_cde.h" -#include "debug_ce.h" -#include "debug_fifo.h" -#include "debug_gr.h" -#include "debug_allocator.h" -#include "debug_kmem.h" -#include "debug_pmu.h" -#include "debug_sched.h" -#include "debug_hal.h" -#include "debug_xve.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -#include "gk20a/gk20a.h" - -#include -#include -#include - -#include - -unsigned int gk20a_debug_trace_cmdbuf; - -static inline void gk20a_debug_write_printk(void *ctx, const char *str, - size_t len) -{ - pr_info("%s", str); -} - -static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, - size_t len) -{ - seq_write((struct seq_file *)ctx, str, len); -} - -void gk20a_debug_output(struct gk20a_debug_output *o, - const char *fmt, ...) 
-{ - va_list args; - int len; - - va_start(args, fmt); - len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); - va_end(args); - o->fn(o->ctx, o->buf, len); -} - -static int gk20a_gr_dump_regs(struct gk20a *g, - struct gk20a_debug_output *o) -{ - if (g->ops.gr.dump_gr_regs) - gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); - - return 0; -} - -int gk20a_gr_debug_dump(struct gk20a *g) -{ - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_printk - }; - - gk20a_gr_dump_regs(g, &o); - - return 0; -} - -static int gk20a_gr_debug_show(struct seq_file *s, void *unused) -{ - struct device *dev = s->private; - struct gk20a *g = gk20a_get_platform(dev)->g; - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_to_seqfile, - .ctx = s, - }; - int err; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu: %d", err); - return -EINVAL; - } - - gk20a_gr_dump_regs(g, &o); - - gk20a_idle(g); - - return 0; -} - -void gk20a_debug_dump(struct gk20a *g) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_printk - }; - - if (platform->dump_platform_dependencies) - platform->dump_platform_dependencies(dev_from_gk20a(g)); - - /* HAL only initialized after 1st power-on */ - if (g->ops.debug.show_dump) - g->ops.debug.show_dump(g, &o); -} - -static int gk20a_debug_show(struct seq_file *s, void *unused) -{ - struct device *dev = s->private; - struct gk20a_debug_output o = { - .fn = gk20a_debug_write_to_seqfile, - .ctx = s, - }; - struct gk20a *g; - int err; - - g = gk20a_get_platform(dev)->g; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu: %d", err); - return -EFAULT; - } - - /* HAL only initialized after 1st power-on */ - if (g->ops.debug.show_dump) - g->ops.debug.show_dump(g, &o); - - gk20a_idle(g); - return 0; -} - -static int gk20a_gr_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_gr_debug_show, inode->i_private); -} - -static int gk20a_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_debug_show, inode->i_private); -} - -static const struct file_operations gk20a_gr_debug_fops = { - .open = gk20a_gr_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations gk20a_debug_fops = { - .open = gk20a_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) -{ - g->ops.fifo.dump_pbdma_status(g, o); - g->ops.fifo.dump_eng_status(g, o); - - gk20a_debug_dump_all_channel_status_ramfc(g, o); -} - -static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - struct gk20a *g = file->private_data; - - if (g->mm.disable_bigpage) - buf[0] = 'Y'; - else - buf[0] = 'N'; - buf[1] = '\n'; - buf[2] = 0x00; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[32]; - int buf_size; - bool bv; - struct gk20a *g = file->private_data; - - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - if (strtobool(buf, &bv) == 0) { - g->mm.disable_bigpage = bv; - gk20a_init_gpu_characteristics(g); - } - - return count; -} - -static struct file_operations disable_bigpage_fops = { - 
.open = simple_open, - .read = disable_bigpage_read, - .write = disable_bigpage_write, -}; - -static int railgate_residency_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - unsigned long time_since_last_state_transition_ms; - unsigned long total_rail_gate_time_ms; - unsigned long total_rail_ungate_time_ms; - - if (platform->is_railgated(dev_from_gk20a(g))) { - time_since_last_state_transition_ms = - jiffies_to_msecs(jiffies - - g->pstats.last_rail_gate_complete); - total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; - total_rail_gate_time_ms = - g->pstats.total_rail_gate_time_ms + - time_since_last_state_transition_ms; - } else { - time_since_last_state_transition_ms = - jiffies_to_msecs(jiffies - - g->pstats.last_rail_ungate_complete); - total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; - total_rail_ungate_time_ms = - g->pstats.total_rail_ungate_time_ms + - time_since_last_state_transition_ms; - } - - seq_printf(s, "Time with Rails Gated: %lu ms\n" - "Time with Rails UnGated: %lu ms\n" - "Total railgating cycles: %lu\n", - total_rail_gate_time_ms, - total_rail_ungate_time_ms, - g->pstats.railgating_cycle_count - 1); - return 0; - -} - -static int railgate_residency_open(struct inode *inode, struct file *file) -{ - return single_open(file, railgate_residency_show, inode->i_private); -} - -static const struct file_operations railgate_residency_fops = { - .open = railgate_residency_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int gk20a_railgating_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *d; - - d = debugfs_create_file( - "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &railgate_residency_fops); - if (!d) - return -ENOMEM; - - return 0; -} -static ssize_t timeouts_enabled_read(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - struct gk20a *g = file->private_data; - - if (nvgpu_is_timeouts_enabled(g)) - buf[0] = 'Y'; - else - buf[0] = 'N'; - buf[1] = '\n'; - buf[2] = 0x00; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t timeouts_enabled_write(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - int buf_size; - bool timeouts_enabled; - struct gk20a *g = file->private_data; - - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - if (strtobool(buf, &timeouts_enabled) == 0) { - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (timeouts_enabled == false) { - /* requesting to disable timeouts */ - if (g->timeouts_disabled_by_user == false) { - nvgpu_atomic_inc(&g->timeouts_disabled_refcount); - g->timeouts_disabled_by_user = true; - } - } else { - /* requesting to enable timeouts */ - if (g->timeouts_disabled_by_user == true) { - nvgpu_atomic_dec(&g->timeouts_disabled_refcount); - g->timeouts_disabled_by_user = false; - } - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - } - - return count; -} - -static const struct file_operations timeouts_enabled_fops = { - .open = simple_open, - .read = timeouts_enabled_read, - .write = timeouts_enabled_write, -}; - -void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - - l->debugfs = debugfs_create_dir(dev_name(dev), NULL); - 
if (!l->debugfs) - return; - - if (debugfs_symlink) - l->debugfs_alias = - debugfs_create_symlink(debugfs_symlink, - NULL, dev_name(dev)); - - debugfs_create_file("status", S_IRUGO, l->debugfs, - dev, &gk20a_debug_fops); - debugfs_create_file("gr_status", S_IRUGO, l->debugfs, - dev, &gk20a_gr_debug_fops); - debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, - l->debugfs, &gk20a_debug_trace_cmdbuf); - - debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, - l->debugfs, &g->ch_wdt_timeout_ms); - - debugfs_create_u32("disable_syncpoints", S_IRUGO, - l->debugfs, &g->disable_syncpoints); - - /* New debug logging API. */ - debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, - l->debugfs, &g->log_mask); - debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, - l->debugfs, &g->log_trace); - - l->debugfs_ltc_enabled = - debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, - l->debugfs, - &g->mm.ltc_enabled_target); - - l->debugfs_gr_idle_timeout_default = - debugfs_create_u32("gr_idle_timeout_default_us", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr_idle_timeout_default); - l->debugfs_timeouts_enabled = - debugfs_create_file("timeouts_enabled", - S_IRUGO|S_IWUSR, - l->debugfs, - g, - &timeouts_enabled_fops); - - l->debugfs_disable_bigpage = - debugfs_create_file("disable_bigpage", - S_IRUGO|S_IWUSR, - l->debugfs, - g, - &disable_bigpage_fops); - - l->debugfs_timeslice_low_priority_us = - debugfs_create_u32("timeslice_low_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_low_priority_us); - l->debugfs_timeslice_medium_priority_us = - debugfs_create_u32("timeslice_medium_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_medium_priority_us); - l->debugfs_timeslice_high_priority_us = - debugfs_create_u32("timeslice_high_priority_us", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->timeslice_high_priority_us); - l->debugfs_runlist_interleave = - debugfs_create_bool("runlist_interleave", - S_IRUGO|S_IWUSR, - l->debugfs, - &g->runlist_interleave); - l->debugfs_force_preemption_gfxp = - debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, - l->debugfs, - &g->gr.ctx_vars.force_preemption_gfxp); - - l->debugfs_force_preemption_cilp = - debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, - l->debugfs, - &g->gr.ctx_vars.force_preemption_cilp); - - l->debugfs_dump_ctxsw_stats = - debugfs_create_bool("dump_ctxsw_stats_on_channel_close", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); - - gr_gk20a_debugfs_init(g); - gk20a_pmu_debugfs_init(g); - gk20a_railgating_debugfs_init(g); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - gk20a_cde_debugfs_init(g); -#endif - gk20a_ce_debugfs_init(g); - nvgpu_alloc_debugfs_init(g); - nvgpu_hal_debugfs_init(g); - gk20a_fifo_debugfs_init(g); - gk20a_sched_debugfs_init(g); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - nvgpu_kmem_debugfs_init(g); -#endif - if (g->pci_vendor_id) - nvgpu_xve_debugfs_init(g); -} - -void gk20a_debug_deinit(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs) - return; - - gk20a_fifo_debugfs_deinit(g); - - debugfs_remove_recursive(l->debugfs); - debugfs_remove(l->debugfs_alias); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c deleted file mode 100644 index d63a9030..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_allocator.h" -#include "os_linux.h" - -#include -#include - -#include - -static int __alloc_show(struct seq_file *s, void *unused) -{ - struct nvgpu_allocator *a = s->private; - - nvgpu_alloc_print_stats(a, s, 1); - - return 0; -} - -static int __alloc_open(struct inode *inode, struct file *file) -{ - return single_open(file, __alloc_show, inode->i_private); -} - -static const struct file_operations __alloc_fops = { - .open = __alloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs_allocators) - return; - - a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - l->debugfs_allocators, - a, &__alloc_fops); -} - -void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) -{ -} - -void nvgpu_alloc_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_allocators)) { - l->debugfs_allocators = NULL; - return; - } -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h deleted file mode 100644 index 1b21cfc5..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_ALLOCATOR_H__ -#define __NVGPU_DEBUG_ALLOCATOR_H__ - -struct gk20a; -void nvgpu_alloc_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c deleted file mode 100644 index f0afa6ee..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_cde.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
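Almost every read-only node in these files repeats one seq_file idiom, visible in __alloc_show/__alloc_open above: the object of interest is stashed in inode->i_private at creation time, single_open() binds it to a show callback, and the canned seq_read/seq_lseek/single_release entries do the rest. A minimal self-contained instance of the pattern (my_state and the node name are hypothetical):

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    struct my_state {
            int value;
    };

    static int example_show(struct seq_file *s, void *unused)
    {
            struct my_state *st = s->private; /* handed over by single_open() */

            seq_printf(s, "value: %d\n", st->value);
            return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
            return single_open(file, example_show, inode->i_private);
    }

    static const struct file_operations example_fops = {
            .open = example_open,
            .read = seq_read,
            .llseek = seq_lseek,
            .release = single_release,
    };

    /* registration, with st becoming i_private:
     *   debugfs_create_file("example", S_IRUGO, parent, st, &example_fops);
     */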
- * - */ - -#include "debug_cde.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -#include - - -static ssize_t gk20a_cde_reload_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct nvgpu_os_linux *l = file->private_data; - gk20a_cde_reload(l); - return count; -} - -static const struct file_operations gk20a_cde_reload_fops = { - .open = simple_open, - .write = gk20a_cde_reload_write, -}; - -void gk20a_cde_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - if (!platform->has_cde) - return; - - debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.shader_parameter); - debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_count); - debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_usecount); - debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, - l->debugfs, &l->cde_app.ctx_count_top); - debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, - l, &gk20a_cde_reload_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h deleted file mode 100644 index 4895edd6..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_cde.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_CDE_H__ -#define __NVGPU_DEBUG_CDE_H__ - -struct gk20a; -void gk20a_cde_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_CDE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c deleted file mode 100644 index cea0bb47..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_ce.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
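reload_cde_firmware above is the write-only counterpart: simple_open() merely copies inode->i_private into file->private_data, the handler performs the action no matter what bytes arrive, and returning count consumes the whole write, so any write such as `echo 1 > reload_cde_firmware` triggers a reload. Sketched with placeholder names:

    #include <linux/debugfs.h>
    #include <linux/fs.h>

    struct my_state;
    void do_the_action(struct my_state *st); /* hypothetical action */

    static ssize_t trigger_write(struct file *file, const char __user *buf,
                                 size_t count, loff_t *ppos)
    {
            struct my_state *st = file->private_data; /* set by simple_open() */

            do_the_action(st);
            return count; /* acknowledge everything that was written */
    }

    static const struct file_operations trigger_fops = {
            .open = simple_open,
            .write = trigger_write,
    };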
- * - */ - -#include "debug_ce.h" -#include "os_linux.h" - -#include - -void gk20a_ce_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.ctx_count); - debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.app_state); - debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, - l->debugfs, &g->ce_app.next_ctx_id); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h deleted file mode 100644 index 2a8750c4..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_ce.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_CE_H__ -#define __NVGPU_DEBUG_CE_H__ - -struct gk20a; -void gk20a_ce_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_CE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_clk.c b/drivers/gpu/nvgpu/common/linux/debug_clk.c deleted file mode 100644 index 2484d44b..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_clk.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include - -#include "gm20b/clk_gm20b.h" -#include "os_linux.h" -#include "platform_gk20a.h" - -static int rate_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - struct clk_gk20a *clk = &g->clk; - - *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); - return 0; -} -static int rate_set(void *data, u64 val) -{ - struct gk20a *g = (struct gk20a *)data; - return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); -} -DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); - -static int pll_reg_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_pll_debug_data d; - u32 reg, m, n, pl, f; - int err = 0; - - if (g->ops.clk.get_pll_debug_data) { - err = g->ops.clk.get_pll_debug_data(g, &d); - if (err) - return err; - } else { - return -EINVAL; - } - - seq_printf(s, "bypassctrl = %s, ", - d.trim_sys_bypassctrl_val ? "bypass" : "vco"); - seq_printf(s, "sel_vco = %s, ", - d.trim_sys_sel_vco_val ? "vco" : "bypass"); - - seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, - d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", - d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", - d.trim_sys_gpcpll_cfg_sync_on ? 
"sync_on" : "sync_off"); - - reg = d.trim_sys_gpcpll_coeff_val; - m = d.trim_sys_gpcpll_coeff_mdiv; - n = d.trim_sys_gpcpll_coeff_ndiv; - pl = d.trim_sys_gpcpll_coeff_pldiv; - f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); - seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); - seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); - - seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", - d.trim_sys_gpcpll_dvfs0_val, - d.trim_sys_gpcpll_dvfs0_dfs_coeff, - d.trim_sys_gpcpll_dvfs0_dfs_det_max, - d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); - - return 0; -} - -static int pll_reg_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_reg_show, inode->i_private); -} - -static const struct file_operations pll_reg_fops = { - .open = pll_reg_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pll_reg_raw_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_pll_debug_data d; - u32 reg; - int err = 0; - - if (g->ops.clk.get_pll_debug_data) { - err = g->ops.clk.get_pll_debug_data(g, &d); - if (err) - return err; - } else { - return -EINVAL; - } - - seq_puts(s, "GPCPLL REGISTERS:\n"); - for (reg = d.trim_sys_gpcpll_cfg_reg; - reg <= d.trim_sys_gpcpll_dvfs2_reg; - reg += sizeof(u32)) - seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); - - seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); - - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, - d.trim_sys_sel_vco_val); - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, - d.trim_sys_gpc2clk_out_val); - seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, - d.trim_sys_bypassctrl_val); - - return 0; -} - -static int pll_reg_raw_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_reg_raw_show, inode->i_private); -} - -static ssize_t pll_reg_raw_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct gk20a *g = file->f_path.dentry->d_inode->i_private; - char buf[80]; - u32 reg, val; - int err = 0; - - if (sizeof(buf) <= count) - return -EINVAL; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - - /* terminate buffer and trim - white spaces may be appended - * at the end when invoked from shell command line */ - buf[count] = '\0'; - strim(buf); - - if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) - return -EINVAL; - - if (g->ops.clk.pll_reg_write(g, reg, val)) - err = g->ops.clk.pll_reg_write(g, reg, val); - else - err = -EINVAL; - - return err; -} - -static const struct file_operations pll_reg_raw_fops = { - .open = pll_reg_raw_open, - .read = seq_read, - .write = pll_reg_raw_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int monitor_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - int err = 0; - - if (g->ops.clk.get_gpcclk_clock_counter) - err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); - else - err = -EINVAL; - - return err; -} -DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); - -static int voltage_get(void *data, u64 *val) -{ - struct gk20a *g = (struct gk20a *)data; - int err = 0; - - if (g->ops.clk.get_voltage) - err = g->ops.clk.get_voltage(&g->clk, val); - else - err = -EINVAL; - - return err; -} -DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); - -static int pll_param_show(struct seq_file *s, void *data) -{ - struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); - - seq_printf(s, 
"ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", - gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, - gpc_pll_params->vco_ctrl); - return 0; -} - -static int pll_param_open(struct inode *inode, struct file *file) -{ - return single_open(file, pll_param_show, inode->i_private); -} - -static const struct file_operations pll_param_fops = { - .open = pll_param_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int gm20b_clk_init_debugfs(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *d; - - if (!l->debugfs) - return -EINVAL; - - d = debugfs_create_file( - "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); - if (!d) - goto err_out; - - d = debugfs_create_file("pll_reg_raw", - S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); - if (!d) - goto err_out; - - d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, - (u32 *)&g->clk.gpc_pll.mode); - if (!d) - goto err_out; - - d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, - l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); - if (!d) - goto err_out; - - return 0; - -err_out: - pr_err("%s: Failed to make debugfs node\n", __func__); - return -ENOMEM; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c deleted file mode 100644 index 2b5674c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "debug_fifo.h" -#include "os_linux.h" - -#include -#include - -#include -#include - -void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); - -static void *gk20a_fifo_sched_debugfs_seq_start( - struct seq_file *s, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void *gk20a_fifo_sched_debugfs_seq_next( - struct seq_file *s, void *v, loff_t *pos) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - - ++(*pos); - if (*pos >= f->num_channels) - return NULL; - - return &f->channel[*pos]; -} - -static void gk20a_fifo_sched_debugfs_seq_stop( - struct seq_file *s, void *v) -{ -} - -static int gk20a_fifo_sched_debugfs_seq_show( - struct seq_file *s, void *v) -{ - struct gk20a *g = s->private; - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = v; - struct tsg_gk20a *tsg = NULL; - - struct fifo_engine_info_gk20a *engine_info; - struct fifo_runlist_info_gk20a *runlist; - u32 runlist_id; - int ret = SEQ_SKIP; - u32 engine_id; - - engine_id = gk20a_fifo_get_gr_engine_id(g); - engine_info = (f->engine_info + engine_id); - runlist_id = engine_info->runlist_id; - runlist = &f->runlist_info[runlist_id]; - - if (ch == f->channel) { - seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); - seq_puts(s, " (usecs) (msecs)\n"); - ret = 0; - } - - if (!test_bit(ch->chid, runlist->active_channels)) - return ret; - - if (gk20a_channel_get(ch)) { - tsg = tsg_gk20a_from_ch(ch); - - if (tsg) - seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", - ch->chid, - ch->tsgid, - ch->tgid, - tsg->timeslice_us, - ch->timeout_ms_max, - tsg->interleave_level, - tsg->gr_ctx.graphics_preempt_mode, - tsg->gr_ctx.compute_preempt_mode); - gk20a_channel_put(ch); - } - return 0; -} - -static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { - .start = gk20a_fifo_sched_debugfs_seq_start, - .next = gk20a_fifo_sched_debugfs_seq_next, - .stop = gk20a_fifo_sched_debugfs_seq_stop, - .show = gk20a_fifo_sched_debugfs_seq_show -}; - -static int gk20a_fifo_sched_debugfs_open(struct inode *inode, - struct file *file) -{ - struct gk20a *g = inode->i_private; - int err; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); - if (err) - return err; - - nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); - - ((struct seq_file *)file->private_data)->private = inode->i_private; - return 0; -}; - -/* - * The file operations structure contains our open function along with - * set of the canned seq_ ops. 
- */ -static const struct file_operations gk20a_fifo_sched_debugfs_fops = { - .owner = THIS_MODULE, - .open = gk20a_fifo_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static int gk20a_fifo_profile_enable(void *data, u64 val) -{ - struct gk20a *g = (struct gk20a *) data; - struct fifo_gk20a *f = &g->fifo; - - - nvgpu_mutex_acquire(&f->profile.lock); - if (val == 0) { - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, - __gk20a_fifo_profile_free); - } - } else { - if (!f->profile.enabled) { - /* not kref init as it can have a running condition if - * we enable/disable/enable while kickoff is happening - */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { - f->profile.data = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(struct fifo_profile_gk20a)); - f->profile.sorted = nvgpu_vzalloc(g, - FIFO_PROFILING_ENTRIES * - sizeof(u64)); - if (!(f->profile.data && f->profile.sorted)) { - nvgpu_vfree(g, f->profile.data); - nvgpu_vfree(g, f->profile.sorted); - nvgpu_mutex_release(&f->profile.lock); - return -ENOMEM; - } - nvgpu_ref_init(&f->profile.ref); - } - atomic_set(&f->profile.get.atomic_var, 0); - f->profile.enabled = true; - } - } - nvgpu_mutex_release(&f->profile.lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE( - gk20a_fifo_profile_enable_debugfs_fops, - NULL, - gk20a_fifo_profile_enable, - "%llu\n" -); - -static int __profile_cmp(const void *a, const void *b) -{ - return *((unsigned long long *) a) - *((unsigned long long *) b); -} - -/* - * This uses about 800b in the stack, but the function using it is not part - * of a callstack where much memory is being used, so it is fine - */ -#define PERCENTILE_WIDTH 5 -#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) - -static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, - u64 *percentiles, u32 index_end, u32 index_start) -{ - unsigned int nelem = 0; - unsigned int index; - struct fifo_profile_gk20a *profile; - - for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { - profile = &g->fifo.profile.data[index]; - - if (profile->timestamp[index_end] > - profile->timestamp[index_start]) { - /* This is a valid element */ - g->fifo.profile.sorted[nelem] = - profile->timestamp[index_end] - - profile->timestamp[index_start]; - nelem++; - } - } - - /* sort it */ - sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), - __profile_cmp, NULL); - - /* build ranges */ - for (index = 0; index < PERCENTILE_RANGES; index++) { - percentiles[index] = nelem < PERCENTILE_RANGES ? 
0 : - g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * - nelem)/100 - 1]; - } - return nelem; -} - -static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - unsigned int get, nelem, index; - /* - * 800B in the stack, but function is declared statically and only - * called from debugfs handler - */ - u64 percentiles_ioctl[PERCENTILE_RANGES]; - u64 percentiles_kickoff[PERCENTILE_RANGES]; - u64 percentiles_jobtracking[PERCENTILE_RANGES]; - u64 percentiles_append[PERCENTILE_RANGES]; - u64 percentiles_userd[PERCENTILE_RANGES]; - - if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { - seq_printf(s, "Profiling disabled\n"); - return 0; - } - - get = atomic_read(&g->fifo.profile.get.atomic_var); - - __gk20a_fifo_create_stats(g, percentiles_ioctl, - PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_kickoff, - PROFILE_END, PROFILE_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_jobtracking, - PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); - __gk20a_fifo_create_stats(g, percentiles_append, - PROFILE_APPEND, PROFILE_JOB_TRACKING); - nelem = __gk20a_fifo_create_stats(g, percentiles_userd, - PROFILE_END, PROFILE_APPEND); - - seq_printf(s, "Number of kickoffs: %d\n", nelem); - seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); - - for (index = 0; index < PERCENTILE_RANGES; index++) - seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", - PERCENTILE_WIDTH * (index+1), - percentiles_ioctl[index], - percentiles_kickoff[index], - percentiles_append[index], - percentiles_jobtracking[index], - percentiles_userd[index]); - - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); - - return 0; -} - -static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_fifo_profile_stats, inode->i_private); -} - -static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { - .open = gk20a_fifo_profile_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -void gk20a_fifo_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - struct dentry *fifo_root; - struct dentry *profile_root; - - fifo_root = debugfs_create_dir("fifo", gpu_root); - if (IS_ERR_OR_NULL(fifo_root)) - return; - - nvgpu_log(g, gpu_dbg_info, "g=%p", g); - - debugfs_create_file("sched", 0600, fifo_root, g, - &gk20a_fifo_sched_debugfs_fops); - - profile_root = debugfs_create_dir("profile", fifo_root); - if (IS_ERR_OR_NULL(profile_root)) - return; - - nvgpu_mutex_init(&g->fifo.profile.lock); - g->fifo.profile.enabled = false; - atomic_set(&g->fifo.profile.get.atomic_var, 0); - atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); - - debugfs_create_file("enable", 0600, profile_root, g, - &gk20a_fifo_profile_enable_debugfs_fops); - - debugfs_create_file("stats", 0600, profile_root, g, - &gk20a_fifo_profile_stats_debugfs_fops); - -} - -void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) -{ - if (profile) - profile->timestamp[idx] = nvgpu_current_time_ns(); -} - -void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) -{ - struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, - profile.ref); - nvgpu_vfree(f->g, f->profile.data); - nvgpu_vfree(f->g, f->profile.sorted); -} - -/* Get the next element in the ring buffer of profile entries - * and grab a reference to the structure - */ 
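__gk20a_fifo_create_stats above collapses the raw timestamp deltas into 5%-wide percentile buckets: valid deltas are copied out, sorted ascending, and the p-th percentile is read directly from the sorted array at index p * nelem / 100 - 1. The bucket extraction on its own, assuming an already-sorted input:

    #define PERCENTILE_WIDTH        5
    #define PERCENTILE_RANGES       (100 / PERCENTILE_WIDTH)

    static void build_percentiles(const unsigned long long *sorted,
                                  unsigned int nelem,
                                  unsigned long long out[PERCENTILE_RANGES])
    {
            unsigned int i;

            for (i = 0; i < PERCENTILE_RANGES; i++) {
                    /* Too few samples for a meaningful estimate: report
                     * zeros, exactly as the driver does. */
                    out[i] = nelem < PERCENTILE_RANGES ? 0 :
                            sorted[(PERCENTILE_WIDTH * (i + 1) * nelem) / 100 - 1];
            }
    }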
-struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - struct fifo_profile_gk20a *profile; - unsigned int index; - - /* If kref is zero, profiling is not enabled */ - if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) - return NULL; - index = atomic_inc_return(&f->profile.get.atomic_var); - profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; - - return profile; -} - -/* Free the reference to the structure. This allows deferred cleanups */ -void gk20a_fifo_profile_release(struct gk20a *g, - struct fifo_profile_gk20a *profile) -{ - nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); -} - -void gk20a_fifo_debugfs_deinit(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - - nvgpu_mutex_acquire(&f->profile.lock); - if (f->profile.enabled) { - f->profile.enabled = false; - nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); - } - nvgpu_mutex_release(&f->profile.lock); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h deleted file mode 100644 index 46ac853e..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_FIFO_H__ -#define __NVGPU_DEBUG_FIFO_H__ - -struct gk20a; -void gk20a_fifo_debugfs_init(struct gk20a *g); -void gk20a_fifo_debugfs_deinit(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c deleted file mode 100644 index d54c6d63..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_gr.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_gr.h" -#include "os_linux.h" - -#include - -int gr_gk20a_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->debugfs_gr_default_attrib_cb_size = - debugfs_create_u32("gr_default_attrib_cb_size", - S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.attrib_cb_default_size); - - return 0; -} - diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h deleted file mode 100644 index 4b46acbb..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_gr.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
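gk20a_fifo_profile_acquire above claims ring slots without a lock: one atomic counter is post-incremented and taken modulo the ring size, so concurrent producers land in distinct slots and old entries are simply overwritten on wrap, while the nvgpu_ref get/put pair keeps the storage alive until the last user drops it. The slot-claim idiom, restated with C11 atomics instead of the driver's nvgpu_atomic wrappers:

    #include <stdatomic.h>

    #define ENTRIES 16384

    struct entry {
            unsigned long long timestamp[8];
    };

    static struct entry ring[ENTRIES];
    static atomic_uint next_slot;

    static struct entry *ring_acquire(void)
    {
            /* fetch_add returns the pre-increment value: a unique ticket */
            unsigned int idx = atomic_fetch_add(&next_slot, 1);

            return &ring[idx % ENTRIES];
    }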
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_GR_H__ -#define __NVGPU_DEBUG_GR_H__ - -struct gk20a; -int gr_gk20a_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_GR_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.c b/drivers/gpu/nvgpu/common/linux/debug_hal.c deleted file mode 100644 index 031e335e..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_hal.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include "debug_hal.h" -#include "os_linux.h" - -#include -#include - -/* Format and print a single function pointer to the specified seq_file. */ -static void __hal_print_op(struct seq_file *s, void *op_ptr) -{ - seq_printf(s, "%pF\n", op_ptr); -} - -/* - * Prints an array of function pointer addresses in op_ptrs to the - * specified seq_file - */ -static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) -{ - int i; - - for (i = 0; i < num_ops; i++) - __hal_print_op(s, op_ptrs[i]); -} - -/* - * Show file operation, which generates content of the file once. Prints a list - * of gpu operations as defined by gops and the corresponding function pointer - * destination addresses. Relies on no compiler reordering of struct fields and - * assumption that all members are function pointers. 
- */ -static int __hal_show(struct seq_file *s, void *unused) -{ - struct gpu_ops *gops = s->private; - - __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); - - return 0; -} - -static int __hal_open(struct inode *inode, struct file *file) -{ - return single_open(file, __hal_show, inode->i_private); -} - -static const struct file_operations __hal_fops = { - .open = __hal_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void nvgpu_hal_debugfs_fini(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!(l->debugfs_hal == NULL)) - debugfs_remove_recursive(l->debugfs_hal); -} - -void nvgpu_hal_debugfs_init(struct gk20a *g) -{ - struct dentry *d; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!l->debugfs) - return; - l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_hal)) { - l->debugfs_hal = NULL; - return; - } - - /* Pass along reference to the gpu_ops struct as private data */ - d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, - &g->ops, &__hal_fops); - if (!d) { - nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); - debugfs_remove_recursive(l->debugfs_hal); - return; - } -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_hal.h b/drivers/gpu/nvgpu/common/linux/debug_hal.h deleted file mode 100644 index eee6f234..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_hal.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_HAL_H__ -#define __NVGPU_DEBUG_HAL_H__ - -struct gk20a; -void nvgpu_hal_debugfs_fini(struct gk20a *g); -void nvgpu_hal_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_HAL_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c deleted file mode 100644 index a0c7d47d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_kmem.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include - -#include "os_linux.h" -#include "debug_kmem.h" -#include "kmem_priv.h" - -/** - * to_human_readable_bytes - Determine suffix for passed size. - * - * @bytes - Number of bytes to generate a suffix for. - * @hr_bytes [out] - The human readable number of bytes. - * @hr_suffix [out] - The suffix for the HR number of bytes. - * - * Computes a human readable decomposition of the passed number of bytes. The - * suffix for the bytes is passed back through the @hr_suffix pointer. 
The right - * number of bytes is then passed back in @hr_bytes. This returns the following - * ranges: - * - * 0 - 1023 B - * 1 - 1023 KB - * 1 - 1023 MB - * 1 - 1023 GB - * 1 - 1023 TB - * 1 - ... PB - */ -static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, - const char **hr_suffix) -{ - static const char *suffixes[] = - { "B", "KB", "MB", "GB", "TB", "PB" }; - - u64 suffix_ind = 0; - - while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { - bytes >>= 10; - suffix_ind++; - } - - /* - * Handle case where bytes > 1023PB. - */ - suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? - suffix_ind : ARRAY_SIZE(suffixes) - 1; - - *hr_bytes = bytes; - *hr_suffix = suffixes[suffix_ind]; -} - -/** - * print_hr_bytes - Print human readable bytes - * - * @s - A seq_file to print to. May be NULL. - * @msg - A message to print before the bytes. - * @bytes - Number of bytes. - * - * Print @msg followed by the human readable decomposition of the passed number - * of bytes. - * - * If @s is NULL then this prints will be made to the kernel log. - */ -static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) -{ - u64 hr_bytes; - const char *hr_suffix; - - __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); - __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); -} - -/** - * print_histogram - Build a histogram of the memory usage. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - */ -static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - int i; - u64 pot_min, pot_max; - u64 nr_buckets; - unsigned int *buckets; - unsigned int total_allocs; - struct nvgpu_rbtree_node *node; - static const char histogram_line[] = - "++++++++++++++++++++++++++++++++++++++++"; - - /* - * pot_min is essentially a round down to the nearest power of 2. This - * is the start of the histogram. pot_max is just a round up to the - * nearest power of two. Each histogram bucket is one power of two so - * the histogram buckets are exponential. - */ - pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); - pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); - - nr_buckets = __ffs(pot_max) - __ffs(pot_min); - - buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); - if (!buckets) { - __pstat(s, "OOM: could not allocate bucket storage!?\n"); - return; - } - - /* - * Iterate across all of the allocs and determine what bucket they - * should go in. Round the size down to the nearest power of two to - * find the right bucket. - */ - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - int b; - u64 bucket_min; - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - bucket_min = (u64)rounddown_pow_of_two(alloc->size); - if (bucket_min < tracker->min_alloc) - bucket_min = tracker->min_alloc; - - b = __ffs(bucket_min) - __ffs(pot_min); - - /* - * Handle the one case were there's an alloc exactly as big as - * the maximum bucket size of the largest bucket. Most of the - * buckets have an inclusive minimum and exclusive maximum. But - * the largest bucket needs to have an _inclusive_ maximum as - * well. - */ - if (b == (int)nr_buckets) - b--; - - buckets[b]++; - - nvgpu_rbtree_enum_next(&node, node); - } - - total_allocs = 0; - for (i = 0; i < (int)nr_buckets; i++) - total_allocs += buckets[i]; - - __pstat(s, "Alloc histogram:\n"); - - /* - * Actually compute the histogram lines. 
- */ - for (i = 0; i < (int)nr_buckets; i++) { - char this_line[sizeof(histogram_line) + 1]; - u64 line_length; - u64 hr_bytes; - const char *hr_suffix; - - memset(this_line, 0, sizeof(this_line)); - - /* - * Compute the normalized line length. Cant use floating point - * so we will just multiply everything by 1000 and use fixed - * point. - */ - line_length = (1000 * buckets[i]) / total_allocs; - line_length *= sizeof(histogram_line); - line_length /= 1000; - - memset(this_line, '+', line_length); - - __to_human_readable_bytes(1 << (__ffs(pot_min) + i), - &hr_bytes, &hr_suffix); - __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", - hr_bytes, hr_bytes << 1, - hr_suffix, buckets[i], this_line); - } -} - -/** - * nvgpu_kmem_print_stats - Print kmem tracking stats. - * - * @tracker The tracking to pull data from. - * @s A seq_file to dump info into. - * - * Print stats from a tracker. If @s is non-null then seq_printf() will be - * used with @s. Otherwise the stats are pr_info()ed. - */ -void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - nvgpu_lock_tracker(tracker); - - __pstat(s, "Mem tracker: %s\n\n", tracker->name); - - __pstat(s, "Basic Stats:\n"); - __pstat(s, " Number of allocs %lld\n", - tracker->nr_allocs); - __pstat(s, " Number of frees %lld\n", - tracker->nr_frees); - print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); - print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); - print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); - print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); - print_hr_bytes(s, " Bytes allocated (real) ", - tracker->bytes_alloced_real); - print_hr_bytes(s, " Bytes freed (real) ", - tracker->bytes_freed_real); - __pstat(s, "\n"); - - print_histogram(tracker, s); - - nvgpu_unlock_tracker(tracker); -} - -static int __kmem_tracking_show(struct seq_file *s, void *unused) -{ - struct nvgpu_mem_alloc_tracker *tracker = s->private; - - nvgpu_kmem_print_stats(tracker, s); - - return 0; -} - -static int __kmem_tracking_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_tracking_show, inode->i_private); -} - -static const struct file_operations __kmem_tracking_fops = { - .open = __kmem_tracking_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __kmem_traces_dump_tracker(struct gk20a *g, - struct nvgpu_mem_alloc_tracker *tracker, - struct seq_file *s) -{ - struct nvgpu_rbtree_node *node; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - kmem_print_mem_alloc(g, alloc, s); - - nvgpu_rbtree_enum_next(&node, node); - } - - return 0; -} - -static int __kmem_traces_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - - nvgpu_lock_tracker(g->vmallocs); - seq_puts(s, "Oustanding vmallocs:\n"); - __kmem_traces_dump_tracker(g, g->vmallocs, s); - seq_puts(s, "\n"); - nvgpu_unlock_tracker(g->vmallocs); - - nvgpu_lock_tracker(g->kmallocs); - seq_puts(s, "Oustanding kmallocs:\n"); - __kmem_traces_dump_tracker(g, g->kmallocs, s); - nvgpu_unlock_tracker(g->kmallocs); - - return 0; -} - -static int __kmem_traces_open(struct inode *inode, struct file *file) -{ - return single_open(file, __kmem_traces_show, inode->i_private); -} - -static const struct file_operations __kmem_traces_fops = { - .open = __kmem_traces_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void 
nvgpu_kmem_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *node; - - l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs); - if (IS_ERR_OR_NULL(l->debugfs_kmem)) - return; - - node = debugfs_create_file(g->vmallocs->name, S_IRUGO, - l->debugfs_kmem, - g->vmallocs, &__kmem_tracking_fops); - node = debugfs_create_file(g->kmallocs->name, S_IRUGO, - l->debugfs_kmem, - g->kmallocs, &__kmem_tracking_fops); - node = debugfs_create_file("traces", S_IRUGO, - l->debugfs_kmem, - g, &__kmem_traces_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h deleted file mode 100644 index 44322b53..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_kmem.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_KMEM_H__ -#define __NVGPU_DEBUG_KMEM_H__ - -struct gk20a; -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE -void nvgpu_kmem_debugfs_init(struct gk20a *g); -#endif - -#endif /* __NVGPU_DEBUG_KMEM_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c deleted file mode 100644 index f4ed992d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
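__to_human_readable_bytes() in debug_kmem.c above shifts the count right by 10 bits per step while walking the suffix table, clamping at PB. The same decomposition in compilable user-space form:

    #include <stdio.h>
    #include <stdint.h>

    static void to_human_readable(uint64_t bytes, uint64_t *hr,
                                  const char **suffix)
    {
            static const char *suffixes[] =
                    { "B", "KB", "MB", "GB", "TB", "PB" };
            size_t i = 0;

            /* Stop at the last suffix so >1023 PB stays in PB. */
            while (i < sizeof(suffixes) / sizeof(suffixes[0]) - 1 &&
                   bytes >= 1024) {
                    bytes >>= 10;
                    i++;
            }
            *hr = bytes;
            *suffix = suffixes[i];
    }

    int main(void)
    {
            uint64_t hr;
            const char *sfx;

            to_human_readable(3ULL << 30, &hr, &sfx);
            printf("%llu %s\n", (unsigned long long)hr, sfx); /* 3 GB */
            return 0;
    }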
- * - */ - -#include -#include "debug_pmu.h" -#include "os_linux.h" - -#include -#include -#include - -static int lpwr_debug_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - if (g->ops.pmu.pmu_pg_engines_feature_list && - g->ops.pmu.pmu_pg_engines_feature_list(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != - NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { - seq_printf(s, "PSTATE: %u\n" - "RPPG Enabled: %u\n" - "RPPG ref count: %u\n" - "RPPG state: %u\n" - "MSCG Enabled: %u\n" - "MSCG pstate state: %u\n" - "MSCG transition state: %u\n", - g->ops.clk_arb.get_current_pstate(g), - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat, g->mscg_enabled, - g->pmu.mscg_stat, g->pmu.mscg_transition_state); - - } else - seq_printf(s, "ELPG Enabled: %u\n" - "ELPG ref count: %u\n" - "ELPG state: %u\n", - g->elpg_enabled, g->pmu.elpg_refcnt, - g->pmu.elpg_stat); - - return 0; - -} - -static int lpwr_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, lpwr_debug_show, inode->i_private); -} - -static const struct file_operations lpwr_debug_fops = { - .open = lpwr_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_stat_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - u64 total_ingating, total_ungating, residency, divisor, dividend; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in MSCG: %llu us\n" - "Time out of MSCG: %llu us\n" - "MSCG residency ratio: %llu\n" - "MSCG Entry Count: %u\n" - "MSCG Avg Entry latency %u\n" - "MSCG Avg Exit latency %u\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int mscg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_stat_show, inode->i_private); -} - -static const struct file_operations mscg_stat_fops = { - .open = mscg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mscg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int mscg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, mscg_transitions_show, inode->i_private); -} - -static const struct file_operations mscg_transitions_fops = { - .open = mscg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_stat_show(struct seq_file *s, void *data) -{ - 
struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u64 total_ingating, total_ungating, residency, divisor, dividend; - int err; - - /* Don't unnecessarily power on the device */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_ingating = g->pg_ingating_time_us + - (u64)pg_stat_data.ingating_time; - total_ungating = g->pg_ungating_time_us + - (u64)pg_stat_data.ungating_time; - divisor = total_ingating + total_ungating; - - /* We compute the residency on a scale of 1000 */ - dividend = total_ingating * 1000; - - if (divisor) - residency = div64_u64(dividend, divisor); - else - residency = 0; - - seq_printf(s, - "Time in ELPG: %llu us\n" - "Time out of ELPG: %llu us\n" - "ELPG residency ratio: %llu\n" - "ELPG Entry Count: %u\n" - "ELPG Avg Entry latency %u us\n" - "ELPG Avg Exit latency %u us\n", - total_ingating, total_ungating, - residency, pg_stat_data.gating_cnt, - pg_stat_data.avg_entry_latency_us, - pg_stat_data.avg_exit_latency_us); - return 0; - -} - -static int elpg_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_stat_show, inode->i_private); -} - -static const struct file_operations elpg_stat_fops = { - .open = elpg_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int elpg_transitions_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct pmu_pg_stats_data pg_stat_data = { 0 }; - u32 total_gating_cnt; - int err; - - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_get_pg_stats(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); - gk20a_idle(g); - } - total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; - - seq_printf(s, "%u\n", total_gating_cnt); - return 0; - -} - -static int elpg_transitions_open(struct inode *inode, struct file *file) -{ - return single_open(file, elpg_transitions_show, inode->i_private); -} - -static const struct file_operations elpg_transitions_fops = { - .open = elpg_transitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int falc_trace_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - struct nvgpu_pmu *pmu = &g->pmu; - u32 i = 0, j = 0, k, l, m; - char part_str[40]; - void *tracebuffer; - char *trace; - u32 *trace1; - - /* allocate system memory to copy pmu trace buffer */ - tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); - if (tracebuffer == NULL) - return -ENOMEM; - - /* read pmu traces into system memory buffer */ - nvgpu_mem_rd_n(g, &pmu->trace_buf, - 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); - - trace = (char *)tracebuffer; - trace1 = (u32 *)tracebuffer; - - for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { - for (j = 0; j < 0x40; j++) - if (trace1[(i / 4) + j]) - break; - if (j == 0x40) - break; - seq_printf(s, "Index %x: ", trace1[(i / 4)]); - l = 0; - m = 0; - while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { - if (k >= 40) - break; - strncpy(part_str, (trace+i+20+m), k); - part_str[k] = 0; - seq_printf(s, "%s0x%x", part_str, - trace1[(i / 4) + 1 + l]); - l++; - m += k + 2; - } - seq_printf(s, "%s", (trace+i+20+m)); - } - - nvgpu_kfree(g, tracebuffer); - return 0; -} - -static int falc_trace_open(struct inode *inode, struct file *file) -{ - return single_open(file, falc_trace_show, inode->i_private); -} - -static const struct file_operations 
falc_trace_fops = { - .open = falc_trace_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_enable_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); - return 0; - -} - -static int perfmon_events_enable_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_enable_show, inode->i_private); -} - -static ssize_t perfmon_events_enable_write(struct file *file, - const char __user *userbuf, size_t count, loff_t *ppos) -{ - struct seq_file *s = file->private_data; - struct gk20a *g = s->private; - unsigned long val = 0; - char buf[40]; - int buf_size; - int err; - - memset(buf, 0, sizeof(buf)); - buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, userbuf, buf_size)) - return -EFAULT; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - /* Don't turn on gk20a unnecessarily */ - if (g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - if (val && !g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = true; - g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); - } else if (!val && g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = false; - g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); - } - gk20a_idle(g); - } else { - g->pmu.perfmon_sampling_enabled = val ? true : false; - } - - return count; -} - -static const struct file_operations perfmon_events_enable_fops = { - .open = perfmon_events_enable_open, - .read = seq_read, - .write = perfmon_events_enable_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int perfmon_events_count_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); - return 0; - -} - -static int perfmon_events_count_open(struct inode *inode, struct file *file) -{ - return single_open(file, perfmon_events_count_show, inode->i_private); -} - -static const struct file_operations perfmon_events_count_fops = { - .open = perfmon_events_count_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int security_show(struct seq_file *s, void *data) -{ - struct gk20a *g = s->private; - - seq_printf(s, "%d\n", g->pmu.pmu_mode); - return 0; - -} - -static int security_open(struct inode *inode, struct file *file) -{ - return single_open(file, security_show, inode->i_private); -} - -static const struct file_operations security_fops = { - .open = security_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int gk20a_pmu_debugfs_init(struct gk20a *g) -{ - struct dentry *d; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - d = debugfs_create_file( - "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, - &lpwr_debug_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &mscg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "mscg_transitions", S_IRUGO, l->debugfs, g, - &mscg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, - &elpg_stat_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "elpg_transitions", S_IRUGO, l->debugfs, g, - &elpg_transitions_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "falc_trace", 
S_IRUGO, l->debugfs, g, - &falc_trace_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_enable", S_IRUGO, l->debugfs, g, - &perfmon_events_enable_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "perfmon_events_count", S_IRUGO, l->debugfs, g, - &perfmon_events_count_fops); - if (!d) - goto err_out; - - d = debugfs_create_file( - "pmu_security", S_IRUGO, l->debugfs, g, - &security_fops); - if (!d) - goto err_out; - return 0; -err_out: - pr_err("%s: Failed to make debugfs node\n", __func__); - return -ENOMEM; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h deleted file mode 100644 index c4e3243d..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_PMU_H__ -#define __NVGPU_DEBUG_PMU_H__ - -struct gk20a; -int gk20a_pmu_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_PMU_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c deleted file mode 100644 index 5b7cbddf..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_sched.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
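The mscg_residency and elpg_residency nodes above report residency as a ratio scaled by 1000 (987 means 98.7%), computed with div64_u64 since the kernel cannot use floating point and 64-by-64 division needs a helper on 32-bit builds. The same arithmetic with illustrative numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ingating_us = 123456; /* time power-gated (made up) */
            uint64_t ungating_us = 876544; /* time ungated (made up) */
            uint64_t divisor = ingating_us + ungating_us;
            uint64_t residency = divisor ? ingating_us * 1000 / divisor : 0;

            /* 123456 * 1000 / 1000000 -> 123, i.e. 12.3% residency */
            printf("residency ratio: %llu\n", (unsigned long long)residency);
            return 0;
    }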
- * - */ - -#include "debug_sched.h" -#include "os_linux.h" - -#include -#include - -static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - bool sched_busy = true; - - int n = sched->bitmap_size / sizeof(u64); - int i; - int err; - - err = gk20a_busy(g); - if (err) - return err; - - if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { - sched_busy = false; - nvgpu_mutex_release(&sched->busy_lock); - } - - seq_printf(s, "control_locked=%d\n", sched->control_locked); - seq_printf(s, "busy=%d\n", sched_busy); - seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); - - nvgpu_mutex_acquire(&sched->status_lock); - - seq_puts(s, "active_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); - - seq_puts(s, "recent_tsg_bitmap\n"); - for (i = 0; i < n; i++) - seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); - - nvgpu_mutex_release(&sched->status_lock); - - gk20a_idle(g); - - return 0; -} - -static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, gk20a_sched_debugfs_show, inode->i_private); -} - -static const struct file_operations gk20a_sched_debugfs_fops = { - .open = gk20a_sched_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void gk20a_sched_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, - g, &gk20a_sched_debugfs_fops); -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h deleted file mode 100644 index 34a8f55f..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_sched.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_SCHED_H__ -#define __NVGPU_DEBUG_SCHED_H__ - -struct gk20a; -void gk20a_sched_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_SCHED_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.c b/drivers/gpu/nvgpu/common/linux/debug_xve.c deleted file mode 100644 index 743702a2..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_xve.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
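gk20a_sched_debugfs_show above answers "is the scheduler busy?" without blocking: if the trylock on busy_lock succeeds, the lock was free, so it is dropped again and busy is reported as false; if it fails, some other path holds it right now. The probe pattern, sketched with pthreads standing in for nvgpu_mutex (an assumption, not the driver API):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t busy_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Snapshot only: the answer can be stale the moment it returns. */
    static bool is_busy(void)
    {
            if (pthread_mutex_trylock(&busy_lock) == 0) {
                    pthread_mutex_unlock(&busy_lock);
                    return false;   /* lock was free */
            }
            return true;            /* currently held elsewhere */
    }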
- * - */ - -#include -#include - -#include "debug_xve.h" -#include "os_linux.h" - -#include -#include - -static ssize_t xve_link_speed_write(struct file *filp, - const char __user *buff, - size_t len, loff_t *off) -{ - struct gk20a *g = ((struct seq_file *)filp->private_data)->private; - char kbuff[16]; - u32 buff_size, check_len; - u32 link_speed = 0; - int ret; - - buff_size = min_t(size_t, 16, len); - - memset(kbuff, 0, 16); - if (copy_from_user(kbuff, buff, buff_size)) - return -EFAULT; - - check_len = strlen("Gen1"); - if (strncmp(kbuff, "Gen1", check_len) == 0) - link_speed = GPU_XVE_SPEED_2P5; - else if (strncmp(kbuff, "Gen2", check_len) == 0) - link_speed = GPU_XVE_SPEED_5P0; - else if (strncmp(kbuff, "Gen3", check_len) == 0) - link_speed = GPU_XVE_SPEED_8P0; - else - nvgpu_err(g, "%s: Unknown PCIe speed: %s", - __func__, kbuff); - - if (!link_speed) - return -EINVAL; - - /* Brief pause... To help rate limit this. */ - nvgpu_msleep(250); - - /* - * And actually set the speed. Yay. - */ - ret = g->ops.xve.set_speed(g, link_speed); - if (ret) - return ret; - - return len; -} - -static int xve_link_speed_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 speed; - int err; - - err = g->ops.xve.get_speed(g, &speed); - if (err) - return err; - - seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); - - return 0; -} - -static int xve_link_speed_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_link_speed_show, inode->i_private); -} - -static const struct file_operations xve_link_speed_fops = { - .open = xve_link_speed_open, - .read = seq_read, - .write = xve_link_speed_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xve_available_speeds_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 available_speeds; - - g->ops.xve.available_speeds(g, &available_speeds); - - seq_puts(s, "Available PCIe bus speeds:\n"); - if (available_speeds & GPU_XVE_SPEED_2P5) - seq_puts(s, " Gen1\n"); - if (available_speeds & GPU_XVE_SPEED_5P0) - seq_puts(s, " Gen2\n"); - if (available_speeds & GPU_XVE_SPEED_8P0) - seq_puts(s, " Gen3\n"); - - return 0; -} - -static int xve_available_speeds_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_available_speeds_show, inode->i_private); -} - -static const struct file_operations xve_available_speeds_fops = { - .open = xve_available_speeds_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xve_link_control_status_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - u32 link_status; - - link_status = g->ops.xve.get_link_control_status(g); - seq_printf(s, "0x%08x\n", link_status); - - return 0; -} - -static int xve_link_control_status_open(struct inode *inode, struct file *file) -{ - return single_open(file, xve_link_control_status_show, inode->i_private); -} - -static const struct file_operations xve_link_control_status_fops = { - .open = xve_link_control_status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int nvgpu_xve_debugfs_init(struct gk20a *g) -{ - int err = -ENODEV; - - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - - l->debugfs_xve = debugfs_create_dir("xve", gpu_root); - if (IS_ERR_OR_NULL(l->debugfs_xve)) - goto fail; - - /* - * These are just debug nodes. If they fail to get made it's not worth - * worrying the higher level SW. 
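
xve_link_speed_write() above is the usual recipe for a writable debugfs node: bound the user copy into a small zeroed stack buffer, then strncmp against a fixed keyword set. One hardening worth noting: the original copies up to the full 16 bytes, so a 16-byte write leaves kbuff unterminated for the error-path "%s" print. The sketch below (demo_* names and the "on"/"off" keywords are illustrative) reserves a byte for the terminator.

    #include <linux/fs.h>
    #include <linux/kernel.h>
    #include <linux/uaccess.h>

    static ssize_t demo_write(struct file *filp, const char __user *buff,
                              size_t len, loff_t *off)
    {
            char kbuf[16] = { 0 };
            /* leave room for the NUL so kbuf is always a valid C string */
            size_t n = min_t(size_t, sizeof(kbuf) - 1, len);
            int val;

            if (copy_from_user(kbuf, buff, n))
                    return -EFAULT;

            if (strncmp(kbuf, "on", 2) == 0)
                    val = 1;
            else if (strncmp(kbuf, "off", 3) == 0)
                    val = 0;
            else
                    return -EINVAL;

            pr_info("demo: set %d\n", val);
            return len;     /* claim the whole write, as the original does */
    }
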
- */ - debugfs_create_file("link_speed", S_IRUGO, - l->debugfs_xve, g, - &xve_link_speed_fops); - debugfs_create_file("available_speeds", S_IRUGO, - l->debugfs_xve, g, - &xve_available_speeds_fops); - debugfs_create_file("link_control_status", S_IRUGO, - l->debugfs_xve, g, - &xve_link_control_status_fops); - - err = 0; -fail: - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/debug_xve.h b/drivers/gpu/nvgpu/common/linux/debug_xve.h deleted file mode 100644 index f3b1ac54..00000000 --- a/drivers/gpu/nvgpu/common/linux/debug_xve.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __NVGPU_DEBUG_XVE_H__ -#define __NVGPU_DEBUG_XVE_H__ - -struct gk20a; -int nvgpu_xve_debugfs_init(struct gk20a *g); - -#endif /* __NVGPU_DEBUG_SVE_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c deleted file mode 100644 index f513dcd6..00000000 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ /dev/null @@ -1,694 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" - -#ifdef __DMA_ATTRS_LONGS -#define NVGPU_DEFINE_DMA_ATTRS(x) \ - struct dma_attrs x = { \ - .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ - } -#define NVGPU_DMA_ATTR(attrs) &attrs -#else -#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 -#define NVGPU_DMA_ATTR(attrs) attrs -#endif - -/* - * Enough to hold all the possible flags in string form. When a new flag is - * added it must be added here as well!! - */ -#define NVGPU_DMA_STR_SIZE \ - sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") - -/* - * The returned string is kmalloc()ed here but must be freed by the caller. - */ -static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) -{ - char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); - int bytes_available = NVGPU_DMA_STR_SIZE; - - /* - * Return the empty buffer if there's no flags. Makes it easier on the - * calling code to just print it instead of any if (NULL) type logic. 
- */ - if (!flags) - return buf; - -#define APPEND_FLAG(flag, str_flag) \ - do { \ - if (flags & flag) { \ - strncat(buf, str_flag, bytes_available); \ - bytes_available -= strlen(str_flag); \ - } \ - } while (0) - - APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); - APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); -#undef APPEND_FLAG - - return buf; -} - -/** - * __dma_dbg - Debug print for DMA allocs and frees. - * - * @g - The GPU. - * @size - The requested size of the alloc (size_t). - * @flags - The flags (unsigned long). - * @type - A string describing the type (i.e: sysmem or vidmem). - * @what - A string with 'alloc' or 'free'. - * - * @flags is the DMA flags. If there are none or it doesn't make sense to print - * flags just pass 0. - * - * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. - */ -static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, - const char *type, const char *what) -{ - char *flags_str = NULL; - - /* - * Don't bother making the flags_str if debugging is - * not enabled. This saves a malloc and a free. - */ - if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) - return; - - flags_str = nvgpu_dma_flags_to_str(g, flags); - - __nvgpu_log_dbg(g, gpu_dbg_dma, - __func__, __LINE__, - "DMA %s: [%s] size=%-7zu " - "aligned=%-7zu total=%-10llukB %s", - what, type, - size, PAGE_ALIGN(size), - g->dma_memory_used >> 10, - flags_str); - - if (flags_str) - nvgpu_kfree(g, flags_str); -} - -#define dma_dbg_alloc(g, size, flags, type) \ - __dma_dbg(g, size, flags, type, "alloc") -#define dma_dbg_free(g, size, flags, type) \ - __dma_dbg(g, size, flags, type, "free") - -/* - * For after the DMA alloc is done. - */ -#define __dma_dbg_done(g, size, type, what) \ - nvgpu_log(g, gpu_dbg_dma, \ - "DMA %s: [%s] size=%-7zu Done!", \ - what, type, size); \ - -#define dma_dbg_alloc_done(g, size, type) \ - __dma_dbg_done(g, size, type, "alloc") -#define dma_dbg_free_done(g, size, type) \ - __dma_dbg_done(g, size, type, "free") - -#if defined(CONFIG_GK20A_VIDMEM) -static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, - size_t size) -{ - u64 addr = 0; - - if (at) - addr = nvgpu_alloc_fixed(allocator, at, size, 0); - else - addr = nvgpu_alloc(allocator, size); - - return addr; -} -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, - unsigned long flags) -#define ATTR_ARG(x) *x -#else -static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, - unsigned long flags) -#define ATTR_ARG(x) x -#endif -{ - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); - if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); -#undef ATTR_ARG -} - -int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags(g, 0, size, mem); -} - -int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, - struct nvgpu_mem *mem) -{ - if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. - * - * Incoming flags are ignored here, since bits other than the - * no-kernel-mapping flag are ignored by the vidmem mapping - * functions anyway. 
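
nvgpu_dma_flags_to_str() and its APPEND_FLAG() macro above build a flag-name string one set bit at a time while hand-tracking the remaining space. strlcat() does the same bookkeeping internally and cannot overrun the destination; a sketch of the same idea, with purely illustrative flag names:

    #include <linux/bitops.h>
    #include <linux/string.h>

    #define DEMO_FLAG_A     BIT(0)
    #define DEMO_FLAG_B     BIT(1)

    static void demo_flags_to_str(unsigned long flags, char *buf, size_t size)
    {
            buf[0] = '\0';
            if (flags & DEMO_FLAG_A)
                    strlcat(buf, "A ", size);  /* bounds the total length */
            if (flags & DEMO_FLAG_B)
                    strlcat(buf, "B ", size);
    }
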
- */ - int err = nvgpu_dma_alloc_flags_vid(g, - NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. - */ - } - - return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); -} - -int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); -} - -int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - int err; - dma_addr_t iova; - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - void *alloc_ret; - - if (nvgpu_mem_is_valid(mem)) { - nvgpu_warn(g, "memory leak !!"); - WARN_ON(1); - } - - /* - * WAR for IO coherent chips: the DMA API does not seem to generate - * mappings that work correctly. Unclear why - Bug ID: 2040115. - * - * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING - * and then make a vmap() ourselves. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - flags |= NVGPU_DMA_NO_KERNEL_MAPPING; - - /* - * Before the debug print so we see this in the total. But during - * cleanup in the fail path this has to be subtracted. - */ - g->dma_memory_used += PAGE_ALIGN(size); - - dma_dbg_alloc(g, size, flags, "sysmem"); - - /* - * Save the old size but for actual allocation purposes the size is - * going to be page aligned. - */ - mem->size = size; - size = PAGE_ALIGN(size); - - nvgpu_dma_flags_to_attrs(&dma_attrs, flags); - - alloc_ret = dma_alloc_attrs(d, size, &iova, - GFP_KERNEL|__GFP_ZERO, - NVGPU_DMA_ATTR(dma_attrs)); - if (!alloc_ret) - return -ENOMEM; - - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - mem->priv.pages = alloc_ret; - err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, - mem->priv.pages, - iova, size); - } else { - mem->cpu_va = alloc_ret; - err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, - iova, size, flags); - } - if (err) - goto fail_free_dma; - - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { - mem->cpu_va = vmap(mem->priv.pages, - size >> PAGE_SHIFT, - 0, PAGE_KERNEL); - if (!mem->cpu_va) { - err = -ENOMEM; - goto fail_free_sgt; - } - } - - mem->aligned_size = size; - mem->aperture = APERTURE_SYSMEM; - mem->priv.flags = flags; - - dma_dbg_alloc_done(g, mem->size, "sysmem"); - - return 0; - -fail_free_sgt: - nvgpu_free_sgtable(g, &mem->priv.sgt); -fail_free_dma: - dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); - mem->cpu_va = NULL; - mem->priv.sgt = NULL; - mem->size = 0; - g->dma_memory_used -= mem->aligned_size; - return err; -} - -int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_vid(g, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); -} - -int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem, u64 at) -{ -#if defined(CONFIG_GK20A_VIDMEM) - u64 addr; - int err; - struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? 
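
With the flag plumbing stripped away, the sysmem path above reduces to one core call. On kernels >= 4.9 (the branch the NVGPU_DEFINE_DMA_ATTRS shim selects) attrs are a plain unsigned long, and on the IOMMU path this driver runs on, DMA_ATTR_NO_KERNEL_MAPPING makes dma_alloc_attrs() return a page array rather than a kernel VA. A reduced sketch; demo_dma_alloc is not an nvgpu function:

    #include <linux/dma-mapping.h>

    static void *demo_dma_alloc(struct device *dev, size_t size,
                                dma_addr_t *iova, bool no_kmap)
    {
            unsigned long attrs = no_kmap ? DMA_ATTR_NO_KERNEL_MAPPING : 0;

            size = PAGE_ALIGN(size);        /* allocate whole pages */

            /*
             * With DMA_ATTR_NO_KERNEL_MAPPING the return value is really
             * a struct page **; otherwise it is a CPU virtual address.
             */
            return dma_alloc_attrs(dev, size, iova,
                                   GFP_KERNEL | __GFP_ZERO, attrs);
    }
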
- &g->mm.vidmem.allocator : - &g->mm.vidmem.bootstrap_allocator; - int before_pending; - - if (nvgpu_mem_is_valid(mem)) { - nvgpu_warn(g, "memory leak !!"); - WARN_ON(1); - } - - dma_dbg_alloc(g, size, flags, "vidmem"); - - mem->size = size; - size = PAGE_ALIGN(size); - - if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) - return -ENOSYS; - - /* - * Our own allocator doesn't have any flags yet, and we can't - * kernel-map these, so require explicit flags. - */ - WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); - addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); - nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); - if (!addr) { - /* - * If memory is known to be freed soon, let the user know that - * it may be available after a while. - */ - if (before_pending) - return -EAGAIN; - else - return -ENOMEM; - } - - if (at) - mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; - - mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!mem->priv.sgt) { - err = -ENOMEM; - goto fail_physfree; - } - - err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); - if (err) - goto fail_kfree; - - nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); - sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); - - mem->aligned_size = size; - mem->aperture = APERTURE_VIDMEM; - mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; - mem->allocator = vidmem_alloc; - mem->priv.flags = flags; - - nvgpu_init_list_node(&mem->clear_list_entry); - - dma_dbg_alloc_done(g, mem->size, "vidmem"); - - return 0; - -fail_kfree: - nvgpu_kfree(g, mem->priv.sgt); -fail_physfree: - nvgpu_free(&g->mm.vidmem.allocator, addr); - mem->size = 0; - return err; -#else - return -ENOSYS; -#endif -} - -int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags(vm, 0, size, mem); -} - -int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using nvgpu_dma_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. - */ - int err = nvgpu_dma_alloc_map_flags_vid(vm, - flags | NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. 
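
The vidmem allocation above never touches struct pages at all: it wraps the allocator handle in a one-entry sg_table so the rest of the driver can treat sysmem and vidmem buffers uniformly. The skeleton of that trick, assuming nothing beyond the stock scatterlist API (demo_vidmem_sgt is a stand-in name):

    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static struct sg_table *demo_vidmem_sgt(unsigned int size)
    {
            struct sg_table *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);

            if (!tbl)
                    return NULL;
            if (sg_alloc_table(tbl, 1, GFP_KERNEL)) {
                    kfree(tbl);
                    return NULL;
            }
            /*
             * No CPU page backs this entry; the driver stores its own
             * allocator handle where a page pointer would normally live.
             */
            sg_set_page(tbl->sgl, NULL, size, 0);
            return tbl;
    }
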
- */ - } - - return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); -} - -int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); -} - -int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - nvgpu_dma_free(vm->mm->g, mem); - return err; -} - -int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return nvgpu_dma_alloc_map_flags_vid(vm, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - nvgpu_dma_free(vm->mm->g, mem); - return err; -} - -static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - - g->dma_memory_used -= mem->aligned_size; - - dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); - - if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && - !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && - (mem->cpu_va || mem->priv.pages)) { - /* - * Free side of WAR for bug 2040115. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - vunmap(mem->cpu_va); - - if (mem->priv.flags) { - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - - nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); - - if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - dma_free_attrs(d, mem->aligned_size, mem->priv.pages, - sg_dma_address(mem->priv.sgt->sgl), - NVGPU_DMA_ATTR(dma_attrs)); - } else { - dma_free_attrs(d, mem->aligned_size, mem->cpu_va, - sg_dma_address(mem->priv.sgt->sgl), - NVGPU_DMA_ATTR(dma_attrs)); - } - } else { - dma_free_coherent(d, mem->aligned_size, mem->cpu_va, - sg_dma_address(mem->priv.sgt->sgl)); - } - mem->cpu_va = NULL; - mem->priv.pages = NULL; - } - - /* - * When this flag is set we expect that pages is still populated but not - * by the DMA API. - */ - if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) - nvgpu_kfree(g, mem->priv.pages); - - if (mem->priv.sgt) - nvgpu_free_sgtable(g, &mem->priv.sgt); - - dma_dbg_free_done(g, mem->size, "sysmem"); - - mem->size = 0; - mem->aligned_size = 0; - mem->aperture = APERTURE_INVALID; -} - -static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) -{ -#if defined(CONFIG_GK20A_VIDMEM) - size_t mem_size = mem->size; - - dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); - - /* Sanity check - only this supported when allocating. */ - WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { - int err = nvgpu_vidmem_clear_list_enqueue(g, mem); - - /* - * If there's an error here then that means we can't clear the - * vidmem. That's too bad; however, we still own the nvgpu_mem - * buf so we have to free that. 
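
nvgpu_dma_alloc_map_flags_sys() and its vidmem twin above both follow the standard two-step-with-unwind idiom: allocate, map, and on a map failure release exactly what the first step acquired. Reduced to its shape, with placeholder demo_* types and helpers:

    struct demo_vm;
    struct demo_mem { u64 gpu_va; };

    int demo_alloc(struct demo_vm *vm, size_t size, struct demo_mem *mem);
    u64 demo_map(struct demo_vm *vm, struct demo_mem *mem, size_t size);
    void demo_free(struct demo_vm *vm, struct demo_mem *mem);

    static int demo_alloc_map(struct demo_vm *vm, size_t size,
                              struct demo_mem *mem)
    {
            int err = demo_alloc(vm, size, mem);

            if (err)
                    return err;

            mem->gpu_va = demo_map(vm, mem, size);
            if (!mem->gpu_va) {
                    demo_free(vm, mem);     /* undo step one, nothing more */
                    return -ENOMEM;
            }
            return 0;
    }
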
- * - * We don't need to worry about the vidmem allocator itself - * since when that gets cleaned up in the driver shutdown path - * all the outstanding allocs are force freed. - */ - if (err) - nvgpu_kfree(g, mem); - } else { - nvgpu_memset(g, mem, 0, 0, mem->aligned_size); - nvgpu_free(mem->allocator, - (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); - nvgpu_free_sgtable(g, &mem->priv.sgt); - - mem->size = 0; - mem->aligned_size = 0; - mem->aperture = APERTURE_INVALID; - } - - dma_dbg_free_done(g, mem_size, "vidmem"); -#endif -} - -void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) -{ - switch (mem->aperture) { - case APERTURE_SYSMEM: - return nvgpu_dma_free_sys(g, mem); - case APERTURE_VIDMEM: - return nvgpu_dma_free_vid(g, mem); - default: - break; /* like free() on "null" memory */ - } -} - -void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) -{ - if (mem->gpu_va) - nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); - mem->gpu_va = 0; - - nvgpu_dma_free(vm->mm->g, mem); -} - -int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, - void *cpuva, u64 iova, size_t size, unsigned long flags) -{ - int err = 0; - struct sg_table *tbl; - NVGPU_DEFINE_DMA_ATTRS(dma_attrs); - - tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!tbl) { - err = -ENOMEM; - goto fail; - } - - nvgpu_dma_flags_to_attrs(&dma_attrs, flags); - err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, - size, NVGPU_DMA_ATTR(dma_attrs)); - if (err) - goto fail; - - sg_dma_address(tbl->sgl) = iova; - *sgt = tbl; - - return 0; - -fail: - if (tbl) - nvgpu_kfree(g, tbl); - - return err; -} - -int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, - void *cpuva, u64 iova, size_t size) -{ - return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); -} - -int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, - struct page **pages, u64 iova, size_t size) -{ - int err = 0; - struct sg_table *tbl; - - tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!tbl) { - err = -ENOMEM; - goto fail; - } - - err = sg_alloc_table_from_pages(tbl, pages, - DIV_ROUND_UP(size, PAGE_SIZE), - 0, size, GFP_KERNEL); - if (err) - goto fail; - - sg_dma_address(tbl->sgl) = iova; - *sgt = tbl; - - return 0; - -fail: - if (tbl) - nvgpu_kfree(g, tbl); - - return err; -} - -void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) -{ - sg_free_table(*sgt); - nvgpu_kfree(g, *sgt); - *sgt = NULL; -} - -bool nvgpu_iommuable(struct gk20a *g) -{ -#ifdef CONFIG_TEGRA_GK20A - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - /* - * Check against the nvgpu device to see if it's been marked as - * IOMMU'able. - */ - if (!device_is_iommuable(l->dev)) - return false; -#endif - - return true; -} diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c deleted file mode 100644 index 129739f0..00000000 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.c +++ /dev/null @@ -1,218 +0,0 @@ -/* -* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
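
nvgpu_get_sgtable_from_pages() above is a thin wrapper over the stock scatterlist helper, with the caller-supplied IOVA stamped onto the first entry because the DMA mapping already exists. The essential calls in isolation (demo_sgt_from_pages is a stand-in name):

    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static struct sg_table *demo_sgt_from_pages(struct page **pages,
                                                size_t size, dma_addr_t iova)
    {
            struct sg_table *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);

            if (!tbl)
                    return NULL;
            if (sg_alloc_table_from_pages(tbl, pages,
                                          DIV_ROUND_UP(size, PAGE_SIZE),
                                          0, size, GFP_KERNEL)) {
                    kfree(tbl);
                    return NULL;
            }
            sg_dma_address(tbl->sgl) = iova;  /* mapping already exists */
            return tbl;
    }
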
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "dmabuf.h" -#include "os_linux.h" - -static void gk20a_mm_delete_priv(void *_priv) -{ - struct gk20a_buffer_state *s, *s_tmp; - struct gk20a_dmabuf_priv *priv = _priv; - struct gk20a *g; - - if (!priv) - return; - - g = priv->g; - - if (priv->comptags.allocated && priv->comptags.lines) { - BUG_ON(!priv->comptag_allocator); - gk20a_comptaglines_free(priv->comptag_allocator, - priv->comptags.offset, - priv->comptags.lines); - } - - /* Free buffer states */ - nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, - gk20a_buffer_state, list) { - gk20a_fence_put(s->fence); - nvgpu_list_del(&s->list); - nvgpu_kfree(g, s); - } - - nvgpu_kfree(g, priv); -} - -enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, - struct dma_buf *dmabuf) -{ - struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); - bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); - - if (buf_owner == NULL) { - /* Not nvgpu-allocated, assume system memory */ - return APERTURE_SYSMEM; - } else if (WARN_ON(buf_owner == g && unified_memory)) { - /* Looks like our video memory, but this gpu doesn't support - * it. Warn about a bug and bail out */ - nvgpu_warn(g, - "dmabuf is our vidmem but we don't have local vidmem"); - return APERTURE_INVALID; - } else if (buf_owner != g) { - /* Someone else's vidmem */ - return APERTURE_INVALID; - } else { - /* Yay, buf_owner == g */ - return APERTURE_VIDMEM; - } -} - -struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment **attachment) -{ - struct gk20a_dmabuf_priv *priv; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (WARN_ON(!priv)) - return ERR_PTR(-EINVAL); - - nvgpu_mutex_acquire(&priv->lock); - - if (priv->pin_count == 0) { - priv->attach = dma_buf_attach(dmabuf, dev); - if (IS_ERR(priv->attach)) { - nvgpu_mutex_release(&priv->lock); - return (struct sg_table *)priv->attach; - } - - priv->sgt = dma_buf_map_attachment(priv->attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(priv->sgt)) { - dma_buf_detach(dmabuf, priv->attach); - nvgpu_mutex_release(&priv->lock); - return priv->sgt; - } - } - - priv->pin_count++; - nvgpu_mutex_release(&priv->lock); - *attachment = priv->attach; - return priv->sgt; -} - -void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment, - struct sg_table *sgt) -{ - struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); - dma_addr_t dma_addr; - - if (IS_ERR(priv) || !priv) - return; - - nvgpu_mutex_acquire(&priv->lock); - WARN_ON(priv->sgt != sgt); - WARN_ON(priv->attach != attachment); - priv->pin_count--; - WARN_ON(priv->pin_count < 0); - dma_addr = sg_dma_address(priv->sgt->sgl); - if (priv->pin_count == 0) { - dma_buf_unmap_attachment(priv->attach, priv->sgt, - DMA_BIDIRECTIONAL); - dma_buf_detach(dmabuf, priv->attach); - } - nvgpu_mutex_release(&priv->lock); -} - -int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) -{ - struct gk20a *g = gk20a_get_platform(dev)->g; - struct gk20a_dmabuf_priv *priv; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (likely(priv)) - return 0; - - nvgpu_mutex_acquire(&g->mm.priv_lock); - priv = dma_buf_get_drvdata(dmabuf, dev); - if (priv) - goto priv_exist_or_err; - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - 
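
gk20a_mm_pin()/gk20a_mm_unpin() above implement refcounted dma-buf pinning: only the first pin attaches and maps, only the last unpin detaches. A stripped-down sketch of the pin side; struct demo_priv stands in for the per-buffer private data and drops the original's distinct error returns.

    #include <linux/dma-buf.h>
    #include <linux/err.h>
    #include <linux/mutex.h>

    struct demo_priv {
            struct mutex lock;
            struct dma_buf_attachment *attach;
            struct sg_table *sgt;
            int pin_count;
    };

    static struct sg_table *demo_pin(struct demo_priv *p, struct dma_buf *buf,
                                     struct device *dev)
    {
            struct sg_table *sgt = NULL;

            mutex_lock(&p->lock);
            if (p->pin_count == 0) {
                    p->attach = dma_buf_attach(buf, dev);
                    if (IS_ERR(p->attach))
                            goto out;
                    p->sgt = dma_buf_map_attachment(p->attach,
                                                    DMA_BIDIRECTIONAL);
                    if (IS_ERR(p->sgt)) {
                            dma_buf_detach(buf, p->attach);
                            goto out;
                    }
            }
            p->pin_count++;         /* later pins just take a reference */
            sgt = p->sgt;
    out:
            mutex_unlock(&p->lock);
            return sgt;
    }
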
priv = ERR_PTR(-ENOMEM); - goto priv_exist_or_err; - } - - nvgpu_mutex_init(&priv->lock); - nvgpu_init_list_node(&priv->states); - priv->g = g; - dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); - -priv_exist_or_err: - nvgpu_mutex_release(&g->mm.priv_lock); - if (IS_ERR(priv)) - return -ENOMEM; - - return 0; -} - -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, - u64 offset, struct gk20a_buffer_state **state) -{ - int err = 0; - struct gk20a_dmabuf_priv *priv; - struct gk20a_buffer_state *s; - struct device *dev = dev_from_gk20a(g); - - if (WARN_ON(offset >= (u64)dmabuf->size)) - return -EINVAL; - - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); - if (err) - return err; - - priv = dma_buf_get_drvdata(dmabuf, dev); - if (WARN_ON(!priv)) - return -ENOSYS; - - nvgpu_mutex_acquire(&priv->lock); - - nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) - if (s->offset == offset) - goto out; - - /* State not found, create state. */ - s = nvgpu_kzalloc(g, sizeof(*s)); - if (!s) { - err = -ENOMEM; - goto out; - } - - s->offset = offset; - nvgpu_init_list_node(&s->list); - nvgpu_mutex_init(&s->lock); - nvgpu_list_add_tail(&s->list, &priv->states); - -out: - nvgpu_mutex_release(&priv->lock); - if (!err) - *state = s; - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.h b/drivers/gpu/nvgpu/common/linux/dmabuf.h deleted file mode 100644 index 8399eaaf..00000000 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __COMMON_LINUX_DMABUF_H__ -#define __COMMON_LINUX_DMABUF_H__ - -#include -#include -#include -#include - -struct sg_table; -struct dma_buf; -struct dma_buf_attachment; -struct device; - -struct gk20a; -struct gk20a_buffer_state; - -struct gk20a_dmabuf_priv { - struct nvgpu_mutex lock; - - struct gk20a *g; - - struct gk20a_comptag_allocator *comptag_allocator; - struct gk20a_comptags comptags; - - struct dma_buf_attachment *attach; - struct sg_table *sgt; - - int pin_count; - - struct nvgpu_list_node states; - - u64 buffer_id; -}; - -struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment **attachment); -void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, - struct dma_buf_attachment *attachment, - struct sg_table *sgt); - -int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); - -int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, - u64 offset, struct gk20a_buffer_state **state); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c deleted file mode 100644 index 8f33c5d2..00000000 --- a/drivers/gpu/nvgpu/common/linux/driver_common.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
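
gk20a_dmabuf_alloc_drvdata() above uses double-checked initialization: a cheap unlocked lookup for the common case, then a re-check under the mutex so two racing callers cannot both allocate. The shape of it, with hypothetical demo_* types; READ_ONCE/WRITE_ONCE are added here for clarity, where the original relies on dma_buf_get_drvdata()'s own synchronization.

    #include <linux/mutex.h>
    #include <linux/slab.h>

    struct demo_priv { int refs; };
    struct demo_dev {
            struct mutex lock;
            struct demo_priv *priv;
    };

    static struct demo_priv *demo_get_priv(struct demo_dev *d)
    {
            struct demo_priv *priv = READ_ONCE(d->priv);

            if (priv)
                    return priv;    /* fast path, no lock taken */

            mutex_lock(&d->lock);
            priv = d->priv;         /* re-check: someone may have won */
            if (!priv) {
                    priv = kzalloc(sizeof(*priv), GFP_KERNEL);
                    if (priv)
                            WRITE_ONCE(d->priv, priv);
            }
            mutex_unlock(&d->lock);
            return priv;
    }
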
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "module.h" -#include "os_linux.h" -#include "sysfs.h" -#include "ioctl.h" -#include "gk20a/regops_gk20a.h" - -#define EMC3D_DEFAULT_RATIO 750 - -void nvgpu_kernel_restart(void *cmd) -{ - kernel_restart(cmd); -} - -static void nvgpu_init_vars(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - - nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); - nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); - - init_rwsem(&l->busy_lock); - nvgpu_rwsem_init(&g->deterministic_busy); - - nvgpu_spinlock_init(&g->mc_enable_lock); - - nvgpu_mutex_init(&platform->railgate_lock); - nvgpu_mutex_init(&g->dbg_sessions_lock); - nvgpu_mutex_init(&g->client_lock); - nvgpu_mutex_init(&g->poweron_lock); - nvgpu_mutex_init(&g->poweroff_lock); - nvgpu_mutex_init(&g->ctxsw_disable_lock); - - l->regs_saved = l->regs; - l->bar1_saved = l->bar1; - - g->emc3d_ratio = EMC3D_DEFAULT_RATIO; - - /* Set DMA parameters to allow larger sgt lists */ - dev->dma_parms = &l->dma_parms; - dma_set_max_seg_size(dev, UINT_MAX); - - /* - * A default of 16GB is the largest supported DMA size that is - * acceptable to all currently supported Tegra SoCs. 
- */ - if (!platform->dma_mask) - platform->dma_mask = DMA_BIT_MASK(34); - - dma_set_mask(dev, platform->dma_mask); - dma_set_coherent_mask(dev, platform->dma_mask); - - nvgpu_init_list_node(&g->profiler_objects); - - nvgpu_init_list_node(&g->boardobj_head); - nvgpu_init_list_node(&g->boardobjgrp_head); -} - -static void nvgpu_init_gr_vars(struct gk20a *g) -{ - gk20a_init_gr(g); - - nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); - g->gr.max_comptag_mem = totalram_pages - >> (10 - (PAGE_SHIFT - 10)); -} - -static void nvgpu_init_timeout(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->timeouts_disabled_by_user = false; - nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); - - if (nvgpu_platform_is_silicon(g)) { - g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; - } else if (nvgpu_platform_is_fpga(g)) { - g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; - } else { - g->gr_idle_timeout_default = (u32)ULONG_MAX; - } - g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; - g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; -} - -static void nvgpu_init_timeslice(struct gk20a *g) -{ - g->runlist_interleave = true; - - g->timeslice_low_priority_us = 1300; - g->timeslice_medium_priority_us = 2600; - g->timeslice_high_priority_us = 5200; - - g->min_timeslice_us = 1000; - g->max_timeslice_us = 50000; -} - -static void nvgpu_init_pm_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - /* - * Set up initial power settings. For non-slicon platforms, disable - * power features and for silicon platforms, read from platform data - */ - g->slcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; - g->blcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; - g->elcg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; - g->elpg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; - g->aelpg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; - g->mscg_enabled = - nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; - g->can_elpg = - nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; - - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, - nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, - nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); - __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, - nvgpu_platform_is_silicon(g) ? 
platform->can_blcg : false); - - g->aggressive_sync_destroy = platform->aggressive_sync_destroy; - g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; - g->has_syncpoints = platform->has_syncpoints; -#ifdef CONFIG_NVGPU_SUPPORT_CDE - g->has_cde = platform->has_cde; -#endif - g->ptimer_src_freq = platform->ptimer_src_freq; - g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); - g->can_railgate = platform->can_railgate_init; - g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; - /* if default delay is not set, set default delay to 500msec */ - if (platform->railgate_delay_init) - g->railgate_delay = platform->railgate_delay_init; - else - g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; - __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); - - /* set default values to aelpg parameters */ - g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; - g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; - g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; - g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; - g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; - - __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); -} - -static void nvgpu_init_vbios_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); - g->vbios_min_version = platform->vbios_min_version; -} - -static void nvgpu_init_ltc_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->ltc_streamid = platform->ltc_streamid; -} - -static void nvgpu_init_mm_vars(struct gk20a *g) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); - - g->mm.disable_bigpage = platform->disable_bigpage; - __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, - platform->honors_aperture); - __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, - platform->unified_memory); - __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, - platform->unify_address_spaces); - - nvgpu_mutex_init(&g->mm.tlb_lock); - nvgpu_mutex_init(&g->mm.priv_lock); -} - -int nvgpu_probe(struct gk20a *g, - const char *debugfs_symlink, - const char *interface_name, - struct class *class) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err = 0; - - nvgpu_init_vars(g); - nvgpu_init_gr_vars(g); - nvgpu_init_timeout(g); - nvgpu_init_timeslice(g); - nvgpu_init_pm_vars(g); - nvgpu_init_vbios_vars(g); - nvgpu_init_ltc_vars(g); - err = nvgpu_init_soc_vars(g); - if (err) { - nvgpu_err(g, "init soc vars failed"); - return err; - } - - /* Initialize the platform interface. 
*/ - err = platform->probe(dev); - if (err) { - if (err == -EPROBE_DEFER) - nvgpu_info(g, "platform probe failed"); - else - nvgpu_err(g, "platform probe failed"); - return err; - } - - nvgpu_init_mm_vars(g); - - /* platform probe can defer do user init only if probe succeeds */ - err = gk20a_user_init(dev, interface_name, class); - if (err) - return err; - - if (platform->late_probe) { - err = platform->late_probe(dev); - if (err) { - nvgpu_err(g, "late probe failed"); - return err; - } - } - - nvgpu_create_sysfs(dev); - gk20a_debug_init(g, debugfs_symlink); - - g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); - if (!g->dbg_regops_tmp_buf) { - nvgpu_err(g, "couldn't allocate regops tmp buf"); - return -ENOMEM; - } - g->dbg_regops_tmp_buf_ops = - SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); - - g->remove_support = gk20a_remove_support; - - nvgpu_ref_init(&g->refcount); - - return 0; -} - -/** - * cyclic_delta - Returns delta of cyclic integers a and b. - * - * @a - First integer - * @b - Second integer - * - * Note: if a is ahead of b, delta is positive. - */ -static int cyclic_delta(int a, int b) -{ - return a - b; -} - -/** - * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete - * - * @g - The GPU to wait on. - * - * Waits until all interrupt handlers that have been scheduled to run have - * completed. - */ -void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); - int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); - - /* wait until all stalling irqs are handled */ - NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, - cyclic_delta(stall_irq_threshold, - atomic_read(&l->sw_irq_stall_last_handled)) - <= 0, 0); - - /* wait until all non-stalling irqs are handled */ - NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, - cyclic_delta(nonstall_irq_threshold, - atomic_read(&l->sw_irq_nonstall_last_handled)) - <= 0, 0); -} - -static void nvgpu_free_gk20a(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - kfree(l); -} - -void nvgpu_init_gk20a(struct gk20a *g) -{ - g->free = nvgpu_free_gk20a; -} diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.h b/drivers/gpu/nvgpu/common/linux/driver_common.h deleted file mode 100644 index 6f42f775..00000000 --- a/drivers/gpu/nvgpu/common/linux/driver_common.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef NVGPU_LINUX_DRIVER_COMMON -#define NVGPU_LINUX_DRIVER_COMMON - -void nvgpu_init_gk20a(struct gk20a *g); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/dt.c b/drivers/gpu/nvgpu/common/linux/dt.c deleted file mode 100644 index 88e391e3..00000000 --- a/drivers/gpu/nvgpu/common/linux/dt.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. 
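
cyclic_delta() above is the classic wrapping-counter comparison: since the kernel builds with -fno-strict-overflow, the signed subtraction stays meaningful across wraparound as long as the two counters are less than half the int range apart. That is what lets the deferred-interrupt wait compare an IRQ-count snapshot against the handled count without ever resetting either. For instance (demo name only):

    /* true once the handled counter has caught up to the snapshot,
     * even if either counter has wrapped past INT_MAX in between */
    static bool demo_caught_up(int snapshot, int handled)
    {
            return (snapshot - handled) <= 0;
    }
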
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "os_linux.h" - -int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, - u32 index, u32 *value) -{ - struct device *dev = dev_from_gk20a(g); - struct device_node *np = dev->of_node; - - return of_property_read_u32_index(np, name, index, value); -} diff --git a/drivers/gpu/nvgpu/common/linux/firmware.c b/drivers/gpu/nvgpu/common/linux/firmware.c deleted file mode 100644 index 9a4dc653..00000000 --- a/drivers/gpu/nvgpu/common/linux/firmware.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -static const struct firmware *do_request_firmware(struct device *dev, - const char *prefix, const char *fw_name, int flags) -{ - const struct firmware *fw; - char *fw_path = NULL; - int path_len, err; - - if (prefix) { - path_len = strlen(prefix) + strlen(fw_name); - path_len += 2; /* for the path separator and zero terminator*/ - - fw_path = nvgpu_kzalloc(get_gk20a(dev), - sizeof(*fw_path) * path_len); - if (!fw_path) - return NULL; - - sprintf(fw_path, "%s/%s", prefix, fw_name); - fw_name = fw_path; - } - - if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) - err = request_firmware_direct(&fw, fw_name, dev); - else - err = request_firmware(&fw, fw_name, dev); - - nvgpu_kfree(get_gk20a(dev), fw_path); - if (err) - return NULL; - return fw; -} - -/* This is a simple wrapper around request_firmware that takes 'fw_name' and - * applies an IP specific relative path prefix to it. The caller is - * responsible for calling nvgpu_release_firmware later. */ -struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, - const char *fw_name, - int flags) -{ - struct device *dev = dev_from_gk20a(g); - struct nvgpu_firmware *fw; - const struct firmware *linux_fw; - - /* current->fs is NULL when calling from SYS_EXIT. - Add a check here to prevent crash in request_firmware */ - if (!current->fs || !fw_name) - return NULL; - - fw = nvgpu_kzalloc(g, sizeof(*fw)); - if (!fw) - return NULL; - - linux_fw = do_request_firmware(dev, g->name, fw_name, flags); - -#ifdef CONFIG_TEGRA_GK20A - /* TO BE REMOVED - Support loading from legacy SOC specific path. 
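
nvgpu_request_firmware() above copies the blob out before releasing it, because fw->data only lives until release_firmware(). A minimal version of that round trip; the 64-byte path buffer and the demo_load_fw name are assumptions of this sketch, not driver API.

    #include <linux/firmware.h>
    #include <linux/slab.h>

    static void *demo_load_fw(struct device *dev, const char *prefix,
                              const char *name, size_t *size)
    {
            const struct firmware *fw;
            char path[64];
            void *copy;

            snprintf(path, sizeof(path), "%s/%s", prefix, name);
            if (request_firmware(&fw, path, dev))
                    return NULL;

            /* fw->data is only valid until release_firmware() */
            copy = kmemdup(fw->data, fw->size, GFP_KERNEL);
            if (copy)
                    *size = fw->size;
            release_firmware(fw);
            return copy;
    }
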
*/ - if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { - struct gk20a_platform *platform = gk20a_get_platform(dev); - linux_fw = do_request_firmware(dev, - platform->soc_name, fw_name, flags); - } -#endif - - if (!linux_fw) - goto err; - - fw->data = nvgpu_kmalloc(g, linux_fw->size); - if (!fw->data) - goto err_release; - - memcpy(fw->data, linux_fw->data, linux_fw->size); - fw->size = linux_fw->size; - - release_firmware(linux_fw); - - return fw; - -err_release: - release_firmware(linux_fw); -err: - nvgpu_kfree(g, fw); - return NULL; -} - -void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) -{ - if(!fw) - return; - - nvgpu_kfree(g, fw->data); - nvgpu_kfree(g, fw); -} diff --git a/drivers/gpu/nvgpu/common/linux/fuse.c b/drivers/gpu/nvgpu/common/linux/fuse.c deleted file mode 100644 index 27851f92..00000000 --- a/drivers/gpu/nvgpu/common/linux/fuse.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) -{ - return tegra_sku_info.gpu_speedo_id; -} - -/* - * Use tegra_fuse_control_read/write() APIs for fuse offsets upto 0x100 - * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100 - */ -void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) -{ - tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); -} - -void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) -{ - tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); -} - -void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) -{ - tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); -} - -void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) -{ - tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); -} - -int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) -{ - return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); -} - -int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) -{ - return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); -} diff --git a/drivers/gpu/nvgpu/common/linux/intr.c b/drivers/gpu/nvgpu/common/linux/intr.c deleted file mode 100644 index 7ffc7e87..00000000 --- a/drivers/gpu/nvgpu/common/linux/intr.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mc_gk20a.h" - -#include -#include -#include "os_linux.h" - -irqreturn_t nvgpu_intr_stall(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 mc_intr_0; - - trace_mc_gk20a_intr_stall(g->name); - - if (!g->power_on) - return IRQ_NONE; - - /* not from gpu when sharing irq with others */ - mc_intr_0 = g->ops.mc.intr_stall(g); - if (unlikely(!mc_intr_0)) - return IRQ_NONE; - - g->ops.mc.intr_stall_pause(g); - - atomic_inc(&l->hw_irq_stall_count); - - trace_mc_gk20a_intr_stall_done(g->name); - - return IRQ_WAKE_THREAD; -} - -irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int hw_irq_count; - - nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); - - trace_mc_gk20a_intr_thread_stall(g->name); - - hw_irq_count = atomic_read(&l->hw_irq_stall_count); - g->ops.mc.isr_stall(g); - g->ops.mc.intr_stall_resume(g); - /* sync handled irq counter before re-enabling interrupts */ - atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); - - nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); - - trace_mc_gk20a_intr_thread_stall_done(g->name); - - return IRQ_HANDLED; -} - -irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) -{ - u32 non_stall_intr_val; - u32 hw_irq_count; - int ops_old, ops_new, ops = 0; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (!g->power_on) - return IRQ_NONE; - - /* not from gpu when sharing irq with others */ - non_stall_intr_val = g->ops.mc.intr_nonstall(g); - if (unlikely(!non_stall_intr_val)) - return IRQ_NONE; - - g->ops.mc.intr_nonstall_pause(g); - - ops = g->ops.mc.isr_nonstall(g); - if (ops) { - do { - ops_old = atomic_read(&l->nonstall_ops); - ops_new = ops_old | ops; - } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, - ops_old, ops_new)); - - queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); - } - - hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); - - /* sync handled irq counter before re-enabling interrupts */ - atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); - - g->ops.mc.intr_nonstall_resume(g); - - nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); - - return IRQ_HANDLED; -} - -void nvgpu_intr_nonstall_cb(struct work_struct *work) -{ - struct nvgpu_os_linux *l = - container_of(work, struct nvgpu_os_linux, nonstall_fn_work); - struct gk20a *g = &l->g; - - do { - u32 ops; - - ops = atomic_xchg(&l->nonstall_ops, 0); - mc_gk20a_handle_intr_nonstall(g, ops); - } while (atomic_read(&l->nonstall_ops) != 0); -} diff --git a/drivers/gpu/nvgpu/common/linux/intr.h b/drivers/gpu/nvgpu/common/linux/intr.h deleted file mode 100644 index d43cdccb..00000000 --- a/drivers/gpu/nvgpu/common/linux/intr.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
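
The nonstall ISR above merges new operation flags into a shared atomic without a lock: read, OR, atomic_cmpxchg(), and retry if another CPU got in between; the worker then drains with atomic_xchg(..., 0). The accumulate side in isolation (demo name only):

    #include <linux/atomic.h>

    static void demo_accumulate(atomic_t *pending, int new_bits)
    {
            int old, merged;

            do {
                    old = atomic_read(pending);
                    merged = old | new_bits;
                    /* atomic_cmpxchg() returns the value actually found;
                     * anything other than 'old' means we raced: retry */
            } while (atomic_cmpxchg(pending, old, merged) != old);
    }
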
- */ - -#ifndef __NVGPU_LINUX_INTR_H__ -#define __NVGPU_LINUX_INTR_H__ -struct gk20a; - -irqreturn_t nvgpu_intr_stall(struct gk20a *g); -irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); -irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); -void nvgpu_intr_nonstall_cb(struct work_struct *work); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c deleted file mode 100644 index c06512a5..00000000 --- a/drivers/gpu/nvgpu/common/linux/io.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include - -#include "os_linux.h" -#include "gk20a/gk20a.h" - -void nvgpu_writel(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - writel_relaxed(v, l->regs + r); - nvgpu_wmb(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } -} - -u32 nvgpu_readl(struct gk20a *g, u32 r) -{ - u32 v = __nvgpu_readl(g, r); - - if (v == 0xffffffff) - __nvgpu_check_gpu_state(g); - - return v; -} - -u32 __nvgpu_readl(struct gk20a *g, u32 r) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 v = 0xffffffff; - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - v = readl(l->regs + r); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } - - return v; -} - -void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->regs)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); - } else { - nvgpu_wmb(); - do { - writel_relaxed(v, l->regs + r); - } while (readl(l->regs + r) != v); - nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); - } -} - -void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (unlikely(!l->bar1)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); - } else { - nvgpu_wmb(); - writel_relaxed(v, l->bar1 + b); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); - } -} - -u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 v = 0xffffffff; - - if (unlikely(!l->bar1)) { - __gk20a_warn_on_no_regs(); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); - } else { - v = readl(l->bar1 + b); - nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); - } - - return v; -} - -bool nvgpu_io_exists(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return l->regs != NULL; -} - -bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return r < resource_size(l->regs); -} diff --git a/drivers/gpu/nvgpu/common/linux/io_usermode.c b/drivers/gpu/nvgpu/common/linux/io_usermode.c deleted file mode 100644 index 
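
nvgpu_writel_check() above handles posted writes by rewriting until a read-back matches. This only makes sense for registers that read back exactly what was written, an assumption the caller must guarantee. The core of it, as a sketch:

    #include <linux/io.h>

    static void demo_writel_check(void __iomem *base, u32 r, u32 v)
    {
            wmb();                          /* order earlier CPU writes */
            do {
                    writel_relaxed(v, base + r);
            } while (readl(base + r) != v); /* readl flushes the post */
    }
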
a7b728dd..00000000 --- a/drivers/gpu/nvgpu/common/linux/io_usermode.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include - -#include "common/linux/os_linux.h" -#include "gk20a/gk20a.h" - -#include - -void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); - - writel_relaxed(v, reg); - nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.c b/drivers/gpu/nvgpu/common/linux/ioctl.c deleted file mode 100644 index 359e5103..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * NVGPU IOCTLs - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" - -#include "ioctl_channel.h" -#include "ioctl_ctrl.h" -#include "ioctl_as.h" -#include "ioctl_tsg.h" -#include "ioctl_dbg.h" -#include "module.h" -#include "os_linux.h" -#include "ctxsw_trace.h" -#include "platform_gk20a.h" - -#define GK20A_NUM_CDEVS 7 - -const struct file_operations gk20a_channel_ops = { - .owner = THIS_MODULE, - .release = gk20a_channel_release, - .open = gk20a_channel_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_channel_ioctl, -#endif - .unlocked_ioctl = gk20a_channel_ioctl, -}; - -static const struct file_operations gk20a_ctrl_ops = { - .owner = THIS_MODULE, - .release = gk20a_ctrl_dev_release, - .open = gk20a_ctrl_dev_open, - .unlocked_ioctl = gk20a_ctrl_dev_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_ctrl_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_dbg_ops = { - .owner = THIS_MODULE, - .release = gk20a_dbg_gpu_dev_release, - .open = gk20a_dbg_gpu_dev_open, - .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, - .poll = gk20a_dbg_gpu_dev_poll, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_as_ops = { - .owner = THIS_MODULE, - .release = gk20a_as_dev_release, - .open = gk20a_as_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_as_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_as_dev_ioctl, -}; - -/* - * Note: We use a different 'open' to trigger handling of the profiler session. - * Most of the code is shared between them... 
Though, at some point if the - * code does get too tangled trying to handle each in the same path we can - * separate them cleanly. - */ -static const struct file_operations gk20a_prof_ops = { - .owner = THIS_MODULE, - .release = gk20a_dbg_gpu_dev_release, - .open = gk20a_prof_gpu_dev_open, - .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, -#endif -}; - -static const struct file_operations gk20a_tsg_ops = { - .owner = THIS_MODULE, - .release = nvgpu_ioctl_tsg_dev_release, - .open = nvgpu_ioctl_tsg_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, -#endif - .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, -}; - -#ifdef CONFIG_GK20A_CTXSW_TRACE -static const struct file_operations gk20a_ctxsw_ops = { - .owner = THIS_MODULE, - .release = gk20a_ctxsw_dev_release, - .open = gk20a_ctxsw_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_ctxsw_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, - .poll = gk20a_ctxsw_dev_poll, - .read = gk20a_ctxsw_dev_read, - .mmap = gk20a_ctxsw_dev_mmap, -}; -#endif - -static const struct file_operations gk20a_sched_ops = { - .owner = THIS_MODULE, - .release = gk20a_sched_dev_release, - .open = gk20a_sched_dev_open, -#ifdef CONFIG_COMPAT - .compat_ioctl = gk20a_sched_dev_ioctl, -#endif - .unlocked_ioctl = gk20a_sched_dev_ioctl, - .poll = gk20a_sched_dev_poll, - .read = gk20a_sched_dev_read, -}; - -static int gk20a_create_device( - struct device *dev, int devno, - const char *interface_name, const char *cdev_name, - struct cdev *cdev, struct device **out, - const struct file_operations *ops, - struct class *class) -{ - struct device *subdev; - int err; - struct gk20a *g = gk20a_from_dev(dev); - - nvgpu_log_fn(g, " "); - - cdev_init(cdev, ops); - cdev->owner = THIS_MODULE; - - err = cdev_add(cdev, devno, 1); - if (err) { - dev_err(dev, "failed to add %s cdev\n", cdev_name); - return err; - } - - subdev = device_create(class, NULL, devno, NULL, - interface_name, cdev_name); - - if (IS_ERR(subdev)) { - err = PTR_ERR(subdev); - cdev_del(cdev); - dev_err(dev, "failed to create %s device for %s\n", - cdev_name, dev_name(dev)); - return err; - } - - *out = subdev; - return 0; -} - -void gk20a_user_deinit(struct device *dev, struct class *class) -{ - struct gk20a *g = gk20a_from_dev(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (l->channel.node) { - device_destroy(class, l->channel.cdev.dev); - cdev_del(&l->channel.cdev); - } - - if (l->as_dev.node) { - device_destroy(class, l->as_dev.cdev.dev); - cdev_del(&l->as_dev.cdev); - } - - if (l->ctrl.node) { - device_destroy(class, l->ctrl.cdev.dev); - cdev_del(&l->ctrl.cdev); - } - - if (l->dbg.node) { - device_destroy(class, l->dbg.cdev.dev); - cdev_del(&l->dbg.cdev); - } - - if (l->prof.node) { - device_destroy(class, l->prof.cdev.dev); - cdev_del(&l->prof.cdev); - } - - if (l->tsg.node) { - device_destroy(class, l->tsg.cdev.dev); - cdev_del(&l->tsg.cdev); - } - - if (l->ctxsw.node) { - device_destroy(class, l->ctxsw.cdev.dev); - cdev_del(&l->ctxsw.cdev); - } - - if (l->sched.node) { - device_destroy(class, l->sched.cdev.dev); - cdev_del(&l->sched.cdev); - } - - if (l->cdev_region) - unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); -} - -int gk20a_user_init(struct device *dev, const char *interface_name, - struct class *class) -{ - int err; - dev_t devno; - struct gk20a *g = gk20a_from_dev(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - err = alloc_chrdev_region(&devno, 0,
GK20A_NUM_CDEVS, dev_name(dev)); - if (err) { - dev_err(dev, "failed to allocate devno\n"); - goto fail; - } - l->cdev_region = devno; - - err = gk20a_create_device(dev, devno++, interface_name, "", - &l->channel.cdev, &l->channel.node, - &gk20a_channel_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-as", - &l->as_dev.cdev, &l->as_dev.node, - &gk20a_as_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", - &l->ctrl.cdev, &l->ctrl.node, - &gk20a_ctrl_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-dbg", - &l->dbg.cdev, &l->dbg.node, - &gk20a_dbg_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-prof", - &l->prof.cdev, &l->prof.node, - &gk20a_prof_ops, - class); - if (err) - goto fail; - - err = gk20a_create_device(dev, devno++, interface_name, "-tsg", - &l->tsg.cdev, &l->tsg.node, - &gk20a_tsg_ops, - class); - if (err) - goto fail; - -#if defined(CONFIG_GK20A_CTXSW_TRACE) - err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", - &l->ctxsw.cdev, &l->ctxsw.node, - &gk20a_ctxsw_ops, - class); - if (err) - goto fail; -#endif - - err = gk20a_create_device(dev, devno++, interface_name, "-sched", - &l->sched.cdev, &l->sched.node, - &gk20a_sched_ops, - class); - if (err) - goto fail; - - return 0; -fail: - gk20a_user_deinit(dev, &nvgpu_class); - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl.h b/drivers/gpu/nvgpu/common/linux/ioctl.h deleted file mode 100644 index 7bf16711..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_IOCTL_H__ -#define __NVGPU_IOCTL_H__ - -struct device; -struct class; - -int gk20a_user_init(struct device *dev, const char *interface_name, - struct class *class); -void gk20a_user_deinit(struct device *dev, struct class *class); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c deleted file mode 100644 index 47f612cc..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * GK20A Address Spaces - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#include -#include -#include - -#include - -#include - -#include -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "ioctl_as.h" -#include "os_linux.h" - -static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) - core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; - if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) - core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; - - return core_flags; -} - -static int gk20a_as_ioctl_bind_channel( - struct gk20a_as_share *as_share, - struct nvgpu_as_bind_channel_args *args) -{ - int err = 0; - struct channel_gk20a *ch; - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - if (gk20a_channel_as_bound(ch)) { - err = -EINVAL; - goto out; - } - - /* this will set channel_gk20a->vm */ - err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); - -out: - gk20a_channel_put(ch); - return err; -} - -static int gk20a_as_ioctl_alloc_space( - struct gk20a_as_share *as_share, - struct nvgpu_as_alloc_space_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, - &args->o_a.offset, - gk20a_as_translate_as_alloc_space_flags(g, - args->flags)); -} - -static int gk20a_as_ioctl_free_space( - struct gk20a_as_share *as_share, - struct nvgpu_as_free_space_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - return nvgpu_vm_area_free(as_share->vm, args->offset); -} - -static int gk20a_as_ioctl_map_buffer_ex( - struct gk20a_as_share *as_share, - struct nvgpu_as_map_buffer_ex_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - /* unsupported, direct kind control must be used */ - if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { - struct gk20a *g = as_share->vm->mm->g; - nvgpu_log_info(g, "Direct kind control must be requested"); - return -EINVAL; - } - - return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, - &args->offset, args->flags, - args->compr_kind, - args->incompr_kind, - args->buffer_offset, - args->mapping_size, - NULL); -} - -static int gk20a_as_ioctl_unmap_buffer( - struct gk20a_as_share *as_share, - struct nvgpu_as_unmap_buffer_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - - nvgpu_log_fn(g, " "); - - nvgpu_vm_unmap(as_share->vm, args->offset, NULL); - - return 0; -} - -static int gk20a_as_ioctl_map_buffer_batch( - struct gk20a_as_share *as_share, - struct nvgpu_as_map_buffer_batch_args *args) -{ - struct gk20a *g = gk20a_from_vm(as_share->vm); - u32 i; - int err = 0; - - struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = - (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) - args->unmaps; - struct nvgpu_as_map_buffer_ex_args __user *user_map_args = - (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) - args->maps; - - struct vm_gk20a_mapping_batch batch; - - nvgpu_log_fn(g, " "); - - if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || - args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) - return -EINVAL; - - nvgpu_vm_mapping_batch_start(&batch); - - for (i = 0; i < args->num_unmaps; ++i) { - struct nvgpu_as_unmap_buffer_args unmap_args; - - if (copy_from_user(&unmap_args, &user_unmap_args[i], - sizeof(unmap_args))) { - err = -EFAULT; - break; - } - - 
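/* Each unmap below goes through the shared 'batch' context, so the
 * expensive follow-up work (TLB invalidate / L2 flush) can be deferred
 * to the single nvgpu_vm_mapping_batch_finish() call further down. */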
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); - } - - if (err) { - nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); - - args->num_unmaps = i; - args->num_maps = 0; - return err; - } - - for (i = 0; i < args->num_maps; ++i) { - s16 compressible_kind; - s16 incompressible_kind; - - struct nvgpu_as_map_buffer_ex_args map_args; - memset(&map_args, 0, sizeof(map_args)); - - if (copy_from_user(&map_args, &user_map_args[i], - sizeof(map_args))) { - err = -EFAULT; - break; - } - - if (map_args.flags & - NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { - compressible_kind = map_args.compr_kind; - incompressible_kind = map_args.incompr_kind; - } else { - /* direct kind control must be used */ - err = -EINVAL; - break; - } - - err = nvgpu_vm_map_buffer( - as_share->vm, map_args.dmabuf_fd, - &map_args.offset, map_args.flags, - compressible_kind, incompressible_kind, - map_args.buffer_offset, - map_args.mapping_size, - &batch); - if (err) - break; - } - - nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); - - if (err) - args->num_maps = i; - /* note: args->num_unmaps will be unmodified, which is ok - * since all unmaps are done */ - - return err; -} - -static int gk20a_as_ioctl_get_va_regions( - struct gk20a_as_share *as_share, - struct nvgpu_as_get_va_regions_args *args) -{ - unsigned int i; - unsigned int write_entries; - struct nvgpu_as_va_region __user *user_region_ptr; - struct vm_gk20a *vm = as_share->vm; - struct gk20a *g = gk20a_from_vm(vm); - unsigned int page_sizes = gmmu_page_size_kernel; - - nvgpu_log_fn(g, " "); - - if (!vm->big_pages) - page_sizes--; - - write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); - if (write_entries > page_sizes) - write_entries = page_sizes; - - user_region_ptr = - (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; - - for (i = 0; i < write_entries; ++i) { - struct nvgpu_as_va_region region; - struct nvgpu_allocator *vma = vm->vma[i]; - - memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); - - region.page_size = vm->gmmu_page_sizes[i]; - region.offset = nvgpu_alloc_base(vma); - /* No __aeabi_uldivmod() on some platforms... 
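 * (the kernel provides no 64-bit divide helper on 32-bit ARM), so the
 * page count below is derived with a shift by ilog2(region.page_size)
 * rather than a u64 division.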
*/ - region.pages = (nvgpu_alloc_end(vma) - - nvgpu_alloc_base(vma)) >> ilog2(region.page_size); - - if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) - return -EFAULT; - } - - args->buf_size = - page_sizes * sizeof(struct nvgpu_as_va_region); - - return 0; -} - -static int nvgpu_as_ioctl_get_sync_ro_map( - struct gk20a_as_share *as_share, - struct nvgpu_as_get_sync_ro_map_args *args) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct vm_gk20a *vm = as_share->vm; - struct gk20a *g = gk20a_from_vm(vm); - u64 base_gpuva; - u32 sync_size; - int err = 0; - - if (!g->ops.fifo.get_sync_ro_map) - return -EINVAL; - - if (!gk20a_platform_has_syncpoints(g)) - return -EINVAL; - - err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); - if (err) - return err; - - args->base_gpuva = base_gpuva; - args->sync_size = sync_size; - - return err; -#else - return -EINVAL; -#endif -} - -int gk20a_as_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a_as_share *as_share; - struct gk20a *g; - int err; - - l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); - g = &l->g; - - nvgpu_log_fn(g, " "); - - err = gk20a_as_alloc_share(g, 0, 0, &as_share); - if (err) { - nvgpu_log_fn(g, "failed to alloc share"); - return err; - } - - filp->private_data = as_share; - return 0; -} - -int gk20a_as_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_as_share *as_share = filp->private_data; - - if (!as_share) - return 0; - - return gk20a_as_release_share(as_share); -} - -long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int err = 0; - struct gk20a_as_share *as_share = filp->private_data; - struct gk20a *g = gk20a_from_as(as_share->as); - - u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - err = gk20a_busy(g); - if (err) - return err; - - switch (cmd) { - case NVGPU_AS_IOCTL_BIND_CHANNEL: - trace_gk20a_as_ioctl_bind_channel(g->name); - err = gk20a_as_ioctl_bind_channel(as_share, - (struct nvgpu_as_bind_channel_args *)buf); - - break; - case NVGPU32_AS_IOCTL_ALLOC_SPACE: - { - struct nvgpu32_as_alloc_space_args *args32 = - (struct nvgpu32_as_alloc_space_args *)buf; - struct nvgpu_as_alloc_space_args args; - - args.pages = args32->pages; - args.page_size = args32->page_size; - args.flags = args32->flags; - args.o_a.offset = args32->o_a.offset; - trace_gk20a_as_ioctl_alloc_space(g->name); - err = gk20a_as_ioctl_alloc_space(as_share, &args); - args32->o_a.offset = args.o_a.offset; - break; - } - case NVGPU_AS_IOCTL_ALLOC_SPACE: - trace_gk20a_as_ioctl_alloc_space(g->name); - err = gk20a_as_ioctl_alloc_space(as_share, - (struct nvgpu_as_alloc_space_args *)buf); - break; - case NVGPU_AS_IOCTL_FREE_SPACE: - trace_gk20a_as_ioctl_free_space(g->name); - err = gk20a_as_ioctl_free_space(as_share, - (struct nvgpu_as_free_space_args *)buf); - break; - case NVGPU_AS_IOCTL_MAP_BUFFER_EX: - trace_gk20a_as_ioctl_map_buffer(g->name); - err = gk20a_as_ioctl_map_buffer_ex(as_share, - (struct nvgpu_as_map_buffer_ex_args *)buf); - break; - case NVGPU_AS_IOCTL_UNMAP_BUFFER: - trace_gk20a_as_ioctl_unmap_buffer(g->name); - err = 
gk20a_as_ioctl_unmap_buffer(as_share, - (struct nvgpu_as_unmap_buffer_args *)buf); - break; - case NVGPU_AS_IOCTL_GET_VA_REGIONS: - trace_gk20a_as_ioctl_get_va_regions(g->name); - err = gk20a_as_ioctl_get_va_regions(as_share, - (struct nvgpu_as_get_va_regions_args *)buf); - break; - case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: - err = gk20a_as_ioctl_map_buffer_batch(as_share, - (struct nvgpu_as_map_buffer_batch_args *)buf); - break; - case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: - err = nvgpu_as_ioctl_get_sync_ro_map(as_share, - (struct nvgpu_as_get_sync_ro_map_args *)buf); - break; - default: - err = -ENOTTY; - break; - } - - gk20a_idle(g); - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) - err = -EFAULT; - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.h b/drivers/gpu/nvgpu/common/linux/ioctl_as.h deleted file mode 100644 index b3de3782..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * GK20A Address Spaces - * - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_COMMON_LINUX_AS_H__ -#define __NVGPU_COMMON_LINUX_AS_H__ - -struct inode; -struct file; - -/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and - * num_maps */ -#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 - -/* struct file_operations driver interface */ -int gk20a_as_dev_open(struct inode *inode, struct file *filp); -int gk20a_as_dev_release(struct inode *inode, struct file *filp); -long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c deleted file mode 100644 index b04bb9de..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ /dev/null @@ -1,1388 +0,0 @@ -/* - * GK20A Graphics channel - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" -#include "gk20a/fence_gk20a.h" - -#include "platform_gk20a.h" -#include "ioctl_channel.h" -#include "channel.h" -#include "os_linux.h" -#include "ctxsw_trace.h" - -/* the minimal size of client buffer */ -#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ - (sizeof(struct gk20a_cs_snapshot_fifo) + \ - sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) - -static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: - return "WFI"; - default: - return "?"; - } -} - -static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: - return "WFI"; - case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: - return "CTA"; - default: - return "?"; - } -} - -static void gk20a_channel_trace_sched_param( - void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, - u32 timeout, const char *interleave, - const char *graphics_preempt_mode, - const char *compute_preempt_mode), - struct channel_gk20a *ch) -{ - struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); - - if (!tsg) - return; - - (trace)(ch->chid, ch->tsgid, ch->pid, - tsg_gk20a_from_ch(ch)->timeslice_us, - ch->timeout_ms_max, - gk20a_fifo_interleave_level_name(tsg->interleave_level), - gr_gk20a_graphics_preempt_mode_name( - tsg->gr_ctx.graphics_preempt_mode), - gr_gk20a_compute_preempt_mode_name( - tsg->gr_ctx.compute_preempt_mode)); -} - -/* - * Although channels do have pointers back to the gk20a struct that they were - * created under in cases where the driver is killed that pointer can be bad. - * The channel memory can be freed before the release() function for a given - * channel is called. This happens when the driver dies and userspace doesn't - * get a chance to call release() until after the entire gk20a driver data is - * unloaded and freed. - */ -struct channel_priv { - struct gk20a *g; - struct channel_gk20a *c; -}; - -#if defined(CONFIG_GK20A_CYCLE_STATS) - -void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - /* disable existing cyclestats buffer */ - nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); - if (priv->cyclestate_buffer_handler) { - dma_buf_vunmap(priv->cyclestate_buffer_handler, - ch->cyclestate.cyclestate_buffer); - dma_buf_put(priv->cyclestate_buffer_handler); - priv->cyclestate_buffer_handler = NULL; - ch->cyclestate.cyclestate_buffer = NULL; - ch->cyclestate.cyclestate_buffer_size = 0; - } - nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); -} - -static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, - struct nvgpu_cycle_stats_args *args) -{ - struct dma_buf *dmabuf; - void *virtual_address; - struct nvgpu_channel_linux *priv = ch->os_priv; - - /* is it allowed to handle calls for current GPU? 
*/ - if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) - return -ENOSYS; - - if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) { - - /* set up new cyclestats buffer */ - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - virtual_address = dma_buf_vmap(dmabuf); - if (!virtual_address) { - dma_buf_put(dmabuf); - return -ENOMEM; - } - - priv->cyclestate_buffer_handler = dmabuf; - ch->cyclestate.cyclestate_buffer = virtual_address; - ch->cyclestate.cyclestate_buffer_size = dmabuf->size; - return 0; - - } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) { - gk20a_channel_free_cycle_stats_buffer(ch); - return 0; - - } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) { - /* no request from GL */ - return 0; - - } else { - pr_err("channel already has cyclestats buffer\n"); - return -EINVAL; - } -} - -static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) -{ - int ret; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (ch->cs_client) - ret = gr_gk20a_css_flush(ch, ch->cs_client); - else - ret = -EBADF; - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; -} - -static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, - u32 dmabuf_fd, - u32 perfmon_id_count, - u32 *perfmon_id_start) -{ - int ret = 0; - struct gk20a *g = ch->g; - struct gk20a_cs_snapshot_client_linux *client_linux; - struct gk20a_cs_snapshot_client *client; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (ch->cs_client) { - nvgpu_mutex_release(&ch->cs_client_mutex); - return -EEXIST; - } - - client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); - if (!client_linux) { - ret = -ENOMEM; - goto err; - } - - client_linux->dmabuf_fd = dmabuf_fd; - client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); - if (IS_ERR(client_linux->dma_handler)) { - ret = PTR_ERR(client_linux->dma_handler); - client_linux->dma_handler = NULL; - goto err_free; - } - - client = &client_linux->cs_client; - client->snapshot_size = client_linux->dma_handler->size; - if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { - ret = -ENOMEM; - goto err_put; - } - - client->snapshot = (struct gk20a_cs_snapshot_fifo *) - dma_buf_vmap(client_linux->dma_handler); - if (!client->snapshot) { - ret = -ENOMEM; - goto err_put; - } - - ch->cs_client = client; - - ret = gr_gk20a_css_attach(ch, - perfmon_id_count, - perfmon_id_start, - ch->cs_client); - - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; - -err_put: - dma_buf_put(client_linux->dma_handler); -err_free: - nvgpu_kfree(g, client_linux); -err: - nvgpu_mutex_release(&ch->cs_client_mutex); - return ret; -} - -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) -{ - int ret; - struct gk20a_cs_snapshot_client_linux *client_linux; - - nvgpu_mutex_acquire(&ch->cs_client_mutex); - if (!ch->cs_client) { - nvgpu_mutex_release(&ch->cs_client_mutex); - return 0; - } - - client_linux = container_of(ch->cs_client, - struct gk20a_cs_snapshot_client_linux, - cs_client); - - ret = gr_gk20a_css_detach(ch, ch->cs_client); - - if (client_linux->dma_handler) { - if (ch->cs_client->snapshot) - dma_buf_vunmap(client_linux->dma_handler, - ch->cs_client->snapshot); - dma_buf_put(client_linux->dma_handler); - } - - ch->cs_client = NULL; - nvgpu_kfree(ch->g, client_linux); - - nvgpu_mutex_release(&ch->cs_client_mutex); - - return ret; -} - -static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, - struct nvgpu_cycle_stats_snapshot_args *args) -{ - int ret; - - /* is it allowed to handle calls for
current GPU? */ - if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) - return -ENOSYS; - - if (!args->dmabuf_fd) - return -EINVAL; - - /* handle the command (most frequent cases first) */ - switch (args->cmd) { - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: - ret = gk20a_flush_cycle_stats_snapshot(ch); - args->extra = 0; - break; - - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: - ret = gk20a_attach_cycle_stats_snapshot(ch, - args->dmabuf_fd, - args->extra, - &args->extra); - break; - - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: - ret = gk20a_channel_free_cycle_stats_snapshot(ch); - args->extra = 0; - break; - - default: - pr_err("cyclestats: unknown command %u\n", args->cmd); - ret = -EINVAL; - break; - } - - return ret; -} -#endif - -static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, - struct nvgpu_channel_wdt_args *args) -{ - u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | - NVGPU_IOCTL_CHANNEL_ENABLE_WDT); - - if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) - ch->timeout.enabled = false; - else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) - ch->timeout.enabled = true; - else - return -EINVAL; - - if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) - ch->timeout.limit_ms = args->timeout_ms; - - ch->timeout.debug_dump = (args->wdt_status & - NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; - - return 0; -} - -static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) -{ - struct nvgpu_channel_linux *priv = ch->os_priv; - - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - if (priv->error_notifier.dmabuf) { - dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); - dma_buf_put(priv->error_notifier.dmabuf); - priv->error_notifier.dmabuf = NULL; - priv->error_notifier.notification = NULL; - priv->error_notifier.vaddr = NULL; - } - nvgpu_mutex_release(&priv->error_notifier.mutex); -} - -static int gk20a_init_error_notifier(struct channel_gk20a *ch, - struct nvgpu_set_error_notifier *args) -{ - struct dma_buf *dmabuf; - void *va; - u64 end = args->offset + sizeof(struct nvgpu_notification); - struct nvgpu_channel_linux *priv = ch->os_priv; - - if (!args->mem) { - pr_err("gk20a_init_error_notifier: invalid memory handle\n"); - return -EINVAL; - } - - dmabuf = dma_buf_get(args->mem); - - gk20a_channel_free_error_notifiers(ch); - - if (IS_ERR(dmabuf)) { - pr_err("Invalid handle: %d\n", args->mem); - return -EINVAL; - } - - if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { - dma_buf_put(dmabuf); - nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); - return -EINVAL; - } - - nvgpu_speculation_barrier(); - - /* map handle */ - va = dma_buf_vmap(dmabuf); - if (!va) { - dma_buf_put(dmabuf); - pr_err("Cannot map notifier handle\n"); - return -ENOMEM; - } - - priv->error_notifier.notification = va + args->offset; - priv->error_notifier.vaddr = va; - memset(priv->error_notifier.notification, 0, - sizeof(struct nvgpu_notification)); - - /* set channel notifiers pointer */ - nvgpu_mutex_acquire(&priv->error_notifier.mutex); - priv->error_notifier.dmabuf = dmabuf; - nvgpu_mutex_release(&priv->error_notifier.mutex); - - return 0; -} - -/* - * This returns the channel with a reference. The caller must - * gk20a_channel_put() the ref back after use. - * - * NULL is returned if the channel was not found. 
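 *
 * Typical use, as in gk20a_as_ioctl_bind_channel() above:
 *
 *	ch = gk20a_get_channel_from_file(args->channel_fd);
 *	if (!ch)
 *		return -EINVAL;
 *	... operate on the channel, then drop the ref ...
 *	gk20a_channel_put(ch);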
- */ -struct channel_gk20a *gk20a_get_channel_from_file(int fd) -{ - struct channel_gk20a *ch; - struct channel_priv *priv; - struct file *f = fget(fd); - - if (!f) - return NULL; - - if (f->f_op != &gk20a_channel_ops) { - fput(f); - return NULL; - } - - priv = (struct channel_priv *)f->private_data; - ch = gk20a_channel_get(priv->c); - fput(f); - return ch; -} - -int gk20a_channel_release(struct inode *inode, struct file *filp) -{ - struct channel_priv *priv = filp->private_data; - struct channel_gk20a *ch; - struct gk20a *g; - - int err; - - /* We could still end up here even if the channel_open failed, e.g. - * if we ran out of hw channel IDs. - */ - if (!priv) - return 0; - - ch = priv->c; - g = priv->g; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to release a channel!"); - goto channel_release; - } - - trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); - - gk20a_channel_close(ch); - gk20a_channel_free_error_notifiers(ch); - - gk20a_idle(g); - -channel_release: - gk20a_put(g); - nvgpu_kfree(g, filp->private_data); - filp->private_data = NULL; - return 0; -} - -/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ -static int __gk20a_channel_open(struct gk20a *g, - struct file *filp, s32 runlist_id) -{ - int err; - struct channel_gk20a *ch; - struct channel_priv *priv; - - nvgpu_log_fn(g, " "); - - g = gk20a_get(g); - if (!g) - return -ENODEV; - - trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on, %d", err); - goto fail_busy; - } - /* All user space channels should be unprivileged */ - ch = gk20a_open_new_channel(g, runlist_id, false, - nvgpu_current_pid(g), nvgpu_current_tid(g)); - gk20a_idle(g); - if (!ch) { - nvgpu_err(g, - "failed to get channel"); - err = -ENOMEM; - goto fail_busy; - } - - gk20a_channel_trace_sched_param( - trace_gk20a_channel_sched_defaults, ch); - - priv->g = g; - priv->c = ch; - - filp->private_data = priv; - return 0; - -fail_busy: - nvgpu_kfree(g, priv); -free_ref: - gk20a_put(g); - return err; -} - -int gk20a_channel_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, channel.cdev); - struct gk20a *g = &l->g; - int ret; - - nvgpu_log_fn(g, "start"); - ret = __gk20a_channel_open(g, filp, -1); - - nvgpu_log_fn(g, "end"); - return ret; -} - -int gk20a_channel_open_ioctl(struct gk20a *g, - struct nvgpu_channel_open_args *args) -{ - int err; - int fd; - struct file *file; - char name[64]; - s32 runlist_id = args->in.runlist_id; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvhost-%s-fd%d", - dev_name(dev_from_gk20a(g)), fd); - - file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = __gk20a_channel_open(g, file, runlist_id); - if (err) - goto clean_up_file; - - fd_install(fd, file); - args->out.channel_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) - flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR; - - if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) - flags
|= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC; - - if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) - flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE; - - return flags; -} - -static void nvgpu_get_gpfifo_ex_args( - struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, - struct nvgpu_gpfifo_args *gpfifo_args) -{ - gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries; - gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs; - gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( - alloc_gpfifo_ex_args->flags); -} - -static void nvgpu_get_gpfifo_args( - struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, - struct nvgpu_gpfifo_args *gpfifo_args) -{ - /* - * Kernel can insert one extra gpfifo entry before user - * submitted gpfifos and another one after, for internal usage. - * Triple the requested size. - */ - gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3; - gpfifo_args->num_inflight_jobs = 0; - gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( - alloc_gpfifo_args->flags); -} - -static void nvgpu_get_fence_args( - struct nvgpu_fence *fence_args_in, - struct nvgpu_channel_fence *fence_args_out) -{ - fence_args_out->id = fence_args_in->id; - fence_args_out->value = fence_args_in->value; -} - -static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, - ulong id, u32 offset, - u32 payload, u32 timeout) -{ - struct dma_buf *dmabuf; - void *data; - u32 *semaphore; - int ret = 0; - - /* do not wait if channel has timed out */ - if (ch->has_timedout) - return -ETIMEDOUT; - - dmabuf = dma_buf_get(id); - if (IS_ERR(dmabuf)) { - nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); - return -EINVAL; - } - - data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); - if (!data) { - nvgpu_err(ch->g, "failed to map notifier memory"); - ret = -EINVAL; - goto cleanup_put; - } - - semaphore = data + (offset & ~PAGE_MASK); - - ret = NVGPU_COND_WAIT_INTERRUPTIBLE( - &ch->semaphore_wq, - *semaphore == payload || ch->has_timedout, - timeout); - - dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); -cleanup_put: - dma_buf_put(dmabuf); - return ret; -} - -static int gk20a_channel_wait(struct channel_gk20a *ch, - struct nvgpu_wait_args *args) -{ - struct dma_buf *dmabuf; - struct gk20a *g = ch->g; - struct notification *notif; - struct timespec tv; - u64 jiffies; - ulong id; - u32 offset; - int remain, ret = 0; - u64 end; - - nvgpu_log_fn(g, " "); - - if (ch->has_timedout) - return -ETIMEDOUT; - - switch (args->type) { - case NVGPU_WAIT_TYPE_NOTIFIER: - id = args->condition.notifier.dmabuf_fd; - offset = args->condition.notifier.offset; - end = offset + sizeof(struct notification); - - dmabuf = dma_buf_get(id); - if (IS_ERR(dmabuf)) { - nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", - id); - return -EINVAL; - } - - if (end > dmabuf->size || end < sizeof(struct notification)) { - dma_buf_put(dmabuf); - nvgpu_err(g, "invalid notifier offset"); - return -EINVAL; - } - - nvgpu_speculation_barrier(); - - notif = dma_buf_vmap(dmabuf); - if (!notif) { - nvgpu_err(g, "failed to map notifier memory"); - return -ENOMEM; - } - - notif = (struct notification *)((uintptr_t)notif + offset); - - /* user should set status pending before - * calling this ioctl */ - remain = NVGPU_COND_WAIT_INTERRUPTIBLE( - &ch->notifier_wq, - notif->status == 0 || ch->has_timedout, - args->timeout); - - if (remain == 0 && notif->status != 0) { - ret = -ETIMEDOUT; - goto notif_clean_up; - } else if (remain < 0) { - ret = -EINTR; - goto notif_clean_up; - 
} - - /* TBD: fill in correct information */ - jiffies = get_jiffies_64(); - jiffies_to_timespec(jiffies, &tv); - notif->timestamp.nanoseconds[0] = tv.tv_nsec; - notif->timestamp.nanoseconds[1] = tv.tv_sec; - notif->info32 = 0xDEADBEEF; /* should be object name */ - notif->info16 = ch->chid; /* should be method offset */ - -notif_clean_up: - dma_buf_vunmap(dmabuf, notif); - return ret; - - case NVGPU_WAIT_TYPE_SEMAPHORE: - ret = gk20a_channel_wait_semaphore(ch, - args->condition.semaphore.dmabuf_fd, - args->condition.semaphore.offset, - args->condition.semaphore.payload, - args->timeout); - - break; - - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, - struct nvgpu_zcull_bind_args *args) -{ - struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; - - nvgpu_log_fn(gr->g, " "); - - return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, - args->gpu_va, args->mode); -} - -static int gk20a_ioctl_channel_submit_gpfifo( - struct channel_gk20a *ch, - struct nvgpu_submit_gpfifo_args *args) -{ - struct nvgpu_channel_fence fence; - struct gk20a_fence *fence_out; - struct fifo_profile_gk20a *profile = NULL; - u32 submit_flags = 0; - int fd = -1; - struct gk20a *g = ch->g; - - int ret = 0; - nvgpu_log_fn(g, " "); - - profile = gk20a_fifo_profile_acquire(ch->g); - gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); - - if (ch->has_timedout) - return -ETIMEDOUT; - - nvgpu_get_fence_args(&args->fence, &fence); - submit_flags = - nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); - - /* Try and allocate an fd here*/ - if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; - } - - ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, - submit_flags, &fence, - &fence_out, profile); - - if (ret) { - if (fd != -1) - put_unused_fd(fd); - goto clean_up; - } - - /* Convert fence_out to something we can pass back to user space. 
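 * Two forms are possible: with NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE set
 * the fence is installed as a sync-fence fd in args->fence.id, otherwise
 * the raw syncpoint id/value pair is copied into args->fence.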
*/ - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { - ret = gk20a_fence_install_fd(fence_out, fd); - if (ret) - put_unused_fd(fd); - else - args->fence.id = fd; - } else { - args->fence.id = fence_out->syncpt_id; - args->fence.value = fence_out->syncpt_value; - } - } - gk20a_fence_put(fence_out); - - gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); - if (profile) - gk20a_fifo_profile_release(ch->g, profile); - -clean_up: - return ret; -} - -/* - * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* - * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* - */ -u32 nvgpu_get_common_runlist_level(u32 level) -{ - switch (level) { - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; - case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: - return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; - default: - pr_err("%s: incorrect runlist level\n", __func__); - } - - return level; -} - -static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) -{ - u32 flags = 0; - - if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) - flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; - - if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) - flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; - - return flags; -} - -static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, - u32 class_num, u32 user_flags) -{ - return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, - nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); -} - -/* - * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) -{ - u32 flags = 0; - - if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) - flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; - if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) - flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; - - return flags; -} - -/* - * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) -{ - u32 flags = 0; - - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; - if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) - flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; - - return flags; -} - -/* - * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - */ -u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: - return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; - case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: - return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; - } - - return graphics_preempt_mode; -} - -/* - * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - */ -u32 
nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: - return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; - case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: - return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; - case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: - return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; - } - - return compute_preempt_mode; -} - -/* - * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* - * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* - */ -static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) -{ - switch (graphics_preempt_mode) { - case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: - return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; - case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: - return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; - } - - return graphics_preempt_mode; -} - -/* - * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* - * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* - */ -static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) -{ - switch (compute_preempt_mode) { - case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: - return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; - case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: - return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; - case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: - return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; - } - - return compute_preempt_mode; -} - -static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, - u32 graphics_preempt_mode, u32 compute_preempt_mode) -{ - int err; - - if (ch->g->ops.gr.set_preemption_mode) { - err = gk20a_busy(ch->g); - if (err) { - nvgpu_err(ch->g, "failed to power on, %d", err); - return err; - } - err = ch->g->ops.gr.set_preemption_mode(ch, - nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), - nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); - gk20a_idle(ch->g); - } else { - err = -EINVAL; - } - - return err; -} - -static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, - struct nvgpu_get_user_syncpoint_args *args) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a *g = ch->g; - int err; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { - nvgpu_err(g, "user syncpoints not supported"); - return -EINVAL; - } - - if (!gk20a_platform_has_syncpoints(g)) { - nvgpu_err(g, "syncpoints not supported"); - return -EINVAL; - } - - if (g->aggressive_sync_destroy_thresh) { - nvgpu_err(g, "sufficient syncpoints not available"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&ch->sync_lock); - if (ch->user_sync) { - nvgpu_mutex_release(&ch->sync_lock); - } else { - ch->user_sync = gk20a_channel_sync_create(ch, true); - if (!ch->user_sync) { - nvgpu_mutex_release(&ch->sync_lock); - return -ENOMEM; - } - nvgpu_mutex_release(&ch->sync_lock); - - if (g->ops.fifo.resetup_ramfc) { - err = g->ops.fifo.resetup_ramfc(ch); - if (err) - return err; - } - } - - args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); - args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, - args->syncpoint_id); - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) - args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); - else - args->gpu_va = 0; - - return 0; -#else - return -EINVAL; -#endif -} - -long gk20a_channel_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct channel_priv *priv = filp->private_data; - struct channel_gk20a *ch = priv->c; - struct device 
*dev = dev_from_gk20a(ch->g); - u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; - int err = 0; - struct gk20a *g = ch->g; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) - return -EINVAL; - - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - /* take a ref or return timeout if channel refs can't be taken */ - ch = gk20a_channel_get(ch); - if (!ch) - return -ETIMEDOUT; - - /* protect our sanity for threaded userspace - most of the channel is - * not thread safe */ - nvgpu_mutex_acquire(&ch->ioctl_lock); - - /* this ioctl call keeps a ref to the file which keeps a ref to the - * channel */ - - switch (cmd) { - case NVGPU_IOCTL_CHANNEL_OPEN: - err = gk20a_channel_open_ioctl(ch->g, - (struct nvgpu_channel_open_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: - break; - case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: - { - struct nvgpu_alloc_obj_ctx_args *args = - (struct nvgpu_alloc_obj_ctx_args *)buf; - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: - { - struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = - (struct nvgpu_alloc_gpfifo_ex_args *)buf; - struct nvgpu_gpfifo_args gpfifo_args; - - nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args); - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { - err = -EINVAL; - gk20a_idle(ch->g); - break; - } - err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: - { - struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = - (struct nvgpu_alloc_gpfifo_args *)buf; - struct nvgpu_gpfifo_args gpfifo_args; - - nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args); - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); - gk20a_idle(ch->g); - break; - } - case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: - err = gk20a_ioctl_channel_submit_gpfifo(ch, - (struct nvgpu_submit_gpfifo_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_WAIT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - - /* waiting is thread-safe, not dropping this mutex could - * deadlock in certain conditions */ - nvgpu_mutex_release(&ch->ioctl_lock); - - err = gk20a_channel_wait(ch, - (struct nvgpu_wait_args *)buf); - - nvgpu_mutex_acquire(&ch->ioctl_lock); - - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_zcull_bind(ch, - (struct nvgpu_zcull_bind_args *)buf); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = 
gk20a_init_error_notifier(ch, - (struct nvgpu_set_error_notifier *)buf); - gk20a_idle(ch->g); - break; -#ifdef CONFIG_GK20A_CYCLE_STATS - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_cycle_stats(ch, - (struct nvgpu_cycle_stats_args *)buf); - gk20a_idle(ch->g); - break; -#endif - case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: - { - u32 timeout = - (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; - nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", - timeout, ch->chid); - ch->timeout_ms_max = timeout; - gk20a_channel_trace_sched_param( - trace_gk20a_channel_set_timeout, ch); - break; - } - case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: - { - u32 timeout = - (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; - bool timeout_debug_dump = !((u32) - ((struct nvgpu_set_timeout_ex_args *)buf)->flags & - (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); - nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", - timeout, ch->chid); - ch->timeout_ms_max = timeout; - ch->timeout_debug_dump = timeout_debug_dump; - gk20a_channel_trace_sched_param( - trace_gk20a_channel_set_timeout, ch); - break; - } - case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: - ((struct nvgpu_get_param_args *)buf)->value = - ch->has_timedout; - break; - case NVGPU_IOCTL_CHANNEL_ENABLE: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - if (ch->g->ops.fifo.enable_channel) - ch->g->ops.fifo.enable_channel(ch); - else - err = -ENOSYS; - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_DISABLE: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - if (ch->g->ops.fifo.disable_channel) - ch->g->ops.fifo.disable_channel(ch); - else - err = -ENOSYS; - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_PREEMPT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_fifo_preempt(ch->g, ch); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: - if (!capable(CAP_SYS_NICE)) { - err = -EPERM; - break; - } - if (!ch->g->ops.fifo.reschedule_runlist) { - err = -ENOSYS; - break; - } - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.fifo.reschedule_runlist(ch, - NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & - ((struct nvgpu_reschedule_runlist_args *)buf)->flags); - gk20a_idle(ch->g); - break; - case NVGPU_IOCTL_CHANNEL_FORCE_RESET: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.fifo.force_reset_ch(ch, - NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); - gk20a_idle(ch->g); - break; -#ifdef CONFIG_GK20A_CYCLE_STATS - case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = gk20a_channel_cycle_stats_snapshot(ch, - (struct nvgpu_cycle_stats_snapshot_args *)buf); - gk20a_idle(ch->g); - break; -#endif - case NVGPU_IOCTL_CHANNEL_WDT: - err = gk20a_channel_set_wdt_status(ch, - (struct nvgpu_channel_wdt_args *)buf); - break; - case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: 
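/* args carry the linux UAPI NVGPU_GRAPHICS/COMPUTE_PREEMPTION_MODE_*
 * values; the helper translates them to the common
 * NVGPU_PREEMPTION_MODE_* values (see
 * nvgpu_get_common_graphics/compute_preempt_mode() above) before
 * calling g->ops.gr.set_preemption_mode(). */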
- err = nvgpu_ioctl_channel_set_preemption_mode(ch, - ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, - ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); - break; - case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: - if (ch->g->ops.gr.set_boosted_ctx) { - bool boost = - ((struct nvgpu_boosted_ctx_args *)buf)->boost; - - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = ch->g->ops.gr.set_boosted_ctx(ch, boost); - gk20a_idle(ch->g); - } else { - err = -EINVAL; - } - break; - case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: - err = gk20a_busy(ch->g); - if (err) { - dev_err(dev, - "%s: failed to host gk20a for ioctl cmd: 0x%x", - __func__, cmd); - break; - } - err = nvgpu_ioctl_channel_get_user_syncpoint(ch, - (struct nvgpu_get_user_syncpoint_args *)buf); - gk20a_idle(ch->g); - break; - default: - dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - - nvgpu_mutex_release(&ch->ioctl_lock); - - gk20a_channel_put(ch); - - nvgpu_log_fn(g, "end"); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h b/drivers/gpu/nvgpu/common/linux/ioctl_channel.h deleted file mode 100644 index 48cff1ea..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef __NVGPU_IOCTL_CHANNEL_H__ -#define __NVGPU_IOCTL_CHANNEL_H__ - -#include - -#include "gk20a/css_gr_gk20a.h" - -struct inode; -struct file; -struct gk20a; -struct nvgpu_channel_open_args; - -struct gk20a_cs_snapshot_client_linux { - struct gk20a_cs_snapshot_client cs_client; - - u32 dmabuf_fd; - struct dma_buf *dma_handler; -}; - -int gk20a_channel_open(struct inode *inode, struct file *filp); -int gk20a_channel_release(struct inode *inode, struct file *filp); -long gk20a_channel_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -int gk20a_channel_open_ioctl(struct gk20a *g, - struct nvgpu_channel_open_args *args); - -int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); -void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); - -extern const struct file_operations gk20a_channel_ops; - -u32 nvgpu_get_common_runlist_level(u32 level); - -u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); -u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); -u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); -u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c deleted file mode 100644 index a7c6a607..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#ifdef CONFIG_DEBUG_FS -#include -#endif -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "clk/clk.h" -#include "pstate/pstate.h" -#include "lpwr/lpwr.h" -#include "volt/volt.h" - -#ifdef CONFIG_DEBUG_FS -#include "common/linux/os_linux.h" -#endif - -static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, - struct file *filp) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_clk_session *session = dev->session; - - - clk_arb_dbg(session->g, " "); - - nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); - nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); - return 0; -} - -static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) -{ - unsigned int poll_mask = 0; - - if (nvgpu_poll_mask & NVGPU_POLLIN) - poll_mask |= POLLIN; - if (nvgpu_poll_mask & NVGPU_POLLPRI) - poll_mask |= POLLPRI; - if (nvgpu_poll_mask & NVGPU_POLLOUT) - poll_mask |= POLLOUT; - if (nvgpu_poll_mask & NVGPU_POLLRDNORM) - poll_mask |= POLLRDNORM; - if (nvgpu_poll_mask & NVGPU_POLLHUP) - poll_mask |= POLLHUP; - - return poll_mask; -} - -static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - - clk_arb_dbg(dev->session->g, " "); - - poll_wait(filp, &dev->readout_wq.wq, wait); - return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); -} - -void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) -{ - nvgpu_cond_broadcast_interruptible(&dev->readout_wq); -} - -static int nvgpu_clk_arb_release_event_dev(struct inode *inode, - struct file *filp) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_clk_session *session = dev->session; - struct nvgpu_clk_arb *arb; - - arb = session->g->clk_arb; - - clk_arb_dbg(session->g, " "); - - if (arb) { - nvgpu_spinlock_acquire(&arb->users_lock); - nvgpu_list_del(&dev->link); - nvgpu_spinlock_release(&arb->users_lock); - nvgpu_clk_notification_queue_free(arb->g, &dev->queue); - } - - nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); - nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); - - return 0; -} - -static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) -{ - u32 nvgpu_gpu_event; - - switch (nvgpu_event) { - case NVGPU_EVENT_VF_UPDATE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; - break; - case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; - break; - case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; - break; - case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; - break; - case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: - nvgpu_gpu_event = 
NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; - break; - case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; - break; - case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; - break; - case NVGPU_EVENT_ALARM_GPU_LOST: - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; - break; - default: - /* Control shouldn't come here */ - nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; - break; - } - return nvgpu_gpu_event; -} - -static inline u32 __pending_event(struct nvgpu_clk_dev *dev, - struct nvgpu_gpu_event_info *info) { - - u32 tail, head; - u32 events = 0; - struct nvgpu_clk_notification *p_notif; - - tail = nvgpu_atomic_read(&dev->queue.tail); - head = nvgpu_atomic_read(&dev->queue.head); - - head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; - - if (_WRAPGTEQ(tail, head) && info) { - head++; - p_notif = &dev->queue.notifications[head % dev->queue.size]; - events |= nvgpu_convert_gpu_event(p_notif->notification); - info->event_id = ffs(events) - 1; - info->timestamp = p_notif->timestamp; - nvgpu_atomic_set(&dev->queue.head, head); - } - - return events; -} - -static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, - size_t size, loff_t *off) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct nvgpu_gpu_event_info info; - ssize_t err; - - clk_arb_dbg(dev->session->g, - "filp=%p, buf=%p, size=%zu", filp, buf, size); - - if ((size - *off) < sizeof(info)) - return 0; - - memset(&info, 0, sizeof(info)); - /* Get the oldest event from the queue */ - while (!__pending_event(dev, &info)) { - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, - __pending_event(dev, &info), 0); - if (err) - return err; - if (info.timestamp) - break; - } - - if (copy_to_user(buf + *off, &info, sizeof(info))) - return -EFAULT; - - return sizeof(info); -} - -static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, - struct nvgpu_gpu_set_event_filter_args *args) -{ - struct gk20a *g = dev->session->g; - u32 mask; - - nvgpu_log(g, gpu_dbg_fn, " "); - - if (args->flags) - return -EINVAL; - - if (args->size != 1) - return -EINVAL; - - if (copy_from_user(&mask, (void __user *) args->buffer, - args->size * sizeof(u32))) - return -EFAULT; - - /* update alarm mask */ - nvgpu_atomic_set(&dev->enabled_mask, mask); - - return 0; -} - -static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct nvgpu_clk_dev *dev = filp->private_data; - struct gk20a *g = dev->session->g; - u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) - || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) - return -EINVAL; - - BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_EVENT_IOCTL_SET_FILTER: - err = nvgpu_clk_arb_set_event_filter(dev, - (struct nvgpu_gpu_set_event_filter_args *)buf); - break; - default: - nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); - - return err; -} - -static const struct 
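/*
 * Note on __pending_event() above: the notification queue is a lossy
 * single-producer/single-consumer ring indexed by free-running atomic
 * counters; the "head = (tail - head) < size ? head : tail - size" step
 * fast-forwards a reader that has fallen a full ring behind, so it never
 * observes slots the writer may have overwritten. A minimal non-atomic
 * sketch of the same consume step (types and names here are
 * illustrative, not the driver's):
 */
#define SKETCH_RING_SIZE 8u

struct sketch_ring {
	unsigned int head, tail;	/* free-running u32 counters; writer bumps tail */
	int slot[SKETCH_RING_SIZE];	/* payload; 8 is an arbitrary example capacity */
};

static int sketch_ring_consume(struct sketch_ring *r, int *out)
{
	unsigned int tail = r->tail;
	unsigned int head = r->head;

	if (tail - head >= SKETCH_RING_SIZE)	/* reader overrun: drop the oldest */
		head = tail - SKETCH_RING_SIZE;
	if (tail == head)
		return 0;			/* queue is empty */
	head++;					/* unread entries live at head+1 .. tail */
	*out = r->slot[head % SKETCH_RING_SIZE];
	r->head = head;
	return 1;
}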
file_operations completion_dev_ops = { - .owner = THIS_MODULE, - .release = nvgpu_clk_arb_release_completion_dev, - .poll = nvgpu_clk_arb_poll_dev, -}; - -static const struct file_operations event_dev_ops = { - .owner = THIS_MODULE, - .release = nvgpu_clk_arb_release_event_dev, - .poll = nvgpu_clk_arb_poll_dev, - .read = nvgpu_clk_arb_read_event_dev, -#ifdef CONFIG_COMPAT - .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, -#endif - .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, -}; - -static int nvgpu_clk_arb_install_fd(struct gk20a *g, - struct nvgpu_clk_session *session, - const struct file_operations *fops, - struct nvgpu_clk_dev **_dev) -{ - struct file *file; - int fd; - int err; - int status; - char name[64]; - struct nvgpu_clk_dev *dev; - - clk_arb_dbg(g, " "); - - dev = nvgpu_kzalloc(g, sizeof(*dev)); - if (!dev) - return -ENOMEM; - - status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, - DEFAULT_EVENT_NUMBER); - if (status < 0) { - err = status; - goto fail; - } - - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) { - err = fd; - goto fail; - } - - snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); - file = anon_inode_getfile(name, fops, dev, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto fail_fd; - } - - fd_install(fd, file); - - nvgpu_cond_init(&dev->readout_wq); - - nvgpu_atomic_set(&dev->poll_mask, 0); - - dev->session = session; - nvgpu_ref_init(&dev->refcount); - - nvgpu_ref_get(&session->refcount); - - *_dev = dev; - - return fd; - -fail_fd: - put_unused_fd(fd); -fail: - nvgpu_kfree(g, dev); - - return err; -} - -int nvgpu_clk_arb_install_event_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) -{ - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_dev *dev; - int fd; - - clk_arb_dbg(g, " "); - - fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); - if (fd < 0) - return fd; - - /* TODO: alarm mask needs to be set to default value to prevent - * failures of legacy tests. 
This will be removed when sanity is - * updated - */ - if (alarm_mask) - nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); - else - nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); - - dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); - - nvgpu_spinlock_acquire(&arb->users_lock); - nvgpu_list_add_tail(&dev->link, &arb->users); - nvgpu_spinlock_release(&arb->users_lock); - - *event_fd = fd; - - return 0; -} - -int nvgpu_clk_arb_install_request_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int *request_fd) -{ - struct nvgpu_clk_dev *dev; - int fd; - - clk_arb_dbg(g, " "); - - fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); - if (fd < 0) - return fd; - - *request_fd = fd; - - return 0; -} - -int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, - struct nvgpu_clk_session *session, int request_fd) -{ - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_dev *dev; - struct fd fd; - int err = 0; - - clk_arb_dbg(g, " "); - - fd = fdget(request_fd); - if (!fd.file) - return -EINVAL; - - if (fd.file->f_op != &completion_dev_ops) { - err = -EINVAL; - goto fdput_fd; - } - - dev = (struct nvgpu_clk_dev *) fd.file->private_data; - - if (!dev || dev->session != session) { - err = -EINVAL; - goto fdput_fd; - } - nvgpu_ref_get(&dev->refcount); - nvgpu_spinlock_acquire(&session->session_lock); - nvgpu_list_add(&dev->node, &session->targets); - nvgpu_spinlock_release(&session->session_lock); - nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); - -fdput_fd: - fdput(fd); - return err; -} - -int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, - int request_fd, u32 api_domain, u16 target_mhz) -{ - struct nvgpu_clk_dev *dev; - struct fd fd; - int err = 0; - - clk_arb_dbg(session->g, - "domain=0x%08x target_mhz=%u", api_domain, target_mhz); - - fd = fdget(request_fd); - if (!fd.file) - return -EINVAL; - - if (fd.file->f_op != &completion_dev_ops) { - err = -EINVAL; - goto fdput_fd; - } - - dev = fd.file->private_data; - if (!dev || dev->session != session) { - err = -EINVAL; - goto fdput_fd; - } - - switch (api_domain) { - case NVGPU_CLK_DOMAIN_MCLK: - dev->mclk_target_mhz = target_mhz; - break; - - case NVGPU_CLK_DOMAIN_GPCCLK: - dev->gpc2clk_target_mhz = target_mhz * 2ULL; - break; - - default: - err = -EINVAL; - } - -fdput_fd: - fdput(fd); - return err; -} - -u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) -{ - u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); - u32 api_domains = 0; - - if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) - api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); - - if (clk_domains & CTRL_CLK_DOMAIN_MCLK) - api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); - - return api_domains; -} - -#ifdef CONFIG_DEBUG_FS -static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) -{ - struct gk20a *g = s->private; - struct nvgpu_clk_arb *arb = g->clk_arb; - struct nvgpu_clk_arb_debug *debug; - - u64 num; - s64 tmp, avg, std, max, min; - - debug = NV_ACCESS_ONCE(arb->debug); - /* Make copy of structure and ensure no reordering */ - nvgpu_smp_rmb(); - if (!debug) - return -EINVAL; - - std = debug->switch_std; - avg = debug->switch_avg; - max = debug->switch_max; - min = debug->switch_min; - num = debug->switch_num; - - tmp = std; - do_div(tmp, num); - seq_printf(s, "Number of transitions: %lld\n", - num); - seq_printf(s, "max / min : %lld / %lld usec\n", - max, min); - seq_printf(s, "avg / std : %lld / %ld usec\n", - avg, int_sqrt(tmp)); - - return 0; -} - -static int 
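/*
 * Note on nvgpu_clk_arb_stats_show() above: switch_std evidently holds
 * the accumulated sum of squared deviations rather than a precomputed
 * deviation, hence the divide-then-int_sqrt() at print time. The
 * do_div() detour is needed because a plain 64-bit '/' is unavailable to
 * 32-bit kernels; do_div(n, base) divides n in place by a 32-bit base
 * and evaluates to the remainder. A hedged sketch of the idiom:
 */
static unsigned long sketch_stddev_usec(u64 sum_sq_dev, u32 nr_samples)
{
	u64 variance = sum_sq_dev;

	do_div(variance, nr_samples);	/* variance = sum_sq_dev / n, in place */
	return int_sqrt(variance);	/* integer square root, <linux/kernel.h> */
}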
nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); -} - -static const struct file_operations nvgpu_clk_arb_stats_fops = { - .open = nvgpu_clk_arb_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -int nvgpu_clk_arb_debugfs_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct dentry *gpu_root = l->debugfs; - struct dentry *d; - - nvgpu_log(g, gpu_dbg_info, "g=%p", g); - - d = debugfs_create_file( - "arb_stats", - S_IRUGO, - gpu_root, - g, - &nvgpu_clk_arb_stats_fops); - if (!d) - return -ENOMEM; - - return 0; -} -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c deleted file mode 100644 index 73a8131d..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ /dev/null @@ -1,1962 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "ioctl_ctrl.h" -#include "ioctl_dbg.h" -#include "ioctl_as.h" -#include "ioctl_tsg.h" -#include "ioctl_channel.h" -#include "gk20a/gk20a.h" -#include "gk20a/fence_gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "dmabuf.h" -#include "channel.h" - -#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? 
\ - (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) -#define MHZ_TO_HZ(a) ((u64)a * MHZ) - -struct gk20a_ctrl_priv { - struct device *dev; - struct gk20a *g; - struct nvgpu_clk_session *clk_session; -}; - -static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) - core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; - - return core_flags; -} - -int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - struct gk20a_ctrl_priv *priv; - int err = 0; - - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, ctrl.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - nvgpu_log_fn(g, " "); - - priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - filp->private_data = priv; - priv->dev = dev_from_gk20a(g); - /* - * We dont close the arbiter fd's after driver teardown to support - * GPU_LOST events, so we store g here, instead of dereferencing the - * dev structure on teardown - */ - priv->g = g; - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - goto free_ref; - gk20a_idle(g); - } - - err = nvgpu_clk_arb_init_session(g, &priv->clk_session); -free_ref: - if (err) - gk20a_put(g); - return err; -} -int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_ctrl_priv *priv = filp->private_data; - struct gk20a *g = priv->g; - - nvgpu_log_fn(g, " "); - - if (priv->clk_session) - nvgpu_clk_arb_release_session(g, priv->clk_session); - - gk20a_put(g); - nvgpu_kfree(g, priv); - - return 0; -} - -struct nvgpu_flags_mapping { - u64 ioctl_flag; - int enabled_flag; -}; - -static struct nvgpu_flags_mapping flags_mapping[] = { - {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, - NVGPU_HAS_SYNCPOINTS}, - {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, - NVGPU_SUPPORT_PARTIAL_MAPPINGS}, - {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, - NVGPU_SUPPORT_SPARSE_ALLOCS}, - {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, - NVGPU_SUPPORT_SYNC_FENCE_FDS}, - {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, - NVGPU_SUPPORT_CYCLE_STATS}, - {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, - NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, - {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, - NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, - {NVGPU_GPU_FLAGS_SUPPORT_TSG, - NVGPU_SUPPORT_TSG}, - {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, - NVGPU_SUPPORT_CLOCK_CONTROLS}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, - NVGPU_SUPPORT_GET_VOLTAGE}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, - NVGPU_SUPPORT_GET_CURRENT}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, - NVGPU_SUPPORT_GET_POWER}, - {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, - NVGPU_SUPPORT_GET_TEMPERATURE}, - {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, - NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, - {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, - NVGPU_SUPPORT_DEVICE_EVENTS}, - {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, - NVGPU_SUPPORT_FECS_CTXSW_TRACE}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, - NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, - NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, - {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, - NVGPU_SUPPORT_DETERMINISTIC_OPTS}, - {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, - NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, - {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, - NVGPU_SUPPORT_USER_SYNCPOINT}, - {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, - NVGPU_SUPPORT_IO_COHERENCE}, - 
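/*
 * Note: this flags_mapping[] table (it continues right below) pairs each
 * UAPI characteristics flag with the driver-internal nvgpu_is_enabled()
 * flag it is derived from; nvgpu_ctrl_ioctl_gpu_characteristics_flags()
 * after the table folds the enabled ones into the .flags word that
 * NVGPU_GPU_IOCTL_GET_CHARACTERISTICS reports. A hedged userspace sketch
 * of testing one flag (the UAPI header path, ctrl device fd and error
 * handling are illustrative assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int gpu_supports_tsg(int ctrl_fd)	/* fd on e.g. /dev/nvhost-ctrl-gpu */
{
	struct nvgpu_gpu_characteristics ch;
	struct nvgpu_gpu_get_characteristics req;

	memset(&ch, 0, sizeof(ch));
	memset(&req, 0, sizeof(req));
	req.gpu_characteristics_buf_size = sizeof(ch);
	req.gpu_characteristics_buf_addr = (uintptr_t)&ch;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) < 0)
		return -1;

	return !!(ch.flags & NVGPU_GPU_FLAGS_SUPPORT_TSG);
}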
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, - NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, - {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, - NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, - NVGPU_ECC_ENABLED_SM_LRF}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, - NVGPU_ECC_ENABLED_SM_SHM}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, - NVGPU_ECC_ENABLED_TEX}, - {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, - NVGPU_ECC_ENABLED_LTC}, - {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, - NVGPU_SUPPORT_TSG_SUBCONTEXTS}, - {NVGPU_GPU_FLAGS_SUPPORT_SCG, - NVGPU_SUPPORT_SCG}, - {NVGPU_GPU_FLAGS_SUPPORT_VPR, - NVGPU_SUPPORT_VPR}, -}; - -static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) -{ - unsigned int i; - u64 ioctl_flags = 0; - - for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { - if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) - ioctl_flags |= flags_mapping[i].ioctl_flag; - } - - return ioctl_flags; -} - -static void nvgpu_set_preemption_mode_flags(struct gk20a *g, - struct nvgpu_gpu_characteristics *gpu) -{ - struct nvgpu_preemption_modes_rec preemption_mode_rec; - - g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); - - gpu->graphics_preemption_mode_flags = - nvgpu_get_ioctl_graphics_preempt_mode_flags( - preemption_mode_rec.graphics_preemption_mode_flags); - gpu->compute_preemption_mode_flags = - nvgpu_get_ioctl_compute_preempt_mode_flags( - preemption_mode_rec.compute_preemption_mode_flags); - - gpu->default_graphics_preempt_mode = - nvgpu_get_ioctl_graphics_preempt_mode( - preemption_mode_rec.default_graphics_preempt_mode); - gpu->default_compute_preempt_mode = - nvgpu_get_ioctl_compute_preempt_mode( - preemption_mode_rec.default_compute_preempt_mode); -} - -static long -gk20a_ctrl_ioctl_gpu_characteristics( - struct gk20a *g, - struct nvgpu_gpu_get_characteristics *request) -{ - struct nvgpu_gpu_characteristics gpu; - long err = 0; - - if (gk20a_busy(g)) { - nvgpu_err(g, "failed to power on gpu"); - return -EINVAL; - } - - memset(&gpu, 0, sizeof(gpu)); - - gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); - gpu.on_board_video_memory_size = 0; /* integrated GPU */ - - gpu.num_gpc = g->gr.gpc_count; - gpu.max_gpc_count = g->gr.max_gpc_count; - - gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; - - gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ - - gpu.compression_page_size = g->ops.fb.compression_page_size(g); - - gpu.gpc_mask = (1 << g->gr.gpc_count)-1; - - gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); - - gpu.arch = g->params.gpu_arch; - gpu.impl = g->params.gpu_impl; - gpu.rev = g->params.gpu_rev; - gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; - gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? 
- NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; - gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); - gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); - gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); - gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); - gpu.inline_to_memory_class = - g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); - gpu.dma_copy_class = - g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - - gpu.vbios_version = g->bios.vbios_version; - gpu.vbios_oem_version = g->bios.vbios_oem_version; - - gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); - gpu.pde_coverage_bit_count = - g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; - gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); - - gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; - gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; - gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; - - gpu.max_css_buffer_size = g->gr.max_css_buffer_size; - - gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; - gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; - gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; - gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; - gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; - gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; - gpu.gpu_va_bit_count = 40; - - strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); - gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); - gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); - gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); - gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); - gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; - gpu.gr_gobs_per_comptagline_per_slice = - g->gr.gobs_per_comptagline_per_slice; - gpu.num_ltc = g->ltc_count; - gpu.lts_per_ltc = g->gr.slices_per_ltc; - gpu.cbc_cache_line_size = g->gr.cacheline_size; - gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; - - if (g->ops.clk.get_maxrate) - gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); - - gpu.local_video_memory_size = g->mm.vidmem.size; - - gpu.pci_vendor_id = g->pci_vendor_id; - gpu.pci_device_id = g->pci_device_id; - gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; - gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; - gpu.pci_class = g->pci_class; - gpu.pci_revision = g->pci_revision; - - nvgpu_set_preemption_mode_flags(g, &gpu); - - if (request->gpu_characteristics_buf_size > 0) { - size_t write_size = sizeof(gpu); - - if (write_size > request->gpu_characteristics_buf_size) - write_size = request->gpu_characteristics_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - request->gpu_characteristics_buf_addr, - &gpu, write_size); - } - - if (err == 0) - request->gpu_characteristics_buf_size = sizeof(gpu); - - gk20a_idle(g); - - return err; -} - -static int gk20a_ctrl_prepare_compressible_read( - struct gk20a *g, - struct nvgpu_gpu_prepare_compressible_read_args *args) -{ - int ret = -ENOSYS; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct nvgpu_channel_fence fence; - struct gk20a_fence *fence_out = NULL; - int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( - args->submit_flags); - int fd = -1; - - fence.id = args->fence.syncpt_id; - fence.value = args->fence.syncpt_value; - - /* Try and allocate an fd here*/ - if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) - && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { - fd = 
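/*
 * Note on the fd handling starting here (and in gk20a_ctrl_alloc_as() /
 * gk20a_ctrl_open_tsg() further down): the sequence is always
 * reserve -> create file -> fd_install(), with put_unused_fd() only on
 * paths where the fd was never published. Once fd_install() has run,
 * userspace can already reach the file, so anything the fops depend on
 * must be initialized before that point. A condensed sketch of the
 * pattern (the object and fops names are illustrative):
 */
static int sketch_export_fd(void *obj, const struct file_operations *fops)
{
	struct file *file;
	int fd = get_unused_fd_flags(O_RDWR);	/* reserve a table slot only */

	if (fd < 0)
		return fd;

	file = anon_inode_getfile("sketch-obj", fops, obj, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);		/* slot never became visible */
		return PTR_ERR(file);
	}

	fd_install(fd, file);		/* publish; nothing may fail after this */
	return fd;
}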
get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; - } - - ret = gk20a_prepare_compressible_read(l, args->handle, - args->request_compbits, args->offset, - args->compbits_hoffset, args->compbits_voffset, - args->scatterbuffer_offset, - args->width, args->height, args->block_height_log2, - submit_flags, &fence, &args->valid_compbits, - &args->zbc_color, &fence_out); - - if (ret) { - if (fd != -1) - put_unused_fd(fd); - return ret; - } - - /* Convert fence_out to something we can pass back to user space. */ - if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { - if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { - if (fence_out) { - ret = gk20a_fence_install_fd(fence_out, fd); - if (ret) - put_unused_fd(fd); - else - args->fence.fd = fd; - } else { - args->fence.fd = -1; - put_unused_fd(fd); - } - } else { - if (fence_out) { - args->fence.syncpt_id = fence_out->syncpt_id; - args->fence.syncpt_value = - fence_out->syncpt_value; - } else { - args->fence.syncpt_id = -1; - args->fence.syncpt_value = 0; - } - } - } - gk20a_fence_put(fence_out); -#endif - - return ret; -} - -static int gk20a_ctrl_mark_compressible_write( - struct gk20a *g, - struct nvgpu_gpu_mark_compressible_write_args *args) -{ - int ret = -ENOSYS; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - ret = gk20a_mark_compressible_write(g, args->handle, - args->valid_compbits, args->offset, args->zbc_color); -#endif - - return ret; -} - -static int gk20a_ctrl_alloc_as( - struct gk20a *g, - struct nvgpu_alloc_as_args *args) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_as_share *as_share; - int err; - int fd; - struct file *file; - char name[64]; - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); - - file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = gk20a_as_alloc_share(g, args->big_page_size, - gk20a_as_translate_as_alloc_flags(g, - args->flags), - &as_share); - if (err) - goto clean_up_file; - - fd_install(fd, file); - file->private_data = as_share; - - args->as_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static int gk20a_ctrl_open_tsg(struct gk20a *g, - struct nvgpu_gpu_open_tsg_args *args) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - int fd; - struct file *file; - char name[64]; - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - return err; - fd = err; - - snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); - - file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - err = nvgpu_ioctl_tsg_open(g, file); - if (err) - goto clean_up_file; - - fd_install(fd, file); - args->tsg_fd = fd; - return 0; - -clean_up_file: - fput(file); -clean_up: - put_unused_fd(fd); - return err; -} - -static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, - struct nvgpu_gpu_get_tpc_masks_args *args) -{ - struct gr_gk20a *gr = &g->gr; - int err = 0; - const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; - - if (args->mask_buf_size > 0) { - size_t write_size = gpc_tpc_mask_size; - - if (write_size > args->mask_buf_size) - write_size = args->mask_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - args->mask_buf_addr, - gr->gpc_tpc_mask, write_size); - } - - if (err == 0) - args->mask_buf_size = gpc_tpc_mask_size; - - return err; -} - -static int 
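/*
 * Note: gk20a_ctrl_get_tpc_masks() above and gk20a_ctrl_get_fbp_l2_masks()
 * below share a two-call convention: invoked with mask_buf_size == 0 they
 * only report the required size, and on success they always write the
 * full size back so the caller can detect truncation. A hedged userspace
 * sketch (the UAPI header path and device fd are assumptions):
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static uint32_t *fetch_tpc_masks(int ctrl_fd, uint32_t *bytes_out)
{
	struct nvgpu_gpu_get_tpc_masks_args args;
	uint32_t *buf;

	memset(&args, 0, sizeof(args));		/* first pass: size query */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return NULL;

	buf = malloc(args.mask_buf_size);
	if (!buf)
		return NULL;

	args.mask_buf_addr = (uintptr_t)buf;	/* second pass: fetch */
	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0) {
		free(buf);
		return NULL;
	}

	*bytes_out = args.mask_buf_size;	/* one u32 TPC mask per GPC */
	return buf;
}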
gk20a_ctrl_get_fbp_l2_masks( - struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) -{ - struct gr_gk20a *gr = &g->gr; - int err = 0; - const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; - - if (args->mask_buf_size > 0) { - size_t write_size = fbp_l2_mask_size; - - if (write_size > args->mask_buf_size) - write_size = args->mask_buf_size; - - err = copy_to_user((void __user *)(uintptr_t) - args->mask_buf_addr, - gr->fbp_rop_l2_en_mask, write_size); - } - - if (err == 0) - args->mask_buf_size = fbp_l2_mask_size; - - return err; -} - -static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, - struct nvgpu_gpu_l2_fb_args *args) -{ - int err = 0; - - if ((!args->l2_flush && !args->fb_flush) || - (!args->l2_flush && args->l2_invalidate)) - return -EINVAL; - - if (args->l2_flush) - g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); - - if (args->fb_flush) - g->ops.mm.fb_flush(g); - - return err; -} - -/* Invalidate i-cache for kepler & maxwell */ -static int nvgpu_gpu_ioctl_inval_icache( - struct gk20a *g, - struct nvgpu_gpu_inval_icache_args *args) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.inval_icache(g, ch); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - return err; -} - -static int nvgpu_gpu_ioctl_set_mmu_debug_mode( - struct gk20a *g, - struct nvgpu_gpu_mmu_debug_mode_args *args) -{ - if (gk20a_busy(g)) { - nvgpu_err(g, "failed to power on gpu"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.fb.set_debug_mode(g, args->state == 1); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - return 0; -} - -static int nvgpu_gpu_ioctl_set_debug_mode( - struct gk20a *g, - struct nvgpu_gpu_sm_debug_mode_args *args) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) - return -EINVAL; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (g->ops.gr.set_sm_debug_mode) - err = g->ops.gr.set_sm_debug_mode(g, ch, - args->sms, !!args->enable); - else - err = -ENOSYS; - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - return err; -} - -static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.trigger_suspend(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, - struct nvgpu_gpu_wait_pause_args *args) -{ - int err; - struct warpstate *ioctl_w_state; - struct nvgpu_warpstate *w_state = NULL; - u32 sm_count, ioctl_size, size, sm_id; - - sm_count = g->gr.gpc_count * g->gr.tpc_count; - - ioctl_size = sm_count * sizeof(struct warpstate); - ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); - if (!ioctl_w_state) - return -ENOMEM; - - size = sm_count * sizeof(struct nvgpu_warpstate); - w_state = nvgpu_kzalloc(g, size); - if (!w_state) { - err = -ENOMEM; - goto out_free; - } - - err = gk20a_busy(g); - if (err) - goto out_free; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.gr.wait_for_pause(g, w_state); - - for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { - ioctl_w_state[sm_id].valid_warps[0] = - w_state[sm_id].valid_warps[0]; - ioctl_w_state[sm_id].valid_warps[1] = - 
w_state[sm_id].valid_warps[1]; - ioctl_w_state[sm_id].trapped_warps[0] = - w_state[sm_id].trapped_warps[0]; - ioctl_w_state[sm_id].trapped_warps[1] = - w_state[sm_id].trapped_warps[1]; - ioctl_w_state[sm_id].paused_warps[0] = - w_state[sm_id].paused_warps[0]; - ioctl_w_state[sm_id].paused_warps[1] = - w_state[sm_id].paused_warps[1]; - } - /* Copy to user space - pointed by "args->pwarpstate" */ - if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, - w_state, ioctl_size)) { - nvgpu_log_fn(g, "copy_to_user failed!"); - err = -EFAULT; - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - -out_free: - nvgpu_kfree(g, w_state); - nvgpu_kfree(g, ioctl_w_state); - - return err; -} - -static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.resume_from_pause(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) -{ - int err; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.gr.clear_sm_errors(g); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_ioctl_has_any_exception( - struct gk20a *g, - struct nvgpu_gpu_tpc_exception_en_status_args *args) -{ - u32 tpc_exception_en; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - args->tpc_exception_en_sm_mask = tpc_exception_en; - - return 0; -} - -static int gk20a_ctrl_get_num_vsms(struct gk20a *g, - struct nvgpu_gpu_num_vsms *args) -{ - struct gr_gk20a *gr = &g->gr; - args->num_vsms = gr->no_of_sm; - return 0; -} - -static int gk20a_ctrl_vsm_mapping(struct gk20a *g, - struct nvgpu_gpu_vsms_mapping *args) -{ - int err = 0; - struct gr_gk20a *gr = &g->gr; - size_t write_size = gr->no_of_sm * - sizeof(struct nvgpu_gpu_vsms_mapping_entry); - struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; - u32 i; - - vsms_buf = nvgpu_kzalloc(g, write_size); - if (vsms_buf == NULL) - return -ENOMEM; - - for (i = 0; i < gr->no_of_sm; i++) { - vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; - if (g->ops.gr.get_nonpes_aware_tpc) - vsms_buf[i].tpc_index = - g->ops.gr.get_nonpes_aware_tpc(g, - gr->sm_to_cluster[i].gpc_index, - gr->sm_to_cluster[i].tpc_index); - else - vsms_buf[i].tpc_index = - gr->sm_to_cluster[i].tpc_index; - } - - err = copy_to_user((void __user *)(uintptr_t) - args->vsms_map_buf_addr, - vsms_buf, write_size); - nvgpu_kfree(g, vsms_buf); - - return err; -} - -static int nvgpu_gpu_get_cpu_time_correlation_info( - struct gk20a *g, - struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) -{ - struct nvgpu_cpu_time_correlation_sample *samples; - int err; - u32 i; - - if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || - args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) - return -EINVAL; - - samples = nvgpu_kzalloc(g, args->count * - sizeof(struct nvgpu_cpu_time_correlation_sample)); - if (!samples) { - return -ENOMEM; - } - - err = g->ops.ptimer.get_timestamps_zipper(g, - args->source_id, args->count, samples); - if (!err) { - for (i = 0; i < args->count; i++) { - args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; - args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; - } - } - - nvgpu_kfree(g, samples); - - return err; -} - -static int nvgpu_gpu_get_gpu_time( - struct gk20a *g, - struct nvgpu_gpu_get_gpu_time_args 
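/*
 * Note on nvgpu_gpu_ioctl_wait_for_pause() above: the loop repacks
 * w_state[] (struct nvgpu_warpstate) into ioctl_w_state[] (the UAPI
 * struct warpstate), but the copy_to_user() that follows sends the
 * unconverted w_state buffer with ioctl_size, so the repacked array is
 * never consumed -- the two layouts apparently coincide today, which
 * would hide the slip. The intent looks like it should be:
 *
 *	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate,
 *			 ioctl_w_state, ioctl_size)) {
 *		nvgpu_log_fn(g, "copy_to_user failed!");
 *		err = -EFAULT;
 *	}
 */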
*args) -{ - u64 time; - int err; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.ptimer.read_ptimer(g, &time); - if (!err) - args->gpu_timestamp = time; - - gk20a_idle(g); - return err; -} - -static int nvgpu_gpu_get_engine_info( - struct gk20a *g, - struct nvgpu_gpu_get_engine_info_args *args) -{ - int err = 0; - u32 engine_enum = ENGINE_INVAL_GK20A; - u32 report_index = 0; - u32 engine_id_idx; - const u32 max_buffer_engines = args->engine_info_buf_size / - sizeof(struct nvgpu_gpu_get_engine_info_item); - struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = - (void __user *)(uintptr_t)args->engine_info_buf_addr; - - for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; - ++engine_id_idx) { - u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; - const struct fifo_engine_info_gk20a *src_info = - &g->fifo.engine_info[active_engine_id]; - struct nvgpu_gpu_get_engine_info_item dst_info; - - memset(&dst_info, 0, sizeof(dst_info)); - - engine_enum = src_info->engine_enum; - - switch (engine_enum) { - case ENGINE_GR_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; - break; - - case ENGINE_GRCE_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; - break; - - case ENGINE_ASYNC_CE_GK20A: - dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; - break; - - default: - nvgpu_err(g, "Unmapped engine enum %u", - engine_enum); - continue; - } - - dst_info.engine_instance = src_info->inst_id; - dst_info.runlist_id = src_info->runlist_id; - - if (report_index < max_buffer_engines) { - err = copy_to_user(&dst_item_list[report_index], - &dst_info, sizeof(dst_info)); - if (err) - goto clean_up; - } - - ++report_index; - } - - args->engine_info_buf_size = - report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); - -clean_up: - return err; -} - -static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, - struct nvgpu_gpu_alloc_vidmem_args *args) -{ - u32 align = args->in.alignment ? 
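/*
 * Note on nvgpu_gpu_alloc_vidmem() here: the checks below require a
 * nonzero size that is a multiple of 4K and a power-of-two alignment no
 * larger than the size rounded up to a power of two (a buddy-allocator
 * limit); alignment 0 selects the 4K default, and the allocation is
 * returned as a dma-buf fd. A hedged userspace sketch (the UAPI header
 * path and device fd are assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int alloc_vidmem_dmabuf(int ctrl_fd, uint64_t size)
{
	struct nvgpu_gpu_alloc_vidmem_args args;

	memset(&args, 0, sizeof(args));
	args.in.size = size;		/* must be a nonzero multiple of 4K */
	args.in.alignment = 0;		/* 0 selects the default 4K alignment */

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args) < 0)
		return -1;

	return args.out.dmabuf_fd;	/* a dma-buf fd; close() drops the ref */
}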
args->in.alignment : SZ_4K; - int fd; - - nvgpu_log_fn(g, " "); - - /* not yet supported */ - if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) - return -EINVAL; - - /* not yet supported */ - if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) - return -EINVAL; - - if (args->in.size & (SZ_4K - 1)) - return -EINVAL; - - if (!args->in.size) - return -EINVAL; - - if (align & (align - 1)) - return -EINVAL; - - if (align > roundup_pow_of_two(args->in.size)) { - /* log this special case, buddy allocator detail */ - nvgpu_warn(g, - "alignment larger than buffer size rounded up to power of 2 is not supported"); - return -EINVAL; - } - - fd = nvgpu_vidmem_export_linux(g, args->in.size); - if (fd < 0) - return fd; - - args->out.dmabuf_fd = fd; - - nvgpu_log_fn(g, "done, fd=%d", fd); - - return 0; -} - -static int nvgpu_gpu_get_memory_state(struct gk20a *g, - struct nvgpu_gpu_get_memory_state_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || - args->reserved[2] || args->reserved[3]) - return -EINVAL; - - err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); - - nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); - - return err; -} - -static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) -{ - u32 domain = 0; - - if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) - domain = NVGPU_CLK_DOMAIN_MCLK; - else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) - domain = NVGPU_CLK_DOMAIN_GPCCLK; - else - domain = NVGPU_CLK_DOMAIN_MAX + 1; - - return domain; -} - -static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_vf_points_args *args) -{ - struct nvgpu_gpu_clk_vf_point clk_point; - struct nvgpu_gpu_clk_vf_point __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - u32 clk_domains = 0; - int err; - u16 last_mhz; - u16 *fpoints; - u32 i; - u32 max_points = 0; - u32 num_points = 0; - u16 min_mhz; - u16 max_mhz; - - nvgpu_log_fn(g, " "); - - if (!session || args->flags) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - args->num_entries = 0; - - if (!nvgpu_clk_arb_is_valid_domain(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain))) - return -EINVAL; - - err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &max_points, NULL); - if (err) - return err; - - if (!args->max_entries) { - args->max_entries = max_points; - return 0; - } - - if (args->max_entries < max_points) - return -EINVAL; - - err = nvgpu_clk_arb_get_arbiter_clk_range(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &min_mhz, &max_mhz); - if (err) - return err; - - fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); - if (!fpoints) - return -ENOMEM; - - err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, - nvgpu_gpu_convert_clk_domain(args->clk_domain), - &max_points, fpoints); - if (err) - goto fail; - - entry = (struct nvgpu_gpu_clk_vf_point __user *) - (uintptr_t)args->clk_vf_point_entries; - - last_mhz = 0; - num_points = 0; - for (i = 0; (i < max_points) && !err; i++) { - - /* filter out duplicate frequencies */ - if (fpoints[i] == last_mhz) - continue; - - /* filter out out-of-range frequencies */ - if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) - continue; - - last_mhz = fpoints[i]; - clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); - - err = copy_to_user((void __user *)entry, &clk_point, - sizeof(clk_point)); - - num_points++; - entry++; - } - - args->num_entries = num_points; - -fail: - 
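/*
 * Note on the copy-out loop above (and several other handlers in this
 * file, e.g. gk20a_ctrl_get_tpc_masks()): copy_to_user() returns the
 * number of bytes it could NOT copy, so storing that in err and
 * returning it hands a positive byte count, not -EFAULT, back to the
 * ioctl core on a partial fault. nvgpu_gpu_clk_get_range() just below
 * shows the conventional mapping:
 *
 *	if (copy_to_user(dst, src, len))
 *		return -EFAULT;
 */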
nvgpu_kfree(g, fpoints); - return err; -} - -static int nvgpu_gpu_clk_get_range(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_range_args *args) -{ - struct nvgpu_gpu_clk_range clk_range; - struct nvgpu_gpu_clk_range __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - - u32 clk_domains = 0; - u32 num_domains; - u32 num_entries; - u32 i; - int bit; - int err; - u16 min_mhz, max_mhz; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - num_domains = hweight_long(clk_domains); - - if (!args->flags) { - if (!args->num_entries) { - args->num_entries = num_domains; - return 0; - } - - if (args->num_entries < num_domains) - return -EINVAL; - - args->num_entries = 0; - num_entries = num_domains; - - } else { - if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) - return -EINVAL; - - num_entries = args->num_entries; - if (num_entries > num_domains) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_range __user *) - (uintptr_t)args->clk_range_entries; - - for (i = 0; i < num_entries; i++, entry++) { - - if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { - if (copy_from_user(&clk_range, (void __user *)entry, - sizeof(clk_range))) - return -EFAULT; - } else { - bit = ffs(clk_domains) - 1; - clk_range.clk_domain = bit; - clk_domains &= ~BIT(bit); - } - - clk_range.flags = 0; - err = nvgpu_clk_arb_get_arbiter_clk_range(g, - nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), - &min_mhz, &max_mhz); - clk_range.min_hz = MHZ_TO_HZ(min_mhz); - clk_range.max_hz = MHZ_TO_HZ(max_mhz); - - if (err) - return err; - - err = copy_to_user(entry, &clk_range, sizeof(clk_range)); - if (err) - return -EFAULT; - } - - args->num_entries = num_entries; - - return 0; -} - -static int nvgpu_gpu_clk_set_info(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_set_info_args *args) -{ - struct nvgpu_gpu_clk_info clk_info; - struct nvgpu_gpu_clk_info __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - - int fd; - u32 clk_domains = 0; - u16 freq_mhz; - int i; - int ret; - - nvgpu_log_fn(g, " "); - - if (!session || args->flags) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - if (!clk_domains) - return -EINVAL; - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - for (i = 0; i < args->num_entries; i++, entry++) { - - if (copy_from_user(&clk_info, entry, sizeof(clk_info))) - return -EFAULT; - - if (!nvgpu_clk_arb_is_valid_domain(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); - if (ret < 0) - return ret; - - for (i = 0; i < args->num_entries; i++, entry++) { - - if (copy_from_user(&clk_info, (void __user *)entry, - sizeof(clk_info))) - return -EFAULT; - freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); - - nvgpu_clk_arb_set_session_target_mhz(session, fd, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); - } - - ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); - if (ret < 0) - return ret; - - args->completion_fd = fd; - - return ret; -} - -static int nvgpu_gpu_clk_get_info(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_clk_get_info_args *args) -{ - struct nvgpu_gpu_clk_info clk_info; - struct nvgpu_gpu_clk_info __user *entry; - struct nvgpu_clk_session *session = priv->clk_session; - 
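/*
 * Note on nvgpu_gpu_clk_set_info() above: a frequency request is an
 * fd-based transaction -- install a request fd, stage per-domain targets
 * against it, commit, and hand the fd back as completion_fd so userspace
 * can poll() for the arbiter to act on it. The HZ_TO_MHZ() conversion it
 * relies on dodges a 64-bit division: 0x10C8 == 4296 ~ 2^32 / 10^6, so
 * (a * 0x10C8) >> 32 ~ a / 1000000 for inputs wider than 32 bits, and
 * the 0xF414F9CD7 (~65.52 GHz) guard saturates to 0xffff MHz before the
 * u16 result could wrap. A hedged userspace sketch of the set/commit
 * flow (the UAPI header path and device fd are assumptions):
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed UAPI header location */

static int set_gpcclk_hz(int ctrl_fd, uint64_t freq_hz)
{
	struct nvgpu_gpu_clk_info info;
	struct nvgpu_gpu_clk_set_info_args args;

	memset(&info, 0, sizeof(info));
	info.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPCCLK;
	info.freq_hz = freq_hz;

	memset(&args, 0, sizeof(args));		/* args.flags must stay 0 */
	args.num_entries = 1;
	args.clk_info_entries = (uintptr_t)&info;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &args) < 0)
		return -1;

	return args.completion_fd;	/* poll()able; close() when done */
}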
u32 clk_domains = 0; - u32 num_domains; - u32 num_entries; - u32 i; - u16 freq_mhz; - int err; - int bit; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); - num_domains = hweight_long(clk_domains); - - if (!args->flags) { - if (!args->num_entries) { - args->num_entries = num_domains; - return 0; - } - - if (args->num_entries < num_domains) - return -EINVAL; - - args->num_entries = 0; - num_entries = num_domains; - - } else { - if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) - return -EINVAL; - - num_entries = args->num_entries; - if (num_entries > num_domains * 3) - return -EINVAL; - } - - entry = (struct nvgpu_gpu_clk_info __user *) - (uintptr_t)args->clk_info_entries; - - for (i = 0; i < num_entries; i++, entry++) { - - if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { - if (copy_from_user(&clk_info, (void __user *)entry, - sizeof(clk_info))) - return -EFAULT; - } else { - bit = ffs(clk_domains) - 1; - clk_info.clk_domain = bit; - clk_domains &= ~BIT(bit); - clk_info.clk_type = args->clk_type; - } - - switch (clk_info.clk_type) { - case NVGPU_GPU_CLK_TYPE_TARGET: - err = nvgpu_clk_arb_get_session_target_mhz(session, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - case NVGPU_GPU_CLK_TYPE_ACTUAL: - err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - case NVGPU_GPU_CLK_TYPE_EFFECTIVE: - err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, - nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), - &freq_mhz); - break; - default: - freq_mhz = 0; - err = -EINVAL; - break; - } - if (err) - return err; - - clk_info.flags = 0; - clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); - - err = copy_to_user((void __user *)entry, &clk_info, - sizeof(clk_info)); - if (err) - return -EFAULT; - } - - args->num_entries = num_entries; - - return 0; -} - -static int nvgpu_gpu_get_event_fd(struct gk20a *g, - struct gk20a_ctrl_priv *priv, - struct nvgpu_gpu_get_event_fd_args *args) -{ - struct nvgpu_clk_session *session = priv->clk_session; - - nvgpu_log_fn(g, " "); - - if (!session) - return -EINVAL; - - return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, - args->flags); -} - -static int nvgpu_gpu_get_voltage(struct gk20a *g, - struct nvgpu_gpu_get_voltage_args *args) -{ - int err = -EINVAL; - - nvgpu_log_fn(g, " "); - - if (args->reserved) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - switch (args->which) { - case NVGPU_GPU_VOLTAGE_CORE: - err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); - break; - case NVGPU_GPU_VOLTAGE_SRAM: - err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); - break; - case NVGPU_GPU_VOLTAGE_BUS: - err = pmgr_pwr_devices_get_voltage(g, &args->voltage); - break; - default: - err = -EINVAL; - } - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_current(struct gk20a *g, - struct nvgpu_gpu_get_current_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = pmgr_pwr_devices_get_current(g, &args->currnt); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_power(struct gk20a *g, - struct nvgpu_gpu_get_power_args *args) -{ - int err; - - 
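/*
 * Note: every sensor/power query in this block wraps its hardware access
 * in the same bracket -- gk20a_busy() takes a usage reference and powers
 * the GPU up if needed (and can fail, in which case no register may be
 * touched), then gk20a_idle() drops the reference so railgating can
 * resume. A minimal sketch of the shape; sketch_read_sensor() stands in
 * for the real payload and is purely hypothetical:
 */
static int sketch_query(struct gk20a *g, u32 *out)
{
	int err = gk20a_busy(g);	/* power up + take a usage refcount */

	if (err)
		return err;		/* GPU not available: bail out early */

	*out = sketch_read_sensor(g);	/* hypothetical hardware access */

	gk20a_idle(g);			/* drop the ref; railgate may resume */
	return 0;
}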
nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = pmgr_pwr_devices_get_power(g, &args->power); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_get_temperature(struct gk20a *g, - struct nvgpu_gpu_get_temperature_args *args) -{ - int err; - u32 temp_f24_8; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) - return -EINVAL; - - if (!g->ops.therm.get_internal_sensor_curr_temp) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); - - gk20a_idle(g); - - args->temp_f24_8 = (s32)temp_f24_8; - - return err; -} - -static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, - struct nvgpu_gpu_set_therm_alert_limit_args *args) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (args->reserved[0] || args->reserved[1] || args->reserved[2]) - return -EINVAL; - - if (!g->ops.therm.configure_therm_alert) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); - - gk20a_idle(g); - - return err; -} - -static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, - u32 flags) -{ - int err = 0; - bool allow; - bool disallow; - - allow = flags & - NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; - - disallow = flags & - NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; - - /* Can't be both at the same time */ - if (allow && disallow) - return -EINVAL; - - /* Nothing to do */ - if (!allow && !disallow) - return 0; - - /* - * Moving into explicit idle or back from it? A call that doesn't - * change the status is a no-op. 
- */ - if (!ch->deterministic_railgate_allowed && - allow) { - gk20a_idle(ch->g); - } else if (ch->deterministic_railgate_allowed && - !allow) { - err = gk20a_busy(ch->g); - if (err) { - nvgpu_warn(ch->g, - "cannot busy to restore deterministic ch"); - return err; - } - } - ch->deterministic_railgate_allowed = allow; - - return err; -} - -static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) -{ - if (!ch->deterministic) - return -EINVAL; - - return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); -} - -static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, - struct nvgpu_gpu_set_deterministic_opts_args *args) -{ - int __user *user_channels; - u32 i = 0; - int err = 0; - - nvgpu_log_fn(g, " "); - - user_channels = (int __user *)(uintptr_t)args->channels; - - /* Upper limit; prevent holding deterministic_busy for long */ - if (args->num_channels > g->fifo.num_channels) { - err = -EINVAL; - goto out; - } - - /* Trivial sanity check first */ - if (!access_ok(VERIFY_READ, user_channels, - args->num_channels * sizeof(int))) { - err = -EFAULT; - goto out; - } - - nvgpu_rwsem_down_read(&g->deterministic_busy); - - /* note: we exit at the first failure */ - for (; i < args->num_channels; i++) { - int ch_fd = 0; - struct channel_gk20a *ch; - - if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { - /* User raced with above access_ok */ - err = -EFAULT; - break; - } - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) { - err = -EINVAL; - break; - } - - err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); - - gk20a_channel_put(ch); - - if (err) - break; - } - - nvgpu_rwsem_up_read(&g->deterministic_busy); - -out: - args->num_channels = i; - return err; -} - -static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, - struct nvgpu_gpu_read_single_sm_error_state_args *args) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gr_sm_error_state *sm_error_state; - struct nvgpu_gpu_sm_error_state_record sm_error_state_record; - u32 sm_id; - int err = 0; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - sm_error_state = gr->sm_error_states + sm_id; - sm_error_state_record.global_esr = - sm_error_state->hww_global_esr; - sm_error_state_record.warp_esr = - sm_error_state->hww_warp_esr; - sm_error_state_record.warp_esr_pc = - sm_error_state->hww_warp_esr_pc; - sm_error_state_record.global_esr_report_mask = - sm_error_state->hww_global_esr_report_mask; - sm_error_state_record.warp_esr_report_mask = - sm_error_state->hww_warp_esr_report_mask; - - if (args->record_size > 0) { - size_t write_size = sizeof(*sm_error_state); - - if (write_size > args->record_size) - write_size = args->record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_to_user((void __user *)(uintptr_t) - args->record_mem, - &sm_error_state_record, - write_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) { - nvgpu_err(g, "copy_to_user failed!"); - return err; - } - - args->record_size = write_size; - } - - return 0; -} - -long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct gk20a_ctrl_priv *priv = filp->private_data; - struct gk20a *g = priv->g; - struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; - struct nvgpu_gpu_zcull_get_info_args *get_info_args; - struct nvgpu_gpu_zbc_set_table_args *set_table_args; - struct nvgpu_gpu_zbc_query_table_args *query_table_args; - u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; - struct gr_zcull_info *zcull_info; - struct 
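/*
 * Note: gk20a_ctrl_dev_ioctl() here follows the canonical nvgpu ioctl
 * skeleton shared by the channel, dbg and clk-arb event nodes: validate
 * the _IOC_ magic/number/size, copy the argument into a stack buffer
 * sized for the largest command, dispatch on cmd, then copy the buffer
 * back only when the command carries the _IOC_READ direction bit and the
 * handler succeeded. Condensed to its skeleton:
 */
static long sketch_dev_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg)
{
	u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
	long err = 0;

	if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if ((_IOC_DIR(cmd) & _IOC_WRITE) &&
	    copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
		return -EFAULT;

	switch (cmd) {
	/* per-command handlers operate on buf in place */
	default:
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
	    copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
		err = -EFAULT;

	return err;
}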
zbc_entry *zbc_val; - struct zbc_query_params *zbc_tbl; - int i, err = 0; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - switch (cmd) { - case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: - get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; - - get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); - - break; - case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: - get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; - - memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); - - zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); - if (zcull_info == NULL) - return -ENOMEM; - - err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); - if (err) { - nvgpu_kfree(g, zcull_info); - break; - } - - get_info_args->width_align_pixels = zcull_info->width_align_pixels; - get_info_args->height_align_pixels = zcull_info->height_align_pixels; - get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; - get_info_args->aliquot_total = zcull_info->aliquot_total; - get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; - get_info_args->region_header_size = zcull_info->region_header_size; - get_info_args->subregion_header_size = zcull_info->subregion_header_size; - get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; - get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; - get_info_args->subregion_count = zcull_info->subregion_count; - - nvgpu_kfree(g, zcull_info); - break; - case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: - set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; - - zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); - if (zbc_val == NULL) - return -ENOMEM; - - zbc_val->format = set_table_args->format; - zbc_val->type = set_table_args->type; - - switch (zbc_val->type) { - case GK20A_ZBC_TYPE_COLOR: - for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { - zbc_val->color_ds[i] = set_table_args->color_ds[i]; - zbc_val->color_l2[i] = set_table_args->color_l2[i]; - } - break; - case GK20A_ZBC_TYPE_DEPTH: - case T19X_ZBC: - zbc_val->depth = set_table_args->depth; - break; - default: - err = -EINVAL; - } - - if (!err) { - err = gk20a_busy(g); - if (!err) { - err = g->ops.gr.zbc_set_table(g, &g->gr, - zbc_val); - gk20a_idle(g); - } - } - - if (zbc_val) - nvgpu_kfree(g, zbc_val); - break; - case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: - query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; - - zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); - if (zbc_tbl == NULL) - return -ENOMEM; - - zbc_tbl->type = query_table_args->type; - zbc_tbl->index_size = query_table_args->index_size; - - err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); - - if (!err) { - switch (zbc_tbl->type) { - case GK20A_ZBC_TYPE_COLOR: - for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { - query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; - query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; - } - break; - case GK20A_ZBC_TYPE_DEPTH: - case T19X_ZBC: - query_table_args->depth = zbc_tbl->depth; - break; - case 
GK20A_ZBC_TYPE_INVALID: - query_table_args->index_size = zbc_tbl->index_size; - break; - default: - err = -EINVAL; - } - if (!err) { - query_table_args->format = zbc_tbl->format; - query_table_args->ref_cnt = zbc_tbl->ref_cnt; - } - } - - if (zbc_tbl) - nvgpu_kfree(g, zbc_tbl); - break; - - case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: - err = gk20a_ctrl_ioctl_gpu_characteristics( - g, (struct nvgpu_gpu_get_characteristics *)buf); - break; - case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: - err = gk20a_ctrl_prepare_compressible_read(g, - (struct nvgpu_gpu_prepare_compressible_read_args *)buf); - break; - case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: - err = gk20a_ctrl_mark_compressible_write(g, - (struct nvgpu_gpu_mark_compressible_write_args *)buf); - break; - case NVGPU_GPU_IOCTL_ALLOC_AS: - err = gk20a_ctrl_alloc_as(g, - (struct nvgpu_alloc_as_args *)buf); - break; - case NVGPU_GPU_IOCTL_OPEN_TSG: - err = gk20a_ctrl_open_tsg(g, - (struct nvgpu_gpu_open_tsg_args *)buf); - break; - case NVGPU_GPU_IOCTL_GET_TPC_MASKS: - err = gk20a_ctrl_get_tpc_masks(g, - (struct nvgpu_gpu_get_tpc_masks_args *)buf); - break; - case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: - err = gk20a_ctrl_get_fbp_l2_masks(g, - (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); - break; - case NVGPU_GPU_IOCTL_OPEN_CHANNEL: - /* this arg type here, but ..gpu_open_channel_args in nvgpu.h - * for consistency - they are the same */ - err = gk20a_channel_open_ioctl(g, - (struct nvgpu_channel_open_args *)buf); - break; - case NVGPU_GPU_IOCTL_FLUSH_L2: - err = nvgpu_gpu_ioctl_l2_fb_ops(g, - (struct nvgpu_gpu_l2_fb_args *)buf); - break; - case NVGPU_GPU_IOCTL_INVAL_ICACHE: - err = gr_gk20a_elpg_protected_call(g, - nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf)); - break; - - case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: - err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, - (struct nvgpu_gpu_mmu_debug_mode_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: - err = gr_gk20a_elpg_protected_call(g, - nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); - break; - - case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: - err = nvgpu_gpu_ioctl_trigger_suspend(g); - break; - - case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: - err = nvgpu_gpu_ioctl_wait_for_pause(g, - (struct nvgpu_gpu_wait_pause_args *)buf); - break; - - case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: - err = nvgpu_gpu_ioctl_resume_from_pause(g); - break; - - case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: - err = nvgpu_gpu_ioctl_clear_sm_errors(g); - break; - - case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: - err = nvgpu_gpu_ioctl_has_any_exception(g, - (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); - break; - - case NVGPU_GPU_IOCTL_NUM_VSMS: - err = gk20a_ctrl_get_num_vsms(g, - (struct nvgpu_gpu_num_vsms *)buf); - break; - case NVGPU_GPU_IOCTL_VSMS_MAPPING: - err = gk20a_ctrl_vsm_mapping(g, - (struct nvgpu_gpu_vsms_mapping *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: - err = nvgpu_gpu_get_cpu_time_correlation_info(g, - (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_GPU_TIME: - err = nvgpu_gpu_get_gpu_time(g, - (struct nvgpu_gpu_get_gpu_time_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: - err = nvgpu_gpu_get_engine_info(g, - (struct nvgpu_gpu_get_engine_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: - err = nvgpu_gpu_alloc_vidmem(g, - (struct nvgpu_gpu_alloc_vidmem_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: - err = 
nvgpu_gpu_get_memory_state(g, - (struct nvgpu_gpu_get_memory_state_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_RANGE: - err = nvgpu_gpu_clk_get_range(g, priv, - (struct nvgpu_gpu_clk_range_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: - err = nvgpu_gpu_clk_get_vf_points(g, priv, - (struct nvgpu_gpu_clk_vf_points_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_SET_INFO: - err = nvgpu_gpu_clk_set_info(g, priv, - (struct nvgpu_gpu_clk_set_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_CLK_GET_INFO: - err = nvgpu_gpu_clk_get_info(g, priv, - (struct nvgpu_gpu_clk_get_info_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_EVENT_FD: - err = nvgpu_gpu_get_event_fd(g, priv, - (struct nvgpu_gpu_get_event_fd_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_VOLTAGE: - err = nvgpu_gpu_get_voltage(g, - (struct nvgpu_gpu_get_voltage_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_CURRENT: - err = nvgpu_gpu_get_current(g, - (struct nvgpu_gpu_get_current_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_POWER: - err = nvgpu_gpu_get_power(g, - (struct nvgpu_gpu_get_power_args *)buf); - break; - - case NVGPU_GPU_IOCTL_GET_TEMPERATURE: - err = nvgpu_gpu_get_temperature(g, - (struct nvgpu_gpu_get_temperature_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: - err = nvgpu_gpu_set_therm_alert_limit(g, - (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); - break; - - case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: - err = nvgpu_gpu_set_deterministic_opts(g, - (struct nvgpu_gpu_set_deterministic_opts_args *)buf); - break; - - case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: - err = nvgpu_gpu_read_single_sm_error_state(g, - (struct nvgpu_gpu_read_single_sm_error_state_args *)buf); - break; - - default: - nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h deleted file mode 100644 index 8b4a5e59..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __NVGPU_IOCTL_CTRL_H__ -#define __NVGPU_IOCTL_CTRL_H__ - -int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); -int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); -long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c deleted file mode 100644 index 31e7e2cb..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ /dev/null @@ -1,2003 +0,0 @@ -/* - * Tegra GK20A GPU Debugger/Profiler Driver - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "gk20a/regops_gk20a.h" -#include "gk20a/dbg_gpu_gk20a.h" -#include "os_linux.h" -#include "platform_gk20a.h" -#include "ioctl_dbg.h" - -/* turn seriously unwieldy names -> something shorter */ -#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x - -/* silly allocator - just increment id */ -static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); -static int generate_unique_id(void) -{ - return nvgpu_atomic_add_return(1, &unique_id); -} - -static int alloc_profiler(struct gk20a *g, - struct dbg_profiler_object_data **_prof) -{ - struct dbg_profiler_object_data *prof; - *_prof = NULL; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - prof = nvgpu_kzalloc(g, sizeof(*prof)); - if (!prof) - return -ENOMEM; - - prof->prof_handle = generate_unique_id(); - *_prof = prof; - return 0; -} - -static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) -{ - struct dbg_session_gk20a_linux *dbg_s_linux; - *_dbg_s_linux = NULL; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); - if (!dbg_s_linux) - return -ENOMEM; - - dbg_s_linux->dbg_s.id = generate_unique_id(); - *_dbg_s_linux = dbg_s_linux; - return 0; -} - -static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, - struct gr_gk20a *gr); - -static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); - -static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_exec_reg_ops_args *args); - -static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_powergate_args *args); - -static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); - -static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); - -static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); - -static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); - -static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); - -static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_profiler_reserve_args *args); - -static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_map_args *args); - -static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); - -static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, - int
timeout_mode); - -static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle); - -static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); - -static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); - -static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle); - -static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); - -static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, - struct file *filp, bool is_profiler); - -unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) -{ - unsigned int mask = 0; - struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - if (dbg_s->dbg_events.events_enabled && - dbg_s->dbg_events.num_pending_events > 0) { - nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", - dbg_s->id); - nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", - dbg_s->dbg_events.num_pending_events); - mask = (POLLPRI | POLLIN); - } - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); - - return mask; -} - -int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) -{ - struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - - nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); - - /* unbind channels */ - dbg_unbind_all_channels_gk20a(dbg_s); - - /* Powergate/Timeout enable is called here because a dbg session that - * called the powergate/timeout disable ioctl may be killed without - * ever calling the powergate/timeout enable ioctl - */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); - nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); - - /* If this session owned the perf buffer, release it */ - if (g->perfbuf.owner == dbg_s) - gk20a_perfbuf_release_locked(g, g->perfbuf.offset); - - /* Per-context profiler objects were released when we called - * dbg_unbind_all_channels. We could still have global ones. - */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->session_id == dbg_s->id) { - if (prof_obj->has_reservation) - g->ops.dbg_session_ops.
- release_profiler_reservation(dbg_s, prof_obj); - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - } - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - - nvgpu_mutex_destroy(&dbg_s->ch_list_lock); - nvgpu_mutex_destroy(&dbg_s->ioctl_lock); - - nvgpu_kfree(g, dbg_session_linux); - gk20a_put(g); - - return 0; -} - -int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, prof.cdev); - struct gk20a *g = &l->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); -} - -static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_timeout_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return err; -} - -static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - u32 sm_id; - struct channel_gk20a *ch; - struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; - struct nvgpu_gr_sm_error_state sm_error_state; - int err = 0; - - /* Not currently supported in the virtual case */ - if (g->is_virtual) - return -ENOSYS; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - if (args->sm_error_state_record_size > 0) { - size_t read_size = sizeof(sm_error_state_record); - - if (read_size > args->sm_error_state_record_size) - read_size = args->sm_error_state_record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_from_user(&sm_error_state_record, - (void __user *)(uintptr_t) - args->sm_error_state_record_mem, - read_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) - return -ENOMEM; - } - - err = gk20a_busy(g); - if (err) - return err; - - sm_error_state.hww_global_esr = - sm_error_state_record.hww_global_esr; - sm_error_state.hww_warp_esr = - sm_error_state_record.hww_warp_esr; - sm_error_state.hww_warp_esr_pc = - sm_error_state_record.hww_warp_esr_pc; - sm_error_state.hww_global_esr_report_mask = - sm_error_state_record.hww_global_esr_report_mask; - sm_error_state.hww_warp_esr_report_mask = - sm_error_state_record.hww_warp_esr_report_mask; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.update_sm_error_state(g, ch, - sm_id, &sm_error_state)); - - gk20a_idle(g); - - return err; -} - - -static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - struct nvgpu_gr_sm_error_state *sm_error_state; - struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; - u32 sm_id; - int err = 0; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - sm_error_state = gr->sm_error_states + sm_id; - sm_error_state_record.hww_global_esr = - sm_error_state->hww_global_esr; - sm_error_state_record.hww_warp_esr = - sm_error_state->hww_warp_esr; - sm_error_state_record.hww_warp_esr_pc = - 
sm_error_state->hww_warp_esr_pc; - sm_error_state_record.hww_global_esr_report_mask = - sm_error_state->hww_global_esr_report_mask; - sm_error_state_record.hww_warp_esr_report_mask = - sm_error_state->hww_warp_esr_report_mask; - - if (args->sm_error_state_record_size > 0) { - size_t write_size = sizeof(*sm_error_state); - - if (write_size > args->sm_error_state_record_size) - write_size = args->sm_error_state_record_size; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = copy_to_user((void __user *)(uintptr_t) - args->sm_error_state_record_mem, - &sm_error_state_record, - write_size); - nvgpu_mutex_release(&g->dbg_sessions_lock); - if (err) { - nvgpu_err(g, "copy_to_user failed!"); - return err; - } - - args->sm_error_state_record_size = write_size; - } - - return 0; -} - - -static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->broadcast_stop_trigger = (args->broadcast != 0); - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); - - return 0; -} - -static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, - int timeout_mode) -{ - struct gk20a *g = dbg_s->g; - int err = 0; - - nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", - timeout_mode); - - switch (timeout_mode) { - case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: - if (dbg_s->is_timeout_disabled == true) - nvgpu_atomic_dec(&g->timeouts_disabled_refcount); - dbg_s->is_timeout_disabled = false; - break; - - case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: - if (dbg_s->is_timeout_disabled == false) - nvgpu_atomic_inc(&g->timeouts_disabled_refcount); - dbg_s->is_timeout_disabled = true; - break; - - default: - nvgpu_err(g, - "unrecognized dbg gpu timeout mode : 0x%x", - timeout_mode); - err = -EINVAL; - break; - } - - if (!err) - nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " - "timeouts disabled refcount %d", - dbg_s->is_timeout_disabled ? 
"true" : "false", - nvgpu_atomic_read(&g->timeouts_disabled_refcount)); - return err; -} - -static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, - struct file *filp, bool is_profiler) -{ - struct nvgpu_os_linux *l; - struct dbg_session_gk20a_linux *dbg_session_linux; - struct dbg_session_gk20a *dbg_s; - struct gk20a *g; - - struct device *dev; - - int err; - - if (!is_profiler) - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, dbg.cdev); - else - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, prof.cdev); - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - - dev = dev_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); - - err = alloc_session(g, &dbg_session_linux); - if (err) - goto free_ref; - - dbg_s = &dbg_session_linux->dbg_s; - - filp->private_data = dbg_session_linux; - dbg_session_linux->dev = dev; - dbg_s->g = g; - dbg_s->is_profiler = is_profiler; - dbg_s->is_pg_disabled = false; - dbg_s->is_timeout_disabled = false; - - nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); - nvgpu_init_list_node(&dbg_s->ch_list); - err = nvgpu_mutex_init(&dbg_s->ch_list_lock); - if (err) - goto err_free_session; - err = nvgpu_mutex_init(&dbg_s->ioctl_lock); - if (err) - goto err_destroy_lock; - dbg_s->dbg_events.events_enabled = false; - dbg_s->dbg_events.num_pending_events = 0; - - return 0; - -err_destroy_lock: - nvgpu_mutex_destroy(&dbg_s->ch_list_lock); -err_free_session: - nvgpu_kfree(g, dbg_session_linux); -free_ref: - gk20a_put(g); - return err; -} - -void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) -{ - nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); -} - -static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct dbg_session_channel_data *ch_data) -{ - struct gk20a *g = dbg_s->g; - int chid; - struct dbg_session_data *session_data; - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - struct dbg_session_channel_data_linux *ch_data_linux; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - chid = ch_data->chid; - - /* If there's a profiler ctx reservation record associated with this - * session/channel pair, release it. - */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if ((prof_obj->session_id == dbg_s->id) && - (prof_obj->ch->chid == chid)) { - if (prof_obj->has_reservation) { - g->ops.dbg_session_ops. - release_profiler_reservation(dbg_s, prof_obj); - } - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - } - } - - nvgpu_list_del(&ch_data->ch_entry); - - session_data = ch_data->session_data; - nvgpu_list_del(&session_data->dbg_s_entry); - nvgpu_kfree(dbg_s->g, session_data); - - ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, - ch_data); - - fput(ch_data_linux->ch_f); - nvgpu_kfree(dbg_s->g, ch_data_linux); - - return 0; -} - -static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_bind_channel_args *args) -{ - struct file *f; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - struct dbg_session_channel_data_linux *ch_data_linux; - struct dbg_session_data *session_data; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", - g->name, args->channel_fd); - - /* - * Although gk20a_get_channel_from_file gives us a channel ref, need to - * hold a ref to the file during the session lifetime. See comment in - * struct dbg_session_channel_data. 
- */ - f = fget(args->channel_fd); - if (!f) - return -ENODEV; - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) { - nvgpu_log_fn(g, "no channel found for fd"); - err = -EINVAL; - goto out_fput; - } - - nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&ch->dbg_s_lock); - - ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); - if (!ch_data_linux) { - err = -ENOMEM; - goto out_chput; - } - ch_data_linux->ch_f = f; - ch_data_linux->ch_data.channel_fd = args->channel_fd; - ch_data_linux->ch_data.chid = ch->chid; - ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; - nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); - - session_data = nvgpu_kzalloc(g, sizeof(*session_data)); - if (!session_data) { - err = -ENOMEM; - goto out_kfree; - } - session_data->dbg_s = dbg_s; - nvgpu_init_list_node(&session_data->dbg_s_entry); - ch_data_linux->ch_data.session_data = session_data; - - nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); - - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - - nvgpu_mutex_release(&ch->dbg_s_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - gk20a_channel_put(ch); - - return 0; - -out_kfree: - nvgpu_kfree(g, ch_data_linux); -out_chput: - gk20a_channel_put(ch); - nvgpu_mutex_release(&ch->dbg_s_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); -out_fput: - fput(f); - return err; -} - -static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) -{ - struct dbg_session_channel_data *ch_data, *tmp; - struct gk20a *g = dbg_s->g; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) - ch_data->unbind_single_channel(dbg_s, ch_data); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return 0; -} - -/* - * Convert common regops op values of the form of NVGPU_DBG_REG_OP_* - * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* - */ -static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) -{ - switch (regops_op) { - case REGOP(READ_32): - return REGOP_LINUX(READ_32); - case REGOP(WRITE_32): - return REGOP_LINUX(WRITE_32); - case REGOP(READ_64): - return REGOP_LINUX(READ_64); - case REGOP(WRITE_64): - return REGOP_LINUX(WRITE_64); - case REGOP(READ_08): - return REGOP_LINUX(READ_08); - case REGOP(WRITE_08): - return REGOP_LINUX(WRITE_08); - } - - return regops_op; -} - -/* - * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* - * into common regops op values of the form of NVGPU_DBG_REG_OP_* - */ -static u32 nvgpu_get_regops_op_values_common(u32 regops_op) -{ - switch (regops_op) { - case REGOP_LINUX(READ_32): - return REGOP(READ_32); - case REGOP_LINUX(WRITE_32): - return REGOP(WRITE_32); - case REGOP_LINUX(READ_64): - return REGOP(READ_64); - case REGOP_LINUX(WRITE_64): - return REGOP(WRITE_64); - case REGOP_LINUX(READ_08): - return REGOP(READ_08); - case REGOP_LINUX(WRITE_08): - return REGOP(WRITE_08); - } - - return regops_op; -} - -/* - * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* - * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* - */ -static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) -{ - switch (regops_type) { - case 
REGOP(TYPE_GLOBAL): - return REGOP_LINUX(TYPE_GLOBAL); - case REGOP(TYPE_GR_CTX): - return REGOP_LINUX(TYPE_GR_CTX); - case REGOP(TYPE_GR_CTX_TPC): - return REGOP_LINUX(TYPE_GR_CTX_TPC); - case REGOP(TYPE_GR_CTX_SM): - return REGOP_LINUX(TYPE_GR_CTX_SM); - case REGOP(TYPE_GR_CTX_CROP): - return REGOP_LINUX(TYPE_GR_CTX_CROP); - case REGOP(TYPE_GR_CTX_ZROP): - return REGOP_LINUX(TYPE_GR_CTX_ZROP); - case REGOP(TYPE_GR_CTX_QUAD): - return REGOP_LINUX(TYPE_GR_CTX_QUAD); - } - - return regops_type; -} - -/* - * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* - * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* - */ -static u32 nvgpu_get_regops_type_values_common(u32 regops_type) -{ - switch (regops_type) { - case REGOP_LINUX(TYPE_GLOBAL): - return REGOP(TYPE_GLOBAL); - case REGOP_LINUX(TYPE_GR_CTX): - return REGOP(TYPE_GR_CTX); - case REGOP_LINUX(TYPE_GR_CTX_TPC): - return REGOP(TYPE_GR_CTX_TPC); - case REGOP_LINUX(TYPE_GR_CTX_SM): - return REGOP(TYPE_GR_CTX_SM); - case REGOP_LINUX(TYPE_GR_CTX_CROP): - return REGOP(TYPE_GR_CTX_CROP); - case REGOP_LINUX(TYPE_GR_CTX_ZROP): - return REGOP(TYPE_GR_CTX_ZROP); - case REGOP_LINUX(TYPE_GR_CTX_QUAD): - return REGOP(TYPE_GR_CTX_QUAD); - } - - return regops_type; -} - -/* - * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_* - * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* - */ -static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) -{ - switch (regops_status) { - case REGOP(STATUS_SUCCESS): - return REGOP_LINUX(STATUS_SUCCESS); - case REGOP(STATUS_INVALID_OP): - return REGOP_LINUX(STATUS_INVALID_OP); - case REGOP(STATUS_INVALID_TYPE): - return REGOP_LINUX(STATUS_INVALID_TYPE); - case REGOP(STATUS_INVALID_OFFSET): - return REGOP_LINUX(STATUS_INVALID_OFFSET); - case REGOP(STATUS_UNSUPPORTED_OP): - return REGOP_LINUX(STATUS_UNSUPPORTED_OP); - case REGOP(STATUS_INVALID_MASK ): - return REGOP_LINUX(STATUS_INVALID_MASK); - } - - return regops_status; -} - -/* - * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* - * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_* - */ -static u32 nvgpu_get_regops_status_values_common(u32 regops_status) -{ - switch (regops_status) { - case REGOP_LINUX(STATUS_SUCCESS): - return REGOP(STATUS_SUCCESS); - case REGOP_LINUX(STATUS_INVALID_OP): - return REGOP(STATUS_INVALID_OP); - case REGOP_LINUX(STATUS_INVALID_TYPE): - return REGOP(STATUS_INVALID_TYPE); - case REGOP_LINUX(STATUS_INVALID_OFFSET): - return REGOP(STATUS_INVALID_OFFSET); - case REGOP_LINUX(STATUS_UNSUPPORTED_OP): - return REGOP(STATUS_UNSUPPORTED_OP); - case REGOP_LINUX(STATUS_INVALID_MASK ): - return REGOP(STATUS_INVALID_MASK); - } - - return regops_status; -} - -static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, - struct nvgpu_dbg_reg_op *out, u32 num_ops) -{ - u32 i; - - if(in == NULL || out == NULL) - return -ENOMEM; - - for (i = 0; i < num_ops; i++) { - out[i].op = nvgpu_get_regops_op_values_common(in[i].op); - out[i].type = nvgpu_get_regops_type_values_common(in[i].type); - out[i].status = nvgpu_get_regops_status_values_common(in[i].status); - out[i].quad = in[i].quad; - out[i].group_mask = in[i].group_mask; - out[i].sub_group_mask = in[i].sub_group_mask; - out[i].offset = in[i].offset; - out[i].value_lo = in[i].value_lo; - out[i].value_hi = in[i].value_hi; - out[i].and_n_mask_lo = in[i].and_n_mask_lo; - out[i].and_n_mask_hi = in[i].and_n_mask_hi; - } - - return 0; -} - 
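/*
 * For orientation only: a minimal user-space sketch of driving
 * NVGPU_DBG_GPU_IOCTL_REG_OPS through the translation helpers in this
 * file. The uapi header install path and an already-open, already-bound
 * dbg session fd are assumptions here; error handling is elided.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed install location of the nvgpu uapi header */

static int read_one_gpu_reg(int dbg_fd, uint32_t offset, uint32_t *value)
{
	struct nvgpu_dbg_gpu_reg_op op;
	struct nvgpu_dbg_gpu_exec_reg_ops_args args;

	memset(&op, 0, sizeof(op));
	op.op = NVGPU_DBG_GPU_REG_OP_READ_32;	/* translated in-kernel to REGOP(READ_32) */
	op.type = NVGPU_DBG_GPU_REG_OP_TYPE_GLOBAL;
	op.offset = offset;

	memset(&args, 0, sizeof(args));
	args.ops = (uint64_t)(uintptr_t)&op;	/* user pointer; the kernel copies it in fragments */
	args.num_ops = 1;

	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_REG_OPS, &args) != 0)
		return -1;
	/* per-op status comes back translated to NVGPU_DBG_GPU_REG_OP_STATUS_* */
	if (op.status != NVGPU_DBG_GPU_REG_OP_STATUS_SUCCESS)
		return -1;
	*value = op.value_lo;
	return 0;
}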
-static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, - struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) -{ - u32 i; - - if(in == NULL || out == NULL) - return -ENOMEM; - - for (i = 0; i < num_ops; i++) { - out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); - out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); - out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); - out[i].quad = in[i].quad; - out[i].group_mask = in[i].group_mask; - out[i].sub_group_mask = in[i].sub_group_mask; - out[i].offset = in[i].offset; - out[i].value_lo = in[i].value_lo; - out[i].value_hi = in[i].value_hi; - out[i].and_n_mask_lo = in[i].and_n_mask_lo; - out[i].and_n_mask_hi = in[i].and_n_mask_hi; - } - - return 0; -} - -static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_exec_reg_ops_args *args) -{ - int err = 0, powergate_err = 0; - bool is_pg_disabled = false; - - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - - nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); - - if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { - nvgpu_err(g, "regops limit exceeded"); - return -EINVAL; - } - - if (args->num_ops == 0) { - /* Nothing to do */ - return 0; - } - - if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { - nvgpu_err(g, "reg ops work buffer not allocated"); - return -ENODEV; - } - - if (!dbg_s->id) { - nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); - return -EINVAL; - } - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!dbg_s->is_profiler && !ch) { - nvgpu_err(g, "bind a channel before regops for a debugging session"); - return -EINVAL; - } - - /* be sure that ctx info is in place */ - if (!g->is_virtual && - !gr_context_info_available(dbg_s, &g->gr)) { - nvgpu_err(g, "gr context data not available"); - return -ENODEV; - } - - /* since exec_reg_ops sends methods to the ucode, it must take the - * global gpu lock to protect against mixing methods from debug sessions - * on other channels */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - if (!dbg_s->is_pg_disabled && !g->is_virtual) { - /* In the virtual case, the server will handle - * disabling/enabling powergating when processing reg ops - */ - powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, - true); - is_pg_disabled = true; - } - - if (!powergate_err) { - u64 ops_offset = 0; /* index offset */ - - struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; - - linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * - sizeof(struct nvgpu_dbg_gpu_reg_op)); - - if (!linux_fragment) - return -ENOMEM; - - while (ops_offset < args->num_ops && !err) { - const u64 num_ops = - min(args->num_ops - ops_offset, - (u64)(g->dbg_regops_tmp_buf_ops)); - const u64 fragment_size = - num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); - - void __user *const fragment = - (void __user *)(uintptr_t) - (args->ops + - ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); - - nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", - ops_offset, num_ops); - - nvgpu_log_fn(g, "Copying regops from userspace"); - - if (copy_from_user(linux_fragment, - fragment, fragment_size)) { - nvgpu_err(g, "copy_from_user failed!"); - err = -EFAULT; - break; - } - - err = nvgpu_get_regops_data_common(linux_fragment, - g->dbg_regops_tmp_buf, num_ops); - - if (err) - break; - - err = g->ops.dbg_session_ops.exec_reg_ops( - dbg_s, g->dbg_regops_tmp_buf, num_ops); - - err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, - 
linux_fragment, num_ops); - - if (err) - break; - - nvgpu_log_fn(g, "Copying result to userspace"); - - if (copy_to_user(fragment, linux_fragment, - fragment_size)) { - nvgpu_err(g, "copy_to_user failed!"); - err = -EFAULT; - break; - } - - ops_offset += num_ops; - } - - nvgpu_kfree(g, linux_fragment); - - /* enable powergate, if previously disabled */ - if (is_pg_disabled) { - powergate_err = - g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, - false); - } - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - if (!err && powergate_err) - err = powergate_err; - - if (err) - nvgpu_err(g, "dbg regops failed"); - - return err; -} - -static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_powergate_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - nvgpu_log_fn(g, "%s powergate mode = %d", - g->name, args->mode); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) { - err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true); - } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) { - err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); - } else { - nvgpu_err(g, "invalid powergate mode"); - err = -EINVAL; - } - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch_gk20a; - - nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", - g->name, args->mode); - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch_gk20a) { - nvgpu_err(g, - "no bound channel for smpc ctxsw mode update"); - err = -EINVAL; - goto clean_up; - } - - err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, - args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); - if (err) { - nvgpu_err(g, - "error (%d) during smpc ctxsw mode update", err); - goto clean_up; - } - - err = g->ops.regops.apply_smpc_war(dbg_s); - clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) -{ - int err; - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch_gk20a; - - nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); - - /* Must have a valid reservation to enable/disable hwpm ctxsw. - * Just print an error message for now, but eventually this should - * return an error, at the point where all client sw has been - * cleaned up.
- */ - if (!dbg_s->has_profiler_reservation) { - nvgpu_err(g, - "session doesn't have a valid reservation"); - } - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - /* Take the global lock, since we'll be doing global regops */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch_gk20a) { - nvgpu_err(g, - "no bound channel for pm ctxsw mode update"); - err = -EINVAL; - goto clean_up; - } - if (!dbg_s->is_pg_disabled) { - nvgpu_err(g, "powergate is not disabled"); - err = -ENOSYS; - goto clean_up; - } - err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, - args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); - if (err) - nvgpu_err(g, - "error (%d) during pm ctxsw mode update", err); - /* gk20a would require a WAR to set the core PM_ENABLE bit, not - * added here with gk20a being deprecated - */ - clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - return err; -} - -static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) -{ - struct gk20a *g = dbg_s->g; - struct channel_gk20a *ch; - int err = 0, action = args->mode; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to poweron"); - return err; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Suspend GPU context switching */ - err = gr_gk20a_disable_ctxsw(g); - if (err) { - nvgpu_err(g, "unable to stop gr ctxsw"); - /* this should probably be ctx-fatal... */ - goto clean_up; - } - - switch (action) { - case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: - gr_gk20a_suspend_context(ch); - break; - - case NVGPU_DBG_GPU_RESUME_ALL_SMS: - gr_gk20a_resume_context(ch); - break; - } - - err = gr_gk20a_enable_ctxsw(g); - if (err) - nvgpu_err(g, "unable to restart ctxsw!"); - -clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - gk20a_idle(g); - - return err; -} - -static int nvgpu_ioctl_allocate_profiler_object( - struct dbg_session_gk20a_linux *dbg_session_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) -{ - int err = 0; - struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; - struct gk20a *g = get_gk20a(dbg_session_linux->dev); - struct dbg_profiler_object_data *prof_obj; - - nvgpu_log_fn(g, "%s", g->name); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - err = alloc_profiler(g, &prof_obj); - if (err) - goto clean_up; - - prof_obj->session_id = dbg_s->id; - - if (dbg_s->is_profiler) - prof_obj->ch = NULL; - else { - prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (prof_obj->ch == NULL) { - nvgpu_err(g, - "bind a channel for dbg session"); - nvgpu_kfree(g, prof_obj); - err = -EINVAL; - goto clean_up; - } - } - - /* Return handle to client */ - args->profiler_handle = prof_obj->prof_handle; - - nvgpu_init_list_node(&prof_obj->prof_obj_entry); - - nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); -clean_up: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_ioctl_free_profiler_object( - struct dbg_session_gk20a_linux *dbg_s_linux, - struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) -{ - int err = 0; - struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; - struct gk20a *g = get_gk20a(dbg_s_linux->dev); - struct dbg_profiler_object_data *prof_obj, *tmp_obj; - bool obj_found = false; - - 
nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", - g->name, dbg_s->id, args->profiler_handle); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Remove profiler object from the list, if a match is found */ - nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->prof_handle == args->profiler_handle) { - if (prof_obj->session_id != dbg_s->id) { - nvgpu_err(g, - "invalid handle %x", - args->profiler_handle); - err = -EINVAL; - break; - } - if (prof_obj->has_reservation) - g->ops.dbg_session_ops. - release_profiler_reservation(dbg_s, prof_obj); - nvgpu_list_del(&prof_obj->prof_obj_entry); - nvgpu_kfree(g, prof_obj); - obj_found = true; - break; - } - } - if (!obj_found) { - nvgpu_err(g, "profiler %x not found", - args->profiler_handle); - err = -EINVAL; - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static struct dbg_profiler_object_data *find_matching_prof_obj( - struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->prof_handle == profiler_handle) { - if (prof_obj->session_id != dbg_s->id) { - nvgpu_err(g, - "invalid handle %x", - profiler_handle); - return NULL; - } - return prof_obj; - } - } - return NULL; -} - -/* used in scenarios where the debugger session can take just the inter-session - * lock for performance, but the profiler session must take the per-gpu lock - * since it might not have an associated channel. */ -static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) -{ - struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - - if (dbg_s->is_profiler || !ch) - nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); - else - nvgpu_mutex_acquire(&ch->dbg_s_lock); -} - -static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) -{ - struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - - if (dbg_s->is_profiler || !ch) - nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); - else - nvgpu_mutex_release(&ch->dbg_s_lock); -} - -static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->dbg_events.events_enabled = true; - dbg_s->dbg_events.num_pending_events = 0; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - -static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - dbg_s->dbg_events.events_enabled = false; - dbg_s->dbg_events.num_pending_events = 0; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - -static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) -{ - struct gk20a *g = dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); - - if (dbg_s->dbg_events.events_enabled && - dbg_s->dbg_events.num_pending_events > 0) - dbg_s->dbg_events.num_pending_events--; - - gk20a_dbg_session_nvgpu_mutex_release(dbg_s); -} - - -static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_events_ctrl_args *args) -{ - int ret = 0; - struct channel_gk20a *ch; - struct gk20a *g = 
dbg_s->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) { - nvgpu_err(g, "no channel bound to dbg session"); - return -EINVAL; - } - - switch (args->cmd) { - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: - gk20a_dbg_gpu_events_enable(dbg_s); - break; - - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: - gk20a_dbg_gpu_events_disable(dbg_s); - break; - - case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: - gk20a_dbg_gpu_events_clear(dbg_s); - break; - - default: - nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", - args->cmd); - ret = -EINVAL; - break; - } - - return ret; -} - -static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_map_args *args) -{ - struct gk20a *g = dbg_s->g; - struct mm_gk20a *mm = &g->mm; - int err; - u32 virt_size; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - if (g->perfbuf.owner) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -EBUSY; - } - - mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, - big_page_size << 10, - NV_MM_DEFAULT_KERNEL_SIZE, - NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, - false, false, "perfbuf"); - if (!mm->perfbuf.vm) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -ENOMEM; - } - - err = nvgpu_vm_map_buffer(mm->perfbuf.vm, - args->dmabuf_fd, - &args->offset, - 0, - 0, - 0, - 0, - args->mapping_size, - NULL); - if (err) - goto err_remove_vm; - - /* perf output buffer may not cross a 4GB boundary */ - virt_size = u64_lo32(args->mapping_size); - if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { - err = -EINVAL; - goto err_unmap; - } - - err = g->ops.dbg_session_ops.perfbuffer_enable(g, - args->offset, virt_size); - if (err) - goto err_unmap; - - g->perfbuf.owner = dbg_s; - g->perfbuf.offset = args->offset; - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return 0; - -err_unmap: - nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); -err_remove_vm: - nvgpu_vm_put(mm->perfbuf.vm); - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) -{ - struct gk20a *g = dbg_s->g; - int err; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - if ((g->perfbuf.owner != dbg_s) || - (g->perfbuf.offset != args->offset)) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return -EINVAL; - } - - err = gk20a_perfbuf_release_locked(g, args->offset); - - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return err; -} - -static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_pc_sampling_args *args) -{ - struct channel_gk20a *ch; - struct gk20a *g = dbg_s->g; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - nvgpu_log_fn(g, " "); - - return g->ops.gr.update_pc_sampling ? 
- g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; -} - -static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( - struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) -{ - struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; - u32 sm_id; - struct channel_gk20a *ch; - int err = 0; - - ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); - if (!ch) - return -EINVAL; - - sm_id = args->sm_id; - if (sm_id >= gr->no_of_sm) - return -EINVAL; - - nvgpu_speculation_barrier(); - - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.clear_sm_error_state(g, ch, sm_id)); - - gk20a_idle(g); - - return err; -} - -static int -nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) -{ - struct gk20a *g = dbg_s->g; - int err = 0; - int ctx_resident_ch_fd = -1; - - err = gk20a_busy(g); - if (err) - return err; - - switch (args->action) { - case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: - err = g->ops.gr.suspend_contexts(g, dbg_s, - &ctx_resident_ch_fd); - break; - - case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: - err = g->ops.gr.resume_contexts(g, dbg_s, - &ctx_resident_ch_fd); - break; - } - - if (ctx_resident_ch_fd < 0) { - args->is_resident_context = 0; - } else { - args->is_resident_context = 1; - args->resident_context_fd = ctx_resident_ch_fd; - } - - gk20a_idle(g); - - return err; -} - -static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_access_fb_memory_args *args) -{ - struct gk20a *g = dbg_s->g; - struct dma_buf *dmabuf; - void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; - void *buffer; - u64 size, access_size, offset; - u64 access_limit_size = SZ_4K; - int err = 0; - - if ((args->offset & 3) || (!args->size) || (args->size & 3)) - return -EINVAL; - - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return -EINVAL; - - if ((args->offset > dmabuf->size) || - (args->size > dmabuf->size) || - (args->offset + args->size > dmabuf->size)) { - err = -EINVAL; - goto fail_dmabuf_put; - } - - buffer = nvgpu_big_zalloc(g, access_limit_size); - if (!buffer) { - err = -ENOMEM; - goto fail_dmabuf_put; - } - - size = args->size; - offset = 0; - - err = gk20a_busy(g); - if (err) - goto fail_free_buffer; - - while (size) { - /* Max access size of access_limit_size in one loop */ - access_size = min(access_limit_size, size); - - if (args->cmd == - NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { - err = copy_from_user(buffer, user_buffer + offset, - access_size); - if (err) - goto fail_idle; - } - - err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, - args->offset + offset, access_size, - args->cmd); - if (err) - goto fail_idle; - - if (args->cmd == - NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { - err = copy_to_user(user_buffer + offset, - buffer, access_size); - if (err) - goto fail_idle; - } - - size -= access_size; - offset += access_size; - } - -fail_idle: - gk20a_idle(g); -fail_free_buffer: - nvgpu_big_free(g, buffer); -fail_dmabuf_put: - dma_buf_put(dmabuf); - - return err; -} - -static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_profiler_reserve_args *args) -{ - if (args->acquire) - return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); - - return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); -} - -static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, 
- struct nvgpu_dbg_gpu_timeout_args *args) -{ - bool status; - struct gk20a *g = dbg_s->g; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - status = nvgpu_is_timeouts_enabled(g); - nvgpu_mutex_release(&g->dbg_sessions_lock); - - if (status) - args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; - else - args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE; -} - -/* In order to perform a context relative op the context has - * to be created already... which would imply that the - * context switch mechanism has already been put in place. - * So by the time we perform such an operation it should always - * be possible to query for the appropriate context offsets, etc. - * - * But note: while the dbg_gpu bind requires a channel fd, - * it doesn't require an allocated gr/compute obj at that point... - */ -static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, - struct gr_gk20a *gr) -{ - int err; - - nvgpu_mutex_acquire(&gr->ctx_mutex); - err = !gr->ctx_vars.golden_image_initialized; - nvgpu_mutex_release(&gr->ctx_mutex); - if (err) - return false; - return true; - -} - -static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) -{ - struct mm_gk20a *mm = &g->mm; - struct vm_gk20a *vm = mm->perfbuf.vm; - int err; - - err = g->ops.dbg_session_ops.perfbuffer_disable(g); - - nvgpu_vm_unmap(vm, offset, NULL); - nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); - nvgpu_vm_put(vm); - - g->perfbuf.owner = NULL; - g->perfbuf.offset = 0; - return err; -} - -static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj; - int err = 0; - - nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Find matching object. */ - prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); - - if (!prof_obj) { - nvgpu_err(g, "object not found"); - err = -EINVAL; - goto exit; - } - - if (prof_obj->has_reservation) - g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj); - else { - nvgpu_err(g, "No reservation found"); - err = -EINVAL; - goto exit; - } -exit: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, - u32 profiler_handle) -{ - struct gk20a *g = dbg_s->g; - struct dbg_profiler_object_data *prof_obj, *my_prof_obj; - int err = 0; - - nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); - - if (g->profiler_reservation_count < 0) { - nvgpu_err(g, "Negative reservation count!"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - /* Find matching object. */ - my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); - - if (!my_prof_obj) { - nvgpu_err(g, "object not found"); - err = -EINVAL; - goto exit; - } - - /* If we already have the reservation, we're done */ - if (my_prof_obj->has_reservation) { - err = 0; - goto exit; - } - - if (my_prof_obj->ch == NULL) { - /* Global reservations are only allowed if there are no other - * global or per-context reservations currently held - */ - if (!g->ops.dbg_session_ops.check_and_set_global_reservation( - dbg_s, my_prof_obj)) { - nvgpu_err(g, - "global reserve: have existing reservation"); - err = -EBUSY; - } - } else if (g->global_profiler_reservation_held) { - /* If there's a global reservation, - * we can't take a per-context one.
- */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) { - /* TSG: check that another channel in the TSG - * doesn't already have the reservation - */ - int my_tsgid = my_prof_obj->ch->tsgid; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->has_reservation && - (prof_obj->ch->tsgid == my_tsgid)) { - nvgpu_err(g, - "per-ctxt reserve (tsg): already reserved"); - err = -EBUSY; - goto exit; - } - } - - if (!g->ops.dbg_session_ops.check_and_set_context_reservation( - dbg_s, my_prof_obj)) { - /* Another guest OS has the global reservation */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } - } else { - /* channel: check that some other profiler object doesn't - * already have the reservation. - */ - struct channel_gk20a *my_ch = my_prof_obj->ch; - - nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, - dbg_profiler_object_data, prof_obj_entry) { - if (prof_obj->has_reservation && - (prof_obj->ch == my_ch)) { - nvgpu_err(g, - "per-ctxt reserve (ch): already reserved"); - err = -EBUSY; - goto exit; - } - } - - if (!g->ops.dbg_session_ops.check_and_set_context_reservation( - dbg_s, my_prof_obj)) { - /* Another guest OS has the global reservation */ - nvgpu_err(g, - "per-ctxt reserve: global reservation in effect"); - err = -EBUSY; - } - } -exit: - nvgpu_mutex_release(&g->dbg_sessions_lock); - return err; -} - -static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, - struct nvgpu_dbg_gpu_unbind_channel_args *args) -{ - struct dbg_session_channel_data *ch_data; - struct gk20a *g = dbg_s->g; - bool channel_found = false; - struct channel_gk20a *ch; - int err; - - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", - g->name, args->channel_fd); - - ch = gk20a_get_channel_from_file(args->channel_fd); - if (!ch) { - nvgpu_log_fn(g, "no channel found for fd"); - return -EINVAL; - } - - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) { - if (ch->chid == ch_data->chid) { - channel_found = true; - break; - } - } - nvgpu_mutex_release(&dbg_s->ch_list_lock); - - if (!channel_found) { - nvgpu_log_fn(g, "channel not bound, fd=%d\n", args->channel_fd); - err = -EINVAL; - goto out; - } - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); - nvgpu_mutex_release(&dbg_s->ch_list_lock); - nvgpu_mutex_release(&g->dbg_sessions_lock); - -out: - gk20a_channel_put(ch); - return err; -} - -int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, dbg.cdev); - struct gk20a *g = &l->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); -} - -long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data; - struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; - struct gk20a *g = dbg_s->g; - u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); - - if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) || - (_IOC_SIZE(cmd) >
NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - /* protect from threaded user space calls */ - nvgpu_mutex_acquire(&dbg_s->ioctl_lock); - - switch (cmd) { - case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: - err = dbg_bind_channel_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_bind_channel_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_REG_OPS: - err = nvgpu_ioctl_channel_reg_ops(dbg_s, - (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_POWERGATE: - err = nvgpu_ioctl_powergate_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_powergate_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: - err = gk20a_dbg_gpu_events_ctrl(dbg_s, - (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: - err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, - (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: - err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, - (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: - err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, - (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: - err = gk20a_perfbuf_map(dbg_s, - (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: - err = gk20a_perfbuf_unmap(dbg_s, - (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: - err = gk20a_dbg_pc_sampling(dbg_s, - (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: - err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, - (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_TIMEOUT: - err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, - (struct nvgpu_dbg_gpu_timeout_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: - nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, - (struct nvgpu_dbg_gpu_timeout_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE: - err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s, - (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: - err = dbg_unbind_channel_gk20a(dbg_s, - (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: - err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, - (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: - err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, - (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: - err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, - (struct 
nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: - err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, - (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); - break; - - case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: - err = nvgpu_ioctl_profiler_reserve(dbg_s, - (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); - break; - - default: - nvgpu_err(g, - "unrecognized dbg gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - break; - } - - nvgpu_mutex_release(&dbg_s->ioctl_lock); - - nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, - buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h deleted file mode 100644 index bd76045b..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Tegra GK20A GPU Debugger Driver - * - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef DBG_GPU_IOCTL_GK20A_H -#define DBG_GPU_IOCTL_GK20A_H -#include - -#include "gk20a/dbg_gpu_gk20a.h" - -/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number - * of regops */ -#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 - -struct dbg_session_gk20a_linux { - struct device *dev; - struct dbg_session_gk20a dbg_s; -}; - -struct dbg_session_channel_data_linux { - /* - * We have to keep a ref to the _file_, not the channel, because - * close(channel_fd) is synchronous and would deadlock if we had an - * open debug session fd holding a channel ref at that time. Holding a - * ref to the file makes close(channel_fd) just drop a kernel ref to - * the file; the channel will close when the last file ref is dropped. - */ - struct file *ch_f; - struct dbg_session_channel_data ch_data; -}; - -/* module debug driver interface */ -int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); -int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); -long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); - -/* used by profiler driver interface */ -int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); - -#endif \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c deleted file mode 100644 index 4ef99ded..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation.
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/tsg_gk20a.h" -#include "gv11b/fifo_gv11b.h" -#include "platform_gk20a.h" -#include "ioctl_tsg.h" -#include "ioctl_channel.h" -#include "os_linux.h" - -struct tsg_private { - struct gk20a *g; - struct tsg_gk20a *tsg; -}; - -static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) -{ - struct channel_gk20a *ch; - int err; - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) - return -EINVAL; - - err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); - - gk20a_channel_put(ch); - return err; -} - -static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - struct channel_gk20a *ch; - struct gr_gk20a *gr = &g->gr; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto mutex_release; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto mutex_release; - } - - ch = gk20a_get_channel_from_file(arg->channel_fd); - if (!ch) { - err = -EINVAL; - goto idle; - } - - if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { - if ((arg->num_active_tpcs > gr->max_tpc_count) || - !(arg->num_active_tpcs)) { - nvgpu_err(g, "Invalid num of active TPCs"); - err = -EINVAL; - goto ch_put; - } - tsg->tpc_num_initialized = true; - tsg->num_active_tpcs = arg->num_active_tpcs; - tsg->tpc_pg_enabled = true; - } else { - tsg->tpc_pg_enabled = false; - nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); - } - - if (arg->subcontext_id < g->fifo.max_subctx_count) { - ch->subctx_id = arg->subcontext_id; - } else { - err = -EINVAL; - goto ch_put; - } - - nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", - ch->chid, ch->subctx_id); - - /* Use runqueue selector 1 for all ASYNC ids */ - if (ch->subctx_id > CHANNEL_INFO_VEID0) - ch->runqueue_sel = 1; - - err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); -ch_put: - gk20a_channel_put(ch); -idle: - gk20a_idle(g); -mutex_release: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) -{ - struct channel_gk20a *ch; - int err = 0; - - ch = gk20a_get_channel_from_file(ch_fd); - if (!ch) - return -EINVAL; - - if (ch->tsgid != tsg->tsgid) { - err = -EINVAL; - goto out; - } - - err = gk20a_tsg_unbind_channel(ch); - - /* - * Mark the channel as timed out, since a channel unbound from the TSG - * has no context of its own and so can't serve any job - */ - ch->has_timedout = true; - -out: - gk20a_channel_put(ch); - return err; -} - -static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, - unsigned int event_id, - struct gk20a_event_id_data **event_id_data) -{ - struct gk20a_event_id_data *local_event_id_data; - bool event_found = false; - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); -
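/* Walk the per-TSG event list under event_id_list_lock; entries are - * added by the event-id ENABLE ioctl (gk20a_tsg_event_id_enable) and - * removed in gk20a_event_id_release(). - */ -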
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, - gk20a_event_id_data, event_id_node) { - if (local_event_id_data->event_id == event_id) { - event_found = true; - break; - } - } - nvgpu_mutex_release(&tsg->event_id_list_lock); - - if (event_found) { - *event_id_data = local_event_id_data; - return 0; - } else { - return -1; - } -} - -/* - * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific - * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs - */ -static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) -{ - switch (event_id) { - case NVGPU_EVENT_ID_BPT_INT: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; - case NVGPU_EVENT_ID_BPT_PAUSE: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; - case NVGPU_EVENT_ID_BLOCKING_SYNC: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; - case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; - case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; - case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: - return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; - } - - return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; -} - -void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, - int __event_id) -{ - struct gk20a_event_id_data *event_id_data; - u32 event_id; - int err = 0; - struct gk20a *g = tsg->g; - - event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); - if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) - return; - - err = gk20a_tsg_get_event_data_from_id(tsg, event_id, - &event_id_data); - if (err) - return; - - nvgpu_mutex_acquire(&event_id_data->lock); - - nvgpu_log_info(g, - "posting event for event_id=%d on tsg=%d\n", - event_id, tsg->tsgid); - event_id_data->event_posted = true; - - nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); - - nvgpu_mutex_release(&event_id_data->lock); -} - -static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) -{ - unsigned int mask = 0; - struct gk20a_event_id_data *event_id_data = filep->private_data; - struct gk20a *g = event_id_data->g; - u32 event_id = event_id_data->event_id; - struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); - - poll_wait(filep, &event_id_data->event_id_wq.wq, wait); - - nvgpu_mutex_acquire(&event_id_data->lock); - - if (event_id_data->event_posted) { - nvgpu_log_info(g, - "found pending event_id=%d on TSG=%d\n", - event_id, tsg->tsgid); - mask = (POLLPRI | POLLIN); - event_id_data->event_posted = false; - } - - nvgpu_mutex_release(&event_id_data->lock); - - return mask; -} - -static int gk20a_event_id_release(struct inode *inode, struct file *filp) -{ - struct gk20a_event_id_data *event_id_data = filp->private_data; - struct gk20a *g = event_id_data->g; - struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); - nvgpu_list_del(&event_id_data->event_id_node); - nvgpu_mutex_release(&tsg->event_id_list_lock); - - nvgpu_mutex_destroy(&event_id_data->lock); - gk20a_put(g); - nvgpu_kfree(g, event_id_data); - filp->private_data = NULL; - - return 0; -} - -const struct file_operations gk20a_event_id_ops = { - .owner = THIS_MODULE, - .poll = gk20a_event_id_poll, - .release = gk20a_event_id_release, -}; - -static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, - int event_id, - int *fd) -{ - int err = 0; - int local_fd; - struct file *file; - char name[64]; - 
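[Editor's note: gk20a_tsg_event_id_post_event() and gk20a_event_id_poll() form a classic producer/consumer handshake — the poster sets event_posted under the lock and broadcasts, the poll side checks and clears the flag. A userspace analogue with pthreads, assuming nothing nvgpu-specific, shows the same shape; pthread_cond_broadcast stands in for nvgpu_cond_broadcast_interruptible().]

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static bool event_posted;

static void *post_event(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	event_posted = true;		/* analogue of event_id_data->event_posted */
	pthread_cond_broadcast(&wq);	/* wake every waiter, as the poster does */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, post_event, NULL);
	pthread_mutex_lock(&lock);
	while (!event_posted)
		pthread_cond_wait(&wq, &lock);
	event_posted = false;		/* consume, as the poll handler does */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	puts("event consumed");
	return 0;
}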
struct gk20a_event_id_data *event_id_data; - struct gk20a *g; - - g = gk20a_get(tsg->g); - if (!g) - return -ENODEV; - - err = gk20a_tsg_get_event_data_from_id(tsg, - event_id, &event_id_data); - if (err == 0) { - /* We already have event enabled */ - err = -EINVAL; - goto free_ref; - } - - err = get_unused_fd_flags(O_RDWR); - if (err < 0) - goto free_ref; - local_fd = err; - - snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", - event_id, local_fd); - - file = anon_inode_getfile(name, &gk20a_event_id_ops, - NULL, O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto clean_up; - } - - event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); - if (!event_id_data) { - err = -ENOMEM; - goto clean_up_file; - } - event_id_data->g = g; - event_id_data->id = tsg->tsgid; - event_id_data->event_id = event_id; - - nvgpu_cond_init(&event_id_data->event_id_wq); - err = nvgpu_mutex_init(&event_id_data->lock); - if (err) - goto clean_up_free; - - nvgpu_init_list_node(&event_id_data->event_id_node); - - nvgpu_mutex_acquire(&tsg->event_id_list_lock); - nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); - nvgpu_mutex_release(&tsg->event_id_list_lock); - - fd_install(local_fd, file); - file->private_data = event_id_data; - - *fd = local_fd; - - return 0; - -clean_up_free: - nvgpu_kfree(g, event_id_data); -clean_up_file: - fput(file); -clean_up: - put_unused_fd(local_fd); -free_ref: - gk20a_put(g); - return err; -} - -static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, - struct nvgpu_event_id_ctrl_args *args) -{ - int err = 0; - int fd = -1; - - if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) - return -EINVAL; - - switch (args->cmd) { - case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: - err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); - if (!err) - args->event_fd = fd; - break; - - default: - nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", - args->cmd); - err = -EINVAL; - break; - } - - return err; -} - -int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) -{ - struct tsg_private *priv; - struct tsg_gk20a *tsg; - struct device *dev; - int err; - - g = gk20a_get(g); - if (!g) - return -ENODEV; - - dev = dev_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); - - priv = nvgpu_kmalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto free_ref; - } - - tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); - if (!tsg) { - nvgpu_kfree(g, priv); - err = -ENOMEM; - goto free_ref; - } - - priv->g = g; - priv->tsg = tsg; - filp->private_data = priv; - - gk20a_sched_ctrl_tsg_added(g, tsg); - - return 0; - -free_ref: - gk20a_put(g); - return err; -} - -int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l; - struct gk20a *g; - int ret; - - l = container_of(inode->i_cdev, - struct nvgpu_os_linux, tsg.cdev); - g = &l->g; - - nvgpu_log_fn(g, " "); - - ret = gk20a_busy(g); - if (ret) { - nvgpu_err(g, "failed to power on, %d", ret); - return ret; - } - - ret = nvgpu_ioctl_tsg_open(&l->g, filp); - - gk20a_idle(g); - nvgpu_log_fn(g, "done"); - return ret; -} - -void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) -{ - struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); - struct gk20a *g = tsg->g; - - gk20a_sched_ctrl_tsg_removed(g, tsg); - - gk20a_tsg_release(ref); - gk20a_put(g); -} - -int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) -{ - struct tsg_private *priv = filp->private_data; - struct tsg_gk20a *tsg = priv->tsg; - - 
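[Editor's note: gk20a_tsg_event_id_enable() below is a textbook goto-ladder — fd, file, allocation and mutex are acquired in order and unwound in reverse through labeled exits. A generic, compilable sketch of the idiom, with hypothetical acquire_a/b/c helpers in place of the real resources:]

#include <stdio.h>
#include <stdlib.h>

static int acquire_a(void) { return 0; }
static int acquire_b(void) { return 0; }
static int acquire_c(void) { return -1; }	/* force the unwind path */

static int setup(void)
{
	int err;

	err = acquire_a();
	if (err)
		goto fail;
	err = acquire_b();
	if (err)
		goto undo_a;
	err = acquire_c();
	if (err)
		goto undo_b;
	return 0;

undo_b:					/* labels release in reverse order */
	puts("release b");
undo_a:
	puts("release a");
fail:
	return err;
}

int main(void) { return setup() ? EXIT_FAILURE : EXIT_SUCCESS; }

The payoff is the same as in the TSG code: each failure site names exactly one label, and the cleanup order can be audited by reading the labels top to bottom.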
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - nvgpu_kfree(tsg->g, priv); - return 0; -} - -static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - u32 level = arg->level; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto done; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto done; - } - - level = nvgpu_get_common_runlist_level(level); - err = gk20a_tsg_set_runlist_interleave(tsg, level); - - gk20a_idle(g); -done: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->control_lock); - if (sched->control_locked) { - err = -EPERM; - goto done; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, "failed to power on gpu"); - goto done; - } - err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); - gk20a_idle(g); -done: - nvgpu_mutex_release(&sched->control_lock); - return err; -} - -static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, - struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) -{ - arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); - return 0; -} - -long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct tsg_private *priv = filp->private_data; - struct tsg_gk20a *tsg = priv->tsg; - struct gk20a *g = tsg->g; - u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & _IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - if (!g->sw_ready) { - err = gk20a_busy(g); - if (err) - return err; - - gk20a_idle(g); - } - - switch (cmd) { - case NVGPU_TSG_IOCTL_BIND_CHANNEL: - { - int ch_fd = *(int *)buf; - if (ch_fd < 0) { - err = -EINVAL; - break; - } - err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); - break; - } - - case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: - { - err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, - (struct nvgpu_tsg_bind_channel_ex_args *)buf); - break; - } - - case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: - { - int ch_fd = *(int *)buf; - - if (ch_fd < 0) { - err = -EINVAL; - break; - } - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - break; - } - err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_ENABLE: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - g->ops.fifo.enable_tsg(tsg); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_DISABLE: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - g->ops.fifo.disable_tsg(tsg); 
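[Editor's note: nearly every TSG ioctl case below brackets its hardware access with gk20a_busy()/gk20a_idle(), a usage counter that is incremented optimistically and rolled back on failure. A minimal C11 sketch of that guard, assuming only a boolean stand-in for the can-busy state:]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int usage_count;
static bool powered_on = true;	/* stand-in for gk20a_can_busy() */

static int busy(void)
{
	atomic_fetch_add(&usage_count, 1);
	if (!powered_on) {
		/* roll the count back, as gk20a_busy() does on failure */
		atomic_fetch_sub(&usage_count, 1);
		return -1;
	}
	return 0;
}

static void idle(void)
{
	atomic_fetch_sub(&usage_count, 1);
}

int main(void)
{
	if (busy() == 0) {
		printf("usage=%d\n", atomic_load(&usage_count));
		idle();
	}
	printf("usage=%d\n", atomic_load(&usage_count));
	return 0;
}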
- gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_PREEMPT: - { - err = gk20a_busy(g); - if (err) { - nvgpu_err(g, - "failed to host gk20a for ioctl cmd: 0x%x", cmd); - return err; - } - /* preempt TSG */ - err = g->ops.fifo.preempt_tsg(g, tsg->tsgid); - gk20a_idle(g); - break; - } - - case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: - { - err = gk20a_tsg_event_id_ctrl(g, tsg, - (struct nvgpu_event_id_ctrl_args *)buf); - break; - } - - case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: - err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, - (struct nvgpu_runlist_interleave_args *)buf); - break; - - case NVGPU_IOCTL_TSG_SET_TIMESLICE: - { - err = gk20a_tsg_ioctl_set_timeslice(g, tsg, - (struct nvgpu_timeslice_args *)buf); - break; - } - case NVGPU_IOCTL_TSG_GET_TIMESLICE: - { - err = gk20a_tsg_ioctl_get_timeslice(g, tsg, - (struct nvgpu_timeslice_args *)buf); - break; - } - - default: - nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", - cmd); - err = -ENOTTY; - break; - } - - if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) - err = copy_to_user((void __user *)arg, - buf, _IOC_SIZE(cmd)); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h deleted file mode 100644 index 67399fd4..00000000 --- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#ifndef NVGPU_IOCTL_TSG_H -#define NVGPU_IOCTL_TSG_H - -struct inode; -struct file; -struct gk20a; -struct nvgpu_ref; - -int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); -int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); -int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); -long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg); -void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c deleted file mode 100644 index 10946a08..00000000 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "gk20a/gk20a.h" - -#include "kmem_priv.h" - -/* - * Statically declared because this needs to be shared across all nvgpu driver - * instances. 
This makes sure that all kmem caches are _definitely_ uniquely - * named. - */ -static atomic_t kmem_cache_id; - -void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) -{ - void *p; - - if (size > PAGE_SIZE) { - if (clear) - p = nvgpu_vzalloc(g, size); - else - p = nvgpu_vmalloc(g, size); - } else { - if (clear) - p = nvgpu_kzalloc(g, size); - else - p = nvgpu_kmalloc(g, size); - } - - return p; -} - -void nvgpu_big_free(struct gk20a *g, void *p) -{ - /* - * This will have to be fixed eventually. Allocs that use - * nvgpu_big_[mz]alloc() will need to remember the size of the alloc - * when freeing. - */ - if (is_vmalloc_addr(p)) - nvgpu_vfree(g, p); - else - nvgpu_kfree(g, p); -} - -void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kmalloc(g, size, ip); -#else - alloc = kmalloc(size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", - size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kzalloc(g, size, ip); -#else - alloc = kzalloc(size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", - size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_kcalloc(g, n, size, ip); -#else - alloc = kcalloc(n, size, GFP_KERNEL); -#endif - - kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", - n * size, alloc, GFP_KERNEL); - - return alloc; -} - -void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_vmalloc(g, size, ip); -#else - alloc = vmalloc(size); -#endif - - kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); - - return alloc; -} - -void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip) -{ - void *alloc; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - alloc = __nvgpu_track_vzalloc(g, size, ip); -#else - alloc = vzalloc(size); -#endif - - kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); - - return alloc; -} - -void __nvgpu_kfree(struct gk20a *g, void *addr) -{ - kmem_dbg(g, "kfree: addr=0x%p", addr); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - __nvgpu_track_kfree(g, addr); -#else - kfree(addr); -#endif -} - -void __nvgpu_vfree(struct gk20a *g, void *addr) -{ - kmem_dbg(g, "vfree: addr=0x%p", addr); -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - __nvgpu_track_vfree(g, addr); -#else - vfree(addr); -#endif -} - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - -void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) -{ - nvgpu_mutex_acquire(&tracker->lock); -} - -void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) -{ - nvgpu_mutex_release(&tracker->lock); -} - -void kmem_print_mem_alloc(struct gk20a *g, - struct nvgpu_mem_alloc *alloc, - struct seq_file *s) -{ -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - int i; - - __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", - alloc->addr, alloc->size); - for (i = 0; i < alloc->stack_length; i++) - __pstat(s, " %3d [<%p>] %pS\n", i, - (void *)alloc->stack[i], - (void *)alloc->stack[i]); - __pstat(s, "\n"); -#else - __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", - alloc->addr, alloc->size, alloc->ip); -#endif -} - -static int nvgpu_add_alloc(struct 
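[Editor's note: __nvgpu_big_alloc() routes requests above PAGE_SIZE to vmalloc and everything else to kmalloc, and nvgpu_big_free() later distinguishes the two with is_vmalloc_addr(). That kernel facility has no userspace equivalent, so the analogue below tags each allocation with a header byte instead — the tag is purely illustrative, not how the kernel code works.]

#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE 4096u

struct big_hdr { unsigned char is_big; };	/* stands in for is_vmalloc_addr() */

static void *big_alloc(size_t size, int clear)
{
	struct big_hdr *h = clear ? calloc(1, sizeof(*h) + size)
				  : malloc(sizeof(*h) + size);
	if (!h)
		return NULL;
	h->is_big = size > FAKE_PAGE_SIZE;	/* route large allocs differently */
	return h + 1;
}

static void big_free(void *p)
{
	struct big_hdr *h = (struct big_hdr *)p - 1;

	printf("freeing a %s alloc\n", h->is_big ? "big" : "small");
	free(h);
}

int main(void)
{
	void *p = big_alloc(2 * FAKE_PAGE_SIZE, 1);

	if (p)
		big_free(p);
	return 0;
}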
nvgpu_mem_alloc_tracker *tracker, - struct nvgpu_mem_alloc *alloc) -{ - alloc->allocs_entry.key_start = alloc->addr; - alloc->allocs_entry.key_end = alloc->addr + alloc->size; - - nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); - return 0; -} - -static struct nvgpu_mem_alloc *nvgpu_rem_alloc( - struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) -{ - struct nvgpu_mem_alloc *alloc; - struct nvgpu_rbtree_node *node = NULL; - - nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); - if (!node) - return NULL; - - alloc = nvgpu_mem_alloc_from_rbtree_node(node); - - nvgpu_rbtree_unlink(node, &tracker->allocs); - - return alloc; -} - -static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, - unsigned long size, unsigned long real_size, - u64 addr, unsigned long ip) -{ - int ret; - struct nvgpu_mem_alloc *alloc; -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - struct stack_trace stack_trace; -#endif - - alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); - if (!alloc) - return -ENOMEM; - - alloc->owner = tracker; - alloc->size = size; - alloc->real_size = real_size; - alloc->addr = addr; - alloc->ip = (void *)(uintptr_t)ip; - -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - stack_trace.max_entries = MAX_STACK_TRACE; - stack_trace.nr_entries = 0; - stack_trace.entries = alloc->stack; - /* - * This 4 here skips the 2 function calls that happen for all traced - * allocs due to nvgpu: - * - * __nvgpu_save_kmem_alloc+0x7c/0x128 - * __nvgpu_track_kzalloc+0xcc/0xf8 - * - * And the function calls that get made by the stack trace code itself. - * If the trace savings code changes this will likely have to change - * as well. - */ - stack_trace.skip = 4; - save_stack_trace(&stack_trace); - alloc->stack_length = stack_trace.nr_entries; -#endif - - nvgpu_lock_tracker(tracker); - tracker->bytes_alloced += size; - tracker->bytes_alloced_real += real_size; - tracker->nr_allocs++; - - /* Keep track of this for building a histogram later on. */ - if (tracker->max_alloc < size) - tracker->max_alloc = size; - if (tracker->min_alloc > size) - tracker->min_alloc = size; - - ret = nvgpu_add_alloc(tracker, alloc); - if (ret) { - WARN(1, "Duplicate alloc??? 0x%llx\n", addr); - kfree(alloc); - nvgpu_unlock_tracker(tracker); - return ret; - } - nvgpu_unlock_tracker(tracker); - - return 0; -} - -static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, - u64 addr) -{ - struct nvgpu_mem_alloc *alloc; - - nvgpu_lock_tracker(tracker); - alloc = nvgpu_rem_alloc(tracker, addr); - if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { - nvgpu_unlock_tracker(tracker); - return -EINVAL; - } - - memset((void *)alloc->addr, 0, alloc->size); - - tracker->nr_frees++; - tracker->bytes_freed += alloc->size; - tracker->bytes_freed_real += alloc->real_size; - nvgpu_unlock_tracker(tracker); - - return 0; -} - -static void __nvgpu_check_valloc_size(unsigned long size) -{ - WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); -} - -static void __nvgpu_check_kalloc_size(size_t size) -{ - WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); -} - -void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, - unsigned long ip) -{ - void *alloc = vmalloc(size); - - if (!alloc) - return NULL; - - __nvgpu_check_valloc_size(size); - - /* - * Ignore the return message. If this fails let's not cause any issues - * for the rest of the driver. 
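[Editor's note: the tracker keys live allocations by address so that a free can be matched to its record, and flags a miss as a possible double-free. The real code uses an interval rbtree; the compilable sketch below substitutes a flat table with swap-removal, which preserves the add/remove/miss semantics but not the O(log n) lookup.]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct alloc_rec { uint64_t addr; size_t size; };
static struct alloc_rec table[64];	/* stands in for tracker->allocs */
static size_t nr_recs;

static void track_add(void *p, size_t size)
{
	table[nr_recs].addr = (uint64_t)(uintptr_t)p;
	table[nr_recs].size = size;
	nr_recs++;
}

static int track_remove(void *p)
{
	uint64_t addr = (uint64_t)(uintptr_t)p;

	for (size_t i = 0; i < nr_recs; i++) {
		if (table[i].addr == addr) {
			table[i] = table[--nr_recs];	/* swap-remove */
			return 0;
		}
	}
	return -1;	/* analogue of the double-free warning */
}

int main(void)
{
	void *p = malloc(32);

	track_add(p, 32);
	printf("remove once: %d, twice: %d\n",
	       track_remove(p), track_remove(p));
	free(p);
	return 0;
}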
- */ - __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, - unsigned long ip) -{ - void *alloc = vzalloc(size); - - if (!alloc) - return NULL; - - __nvgpu_check_valloc_size(size); - - /* - * Ignore the return message. If this fails let's not cause any issues - * for the rest of the driver. - */ - __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc = kmalloc(size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(size); - - __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) -{ - void *alloc = kzalloc(size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(size); - - __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, - unsigned long ip) -{ - void *alloc = kcalloc(n, size, GFP_KERNEL); - - if (!alloc) - return NULL; - - __nvgpu_check_kalloc_size(n * size); - - __nvgpu_save_kmem_alloc(g->kmallocs, n * size, - roundup_pow_of_two(n * size), - (u64)(uintptr_t)alloc, ip); - - return alloc; -} - -void __nvgpu_track_vfree(struct gk20a *g, void *addr) -{ - /* - * Often it is accepted practice to pass NULL pointers into free - * functions to save code. - */ - if (!addr) - return; - - __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); - - vfree(addr); -} - -void __nvgpu_track_kfree(struct gk20a *g, void *addr) -{ - if (!addr) - return; - - __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); - - kfree(addr); -} - -static int __do_check_for_outstanding_allocs( - struct gk20a *g, - struct nvgpu_mem_alloc_tracker *tracker, - const char *type, bool silent) -{ - struct nvgpu_rbtree_node *node; - int count = 0; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - if (!silent) - kmem_print_mem_alloc(g, alloc, NULL); - - count++; - nvgpu_rbtree_enum_next(&node, node); - } - - return count; -} - -/** - * check_for_outstanding_allocs - Count and display outstanding allocs - * - * @g - The GPU. - * @silent - If set don't print anything about the allocs. - * - * Dump (or just count) the number of allocations left outstanding. - */ -static int check_for_outstanding_allocs(struct gk20a *g, bool silent) -{ - int count = 0; - - count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", - silent); - count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", - silent); - - return count; -} - -static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, - void (*force_free_func)(const void *)) -{ - struct nvgpu_rbtree_node *node; - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - while (node) { - struct nvgpu_mem_alloc *alloc = - nvgpu_mem_alloc_from_rbtree_node(node); - - if (force_free_func) - force_free_func((void *)alloc->addr); - - nvgpu_rbtree_unlink(node, &tracker->allocs); - kfree(alloc); - - nvgpu_rbtree_enum_start(0, &node, tracker->allocs); - } -} - -/** - * nvgpu_kmem_cleanup - Cleanup the kmem tracking - * - * @g - The GPU. 
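[Editor's note: the tracking wrappers record both the requested size and roundup_pow_of_two(size) as the "real" size, since the slab allocator rounds kmalloc requests up to a power-of-two bucket. A portable equivalent of that rounding, valid for n >= 1:]

#include <stdio.h>

static unsigned long roundup_pow2(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)		/* double until we cover the request */
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long sizes[] = { 24, 100, 512, 4097 };

	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%lu -> %lu\n", sizes[i], roundup_pow2(sizes[i]));
	return 0;
}

For the sample inputs this prints 32, 128, 512 and 8192 — the gap between size and real_size is exactly the slab overhead the tracker is accounting for.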
- * @force_free - If set will also free leaked objects if possible. - * - * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free - * is non-zero then the allocation made by nvgpu is also freed. This is risky, - * though, as it is possible that the memory is still in use by other parts of - * the GPU driver not aware that this has happened. - * - * In theory it should be fine if the GPU driver has been deinitialized and - * there are no bugs in that code. However, if there are any bugs in that code - * then they could likely manifest as odd crashes indeterminate amounts of time - * in the future. So use @force_free at your own risk. - */ -static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) -{ - do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); - do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); -} - -void nvgpu_kmem_fini(struct gk20a *g, int flags) -{ - int count; - bool silent, force_free; - - if (!flags) - return; - - silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); - force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); - - count = check_for_outstanding_allocs(g, silent); - nvgpu_kmem_cleanup(g, force_free); - - /* - * If we leak objects we can either BUG() out or just WARN(). In general - * it doesn't make sense to BUG() on here since leaking a few objects - * won't crash the kernel but it can be helpful for development. - * - * If neither flag is set then we just silently do nothing. - */ - if (count > 0) { - if (flags & NVGPU_KMEM_FINI_WARN) { - WARN(1, "Letting %d allocs leak!!\n", count); - } else if (flags & NVGPU_KMEM_FINI_BUG) { - nvgpu_err(g, "Letting %d allocs leak!!", count); - BUG(); - } - } -} - -int nvgpu_kmem_init(struct gk20a *g) -{ - int err; - - g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); - g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); - - if (!g->vmallocs || !g->kmallocs) { - err = -ENOMEM; - goto fail; - } - - g->vmallocs->name = "vmalloc"; - g->kmallocs->name = "kmalloc"; - - g->vmallocs->allocs = NULL; - g->kmallocs->allocs = NULL; - - nvgpu_mutex_init(&g->vmallocs->lock); - nvgpu_mutex_init(&g->kmallocs->lock); - - g->vmallocs->min_alloc = PAGE_SIZE; - g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; - - /* - * This needs to go after all the other initialization since they use - * the nvgpu_kzalloc() API. 
- */ - g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, - sizeof(struct nvgpu_mem_alloc)); - g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, - sizeof(struct nvgpu_mem_alloc)); - - if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { - err = -ENOMEM; - if (g->vmallocs->allocs_cache) - nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); - if (g->kmallocs->allocs_cache) - nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); - goto fail; - } - - return 0; - -fail: - if (g->vmallocs) - kfree(g->vmallocs); - if (g->kmallocs) - kfree(g->kmallocs); - return err; -} - -#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ - -int nvgpu_kmem_init(struct gk20a *g) -{ - return 0; -} - -void nvgpu_kmem_fini(struct gk20a *g, int flags) -{ -} -#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ - -struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) -{ - struct nvgpu_kmem_cache *cache = - nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); - - if (!cache) - return NULL; - - cache->g = g; - - snprintf(cache->name, sizeof(cache->name), - "nvgpu-cache-0x%p-%d-%d", g, (int)size, - atomic_inc_return(&kmem_cache_id)); - cache->cache = kmem_cache_create(cache->name, - size, size, 0, NULL); - if (!cache->cache) { - nvgpu_kfree(g, cache); - return NULL; - } - - return cache; -} - -void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) -{ - struct gk20a *g = cache->g; - - kmem_cache_destroy(cache->cache); - nvgpu_kfree(g, cache); -} - -void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) -{ - return kmem_cache_alloc(cache->cache, GFP_KERNEL); -} - -void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) -{ - kmem_cache_free(cache->cache, ptr); -} diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h deleted file mode 100644 index a41762af..00000000 --- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __KMEM_PRIV_H__ -#define __KMEM_PRIV_H__ - -#include -#include - -struct seq_file; - -#define __pstat(s, fmt, msg...) \ - do { \ - if (s) \ - seq_printf(s, fmt, ##msg); \ - else \ - pr_info(fmt, ##msg); \ - } while (0) - -#define MAX_STACK_TRACE 20 - -/* - * Linux specific version of the nvgpu_kmem_cache struct. This type is - * completely opaque to the rest of the driver. - */ -struct nvgpu_kmem_cache { - struct gk20a *g; - struct kmem_cache *cache; - - /* - * Memory to hold the kmem_cache unique name. Only necessary on our - * k3.10 kernel when not using the SLUB allocator but it's easier to - * just carry this on to newer kernels. 
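[Editor's note: nvgpu_kmem_cache_create() below builds a unique cache name by folding an atomically incremented counter into snprintf(), because kmem_cache names must be unique across all driver instances. A userspace sketch of the same naming scheme, with atomic_fetch_add + 1 mirroring the kernel's atomic_inc_return():]

#include <stdatomic.h>
#include <stdio.h>

static atomic_int cache_id;	/* analogue of the static kmem_cache_id */

static void make_cache_name(char *buf, size_t len, void *owner, int size)
{
	int id = atomic_fetch_add(&cache_id, 1) + 1;

	snprintf(buf, len, "nvgpu-cache-%p-%d-%d", owner, size, id);
}

int main(void)
{
	char a[64], b[64];
	int dummy;

	make_cache_name(a, sizeof(a), &dummy, 32);
	make_cache_name(b, sizeof(b), &dummy, 32);
	printf("%s\n%s\n", a, b);	/* same owner and size, distinct ids */
	return 0;
}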
- */ - char name[128]; -}; - -#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE - -struct nvgpu_mem_alloc { - struct nvgpu_mem_alloc_tracker *owner; - - void *ip; -#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES - unsigned long stack[MAX_STACK_TRACE]; - int stack_length; -#endif - - u64 addr; - - unsigned long size; - unsigned long real_size; - - struct nvgpu_rbtree_node allocs_entry; -}; - -static inline struct nvgpu_mem_alloc * -nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) -{ - return (struct nvgpu_mem_alloc *) - ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); -}; - -/* - * Linux specific tracking of vmalloc, kmalloc, etc. - */ -struct nvgpu_mem_alloc_tracker { - const char *name; - struct nvgpu_kmem_cache *allocs_cache; - struct nvgpu_rbtree_node *allocs; - struct nvgpu_mutex lock; - - u64 bytes_alloced; - u64 bytes_freed; - u64 bytes_alloced_real; - u64 bytes_freed_real; - u64 nr_allocs; - u64 nr_frees; - - unsigned long min_alloc; - unsigned long max_alloc; -}; - -void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); -void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); - -void kmem_print_mem_alloc(struct gk20a *g, - struct nvgpu_mem_alloc *alloc, - struct seq_file *s); -#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ - -#endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/log.c b/drivers/gpu/nvgpu/common/linux/log.c deleted file mode 100644 index ca29e0f3..00000000 --- a/drivers/gpu/nvgpu/common/linux/log.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" - -/* - * Define a length for log buffers. This is the buffer that the 'fmt, ...' part - * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it - * needs to not be overly sized since we have limited kernel stack space. But at - * the same time we don't want it to be restrictive either. - */ -#define LOG_BUFFER_LENGTH 160 - -/* - * Annoying quirk of Linux: this has to be a string literal since the printk() - * function and friends use the preprocessor to concatenate stuff to the start - * of this string when printing. 
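[Editor's note: nvgpu_mem_alloc_from_rbtree_node() above recovers the containing struct from an embedded rbtree node by subtracting offsetof() — the container_of idiom. A self-contained sketch with hypothetical struct names:]

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct node { struct node *left, *right; };
struct item { int value; struct node entry; };

/* Step back from the embedded member to the enclosing struct. */
static struct item *item_from_node(struct node *n)
{
	return (struct item *)((uintptr_t)n - offsetof(struct item, entry));
}

int main(void)
{
	struct item it = { .value = 42 };

	printf("%d\n", item_from_node(&it.entry)->value);
	return 0;
}

This is why the tree code never needs to know about struct nvgpu_mem_alloc: it links nodes, and callers translate back with pointer arithmetic.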
- */ -#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" - -static const char *log_types[] = { - "ERR", - "WRN", - "DBG", - "INFO", -}; - -int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) -{ - return !!(g->log_mask & log_mask); -} - -static inline const char *nvgpu_log_name(struct gk20a *g) -{ - return dev_name(dev_from_gk20a(g)); -} - -#ifdef CONFIG_GK20A_TRACE_PRINTK -static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, - const char *func_name, int line, - const char *log_type, const char *log) -{ - trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); -} -#endif - -static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, - const char *func_name, int line, - enum nvgpu_log_type type, const char *log) -{ - const char *name = gpu_name ? gpu_name : ""; - const char *log_type = log_types[type]; - -#ifdef CONFIG_GK20A_TRACE_PRINTK - if (trace) - return __nvgpu_trace_printk_log(trace, name, func_name, - line, log_type, log); -#endif - switch (type) { - case NVGPU_DEBUG: - /* - * We could use pr_debug() here but we control debug enablement - * separately from the Linux kernel. Perhaps this is a bug in - * nvgpu. - */ - pr_info(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_INFO: - pr_info(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_WARNING: - pr_warn(LOG_FMT, name, func_name, line, log_type, log); - break; - case NVGPU_ERROR: - pr_err(LOG_FMT, name, func_name, line, log_type, log); - break; - } -} - -__attribute__((format (printf, 5, 6))) -void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, - enum nvgpu_log_type type, const char *fmt, ...) -{ - char log[LOG_BUFFER_LENGTH]; - va_list args; - - va_start(args, fmt); - vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); - va_end(args); - - __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", - func_name, line, type, log); -} - -__attribute__((format (printf, 5, 6))) -void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, - const char *func_name, int line, - const char *fmt, ...) -{ - char log[LOG_BUFFER_LENGTH]; - va_list args; - - if ((log_mask & g->log_mask) == 0) - return; - - va_start(args, fmt); - vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); - va_end(args); - - __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), - func_name, line, NVGPU_DEBUG, log); -} diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c deleted file mode 100644 index af71cc81..00000000 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ /dev/null @@ -1,1365 +0,0 @@ -/* - * GK20A Graphics - * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
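[Editor's note: the log path just removed gates on a per-class debug mask, formats once into a small fixed stack buffer with vsnprintf(), and only then hands the string to the print backend. A userspace sketch of that flow, with printf standing in for pr_info() and a hypothetical enabled_mask:]

#include <stdarg.h>
#include <stdio.h>

#define LOG_BUF_LEN 160		/* small: the real buffer lives on the kernel stack */
static unsigned long enabled_mask = 0x2;

static void log_dbg(unsigned long mask, const char *func, int line,
		    const char *fmt, ...)
{
	char buf[LOG_BUF_LEN];
	va_list args;

	if ((mask & enabled_mask) == 0)	/* cheap early-out, as in __nvgpu_log_dbg() */
		return;

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printf("demo: %s:%d [DBG] %s\n", func, line, buf);
}

int main(void)
{
	log_dbg(0x1, __func__, __LINE__, "filtered out %d", 1);
	log_dbg(0x2, __func__, __LINE__, "printed, err=%d", -22);
	return 0;
}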
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "platform_gk20a.h" -#include "sysfs.h" -#include "vgpu/vgpu_linux.h" -#include "scale.h" -#include "pci.h" -#include "module.h" -#include "module_usermode.h" -#include "intr.h" -#include "ioctl.h" - -#include "os_linux.h" -#include "ctxsw_trace.h" -#include "driver_common.h" -#include "channel.h" - -#ifdef CONFIG_NVGPU_SUPPORT_CDE -#include "cde.h" -#endif - -#define CLASS_NAME "nvidia-gpu" -/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ - -#define GK20A_WAIT_FOR_IDLE_MS 2000 - -#define CREATE_TRACE_POINTS -#include - - -struct device_node *nvgpu_get_node(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (dev_is_pci(dev)) { - struct pci_bus *bus = to_pci_dev(dev)->bus; - - while (!pci_is_root_bus(bus)) - bus = bus->parent; - - return bus->bridge->parent->of_node; - } - - return dev->of_node; -} - -void gk20a_busy_noresume(struct gk20a *g) -{ - pm_runtime_get_noresume(dev_from_gk20a(g)); -} - -int gk20a_busy(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int ret = 0; - struct device *dev; - - if (!g) - return -ENODEV; - - atomic_inc(&g->usage_count.atomic_var); - - down_read(&l->busy_lock); - - if (!gk20a_can_busy(g)) { - ret = -ENODEV; - atomic_dec(&g->usage_count.atomic_var); - goto fail; - } - - dev = dev_from_gk20a(g); - - if (pm_runtime_enabled(dev)) { - /* Increment usage count and attempt to resume device */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - /* Mark suspended so runtime pm will retry later */ - pm_runtime_set_suspended(dev); - pm_runtime_put_noidle(dev); - atomic_dec(&g->usage_count.atomic_var); - goto fail; - } - } else { - nvgpu_mutex_acquire(&g->poweron_lock); - if (!g->power_on) { - ret = gk20a_gpu_is_virtual(dev) ? - vgpu_pm_finalize_poweron(dev) - : gk20a_pm_finalize_poweron(dev); - if (ret) { - atomic_dec(&g->usage_count.atomic_var); - nvgpu_mutex_release(&g->poweron_lock); - goto fail; - } - } - nvgpu_mutex_release(&g->poweron_lock); - } - -fail: - up_read(&l->busy_lock); - - return ret < 0 ? ret : 0; -} - -void gk20a_idle_nosuspend(struct gk20a *g) -{ - pm_runtime_put_noidle(dev_from_gk20a(g)); -} - -void gk20a_idle(struct gk20a *g) -{ - struct device *dev; - - atomic_dec(&g->usage_count.atomic_var); - - dev = dev_from_gk20a(g); - - if (!(dev && gk20a_can_busy(g))) - return; - - if (pm_runtime_enabled(dev)) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_sync_autosuspend(dev); - } -} - -/* - * Undoes gk20a_lockout_registers(). 
- */ -static int gk20a_restore_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = l->regs_saved; - l->bar1 = l->bar1_saved; - - nvgpu_restore_usermode_registers(g); - - return 0; -} - -static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l) -{ - int err = 0; - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - err = nvgpu_cde_init_ops(l); -#endif - - return err; -} - -int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) -{ - struct gk20a *g = &l->g; - int err; - - if (l->init_done) - return 0; - - err = nvgpu_init_channel_support_linux(l); - if (err) { - nvgpu_err(g, "failed to init linux channel support"); - return err; - } - - l->init_done = true; - - return 0; -} - -int gk20a_pm_finalize_poweron(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - nvgpu_log_fn(g, " "); - - if (g->power_on) - return 0; - - trace_gk20a_finalize_poweron(dev_name(dev)); - - /* Increment platform power refcount */ - if (platform->busy) { - err = platform->busy(dev); - if (err < 0) { - nvgpu_err(g, "failed to poweron platform dependency"); - return err; - } - } - - err = gk20a_restore_registers(g); - if (err) - return err; - - /* Enable interrupt workqueue */ - if (!l->nonstall_work_queue) { - l->nonstall_work_queue = alloc_workqueue("%s", - WQ_HIGHPRI, 1, "mc_nonstall"); - INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb); - } - - err = gk20a_detect_chip(g); - if (err) - return err; - - if (g->sim) { - if (g->sim->sim_init_late) - g->sim->sim_init_late(g); - } - - err = gk20a_finalize_poweron(g); - if (err) - goto done; - - err = nvgpu_finalize_poweron_linux(l); - if (err) - goto done; - - nvgpu_init_mm_ce_context(g); - - nvgpu_vidmem_thread_unpause(&g->mm); - - /* Initialise scaling: it will initialize scaling drive only once */ - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && - nvgpu_platform_is_silicon(g)) { - gk20a_scale_init(dev); - if (platform->initscale) - platform->initscale(dev); - } - - trace_gk20a_finalize_poweron_done(dev_name(dev)); - - err = nvgpu_init_os_linux_ops(l); - if (err) - goto done; - - enable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - enable_irq(g->irq_nonstall); - g->irqs_enabled = 1; - - gk20a_scale_resume(dev_from_gk20a(g)); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - if (platform->has_cde) - gk20a_init_cde_support(l); -#endif - - err = gk20a_sched_ctrl_init(g); - if (err) { - nvgpu_err(g, "failed to init sched control"); - return err; - } - - g->sw_ready = true; - -done: - if (err) - g->power_on = false; - - return err; -} - -/* - * Locks out the driver from accessing GPU registers. This prevents access to - * thse registers after the GPU has been clock or power gated. This should help - * find annoying bugs where register reads and writes are silently dropped - * after the GPU has been turned off. On older chips these reads and writes can - * also lock the entire CPU up. 
- */ -static int gk20a_lockout_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = NULL; - l->bar1 = NULL; - - nvgpu_lockout_usermode_registers(g); - - return 0; -} - -static int gk20a_pm_prepare_poweroff(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); -#endif - int ret = 0; - struct gk20a_platform *platform = gk20a_get_platform(dev); - bool irqs_enabled; - - nvgpu_log_fn(g, " "); - - nvgpu_mutex_acquire(&g->poweroff_lock); - - if (!g->power_on) - goto done; - - /* disable IRQs and wait for completion */ - irqs_enabled = g->irqs_enabled; - if (irqs_enabled) { - disable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - disable_irq(g->irq_nonstall); - g->irqs_enabled = 0; - } - - gk20a_scale_suspend(dev); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - gk20a_cde_suspend(l); -#endif - - ret = gk20a_prepare_poweroff(g); - if (ret) - goto error; - - /* Decrement platform power refcount */ - if (platform->idle) - platform->idle(dev); - - /* Stop CPU from accessing the GPU registers. */ - gk20a_lockout_registers(g); - - nvgpu_mutex_release(&g->poweroff_lock); - return 0; - -error: - /* re-enabled IRQs if previously enabled */ - if (irqs_enabled) { - enable_irq(g->irq_stall); - if (g->irq_stall != g->irq_nonstall) - enable_irq(g->irq_nonstall); - g->irqs_enabled = 1; - } - - gk20a_scale_resume(dev); -done: - nvgpu_mutex_release(&g->poweroff_lock); - - return ret; -} - -static struct of_device_id tegra_gk20a_of_match[] = { -#ifdef CONFIG_TEGRA_GK20A - { .compatible = "nvidia,tegra210-gm20b", - .data = &gm20b_tegra_platform }, - { .compatible = "nvidia,tegra186-gp10b", - .data = &gp10b_tegra_platform }, - { .compatible = "nvidia,gv11b", - .data = &gv11b_tegra_platform }, -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - { .compatible = "nvidia,gv11b-vgpu", - .data = &gv11b_vgpu_tegra_platform}, -#endif -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - { .compatible = "nvidia,tegra124-gk20a-vgpu", - .data = &vgpu_tegra_platform }, -#endif -#endif - - { }, -}; - -#ifdef CONFIG_PM -/** - * __gk20a_do_idle() - force the GPU to idle and railgate - * - * In success, this call MUST be balanced by caller with __gk20a_do_unidle() - * - * Acquires two locks : &l->busy_lock and &platform->railgate_lock - * In success, we hold these locks and return - * In failure, we release these locks and return - */ -int __gk20a_do_idle(struct gk20a *g, bool force_reset) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct nvgpu_timeout timeout; - int ref_cnt; - int target_ref_cnt = 0; - bool is_railgated; - int err = 0; - - /* - * Hold back deterministic submits and changes to deterministic - * channels - this must be outside the power busy locks. - */ - gk20a_channel_deterministic_idle(g); - - /* acquire busy lock to block other busy() calls */ - down_write(&l->busy_lock); - - /* acquire railgate lock to prevent unrailgate in midst of do_idle() */ - nvgpu_mutex_acquire(&platform->railgate_lock); - - /* check if it is already railgated ? 
*/ - if (platform->is_railgated(dev)) - return 0; - - /* - * release railgate_lock, prevent suspend by incrementing usage counter, - * re-acquire railgate_lock - */ - nvgpu_mutex_release(&platform->railgate_lock); - pm_runtime_get_sync(dev); - - /* - * One refcount taken in this API - * If User disables rail gating, we take one more - * extra refcount - */ - if (g->can_railgate) - target_ref_cnt = 1; - else - target_ref_cnt = 2; - nvgpu_mutex_acquire(&platform->railgate_lock); - - nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, - NVGPU_TIMER_CPU_TIMER); - - /* check and wait until GPU is idle (with a timeout) */ - do { - nvgpu_usleep_range(1000, 1100); - ref_cnt = atomic_read(&dev->power.usage_count); - } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); - - if (ref_cnt != target_ref_cnt) { - nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", - ref_cnt); - goto fail_drop_usage_count; - } - - /* check if global force_reset flag is set */ - force_reset |= platform->force_reset_in_do_idle; - - nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, - NVGPU_TIMER_CPU_TIMER); - - if (g->can_railgate && !force_reset) { - /* - * Case 1 : GPU railgate is supported - * - * if GPU is now idle, we will have only one ref count, - * drop this ref which will rail gate the GPU - */ - pm_runtime_put_sync(dev); - - /* add sufficient delay to allow GPU to rail gate */ - nvgpu_msleep(g->railgate_delay); - - /* check in loop if GPU is railgated or not */ - do { - nvgpu_usleep_range(1000, 1100); - is_railgated = platform->is_railgated(dev); - } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); - - if (is_railgated) { - return 0; - } else { - nvgpu_err(g, "failed to idle in timeout"); - goto fail_timeout; - } - } else { - /* - * Case 2 : GPU railgate is not supported or we explicitly - * do not want to depend on runtime PM - * - * if GPU is now idle, call prepare_poweroff() to save the - * state and then do explicit railgate - * - * __gk20a_do_unidle() needs to unrailgate, call - * finalize_poweron(), and then call pm_runtime_put_sync() - * to balance the GPU usage counter - */ - - /* Save the GPU state */ - err = gk20a_pm_prepare_poweroff(dev); - if (err) - goto fail_drop_usage_count; - - /* railgate GPU */ - platform->railgate(dev); - - nvgpu_udelay(10); - - g->forced_reset = true; - return 0; - } - -fail_drop_usage_count: - pm_runtime_put_noidle(dev); -fail_timeout: - nvgpu_mutex_release(&platform->railgate_lock); - up_write(&l->busy_lock); - gk20a_channel_deterministic_unidle(g); - return -EBUSY; -} - -/** - * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called - * from outside of GPU driver - * - * In success, this call MUST be balanced by caller with gk20a_do_unidle() - */ -static int gk20a_do_idle(void *_g) -{ - struct gk20a *g = (struct gk20a *)_g; - - return __gk20a_do_idle(g, true); -} - -/** - * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() - */ -int __gk20a_do_unidle(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err; - - if (g->forced_reset) { - /* - * If we did a forced-reset/railgate - * then unrailgate the GPU here first - */ - platform->unrailgate(dev); - - /* restore the GPU state */ - err = gk20a_pm_finalize_poweron(dev); - if (err) - return err; - - /* balance GPU usage counter */ - pm_runtime_put_sync(dev); - - g->forced_reset = false; - } - - /* release the lock and 
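[Editor's note: __gk20a_do_idle() waits for the runtime-PM usage count and for railgating with the same recipe — sleep ~1 ms per iteration and give up when a CPU timer expires. A portable sketch of that deadline-bounded polling loop, using clock_gettime in place of nvgpu_timeout_init/nvgpu_timeout_expired:]

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

static bool poll_until(bool (*cond)(void), int timeout_ms)
{
	long long deadline = now_ms() + timeout_ms;
	struct timespec nap = { 0, 1000 * 1000 };	/* ~1 ms, like usleep_range(1000, 1100) */

	while (!cond()) {
		if (now_ms() >= deadline)
			return false;	/* caller reports "failed to idle" */
		nanosleep(&nap, NULL);
	}
	return true;
}

static bool always_false(void) { return false; }

int main(void)
{
	printf("idled: %s\n", poll_until(always_false, 10) ? "yes" : "no");
	return 0;
}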
open up all other busy() calls */ - nvgpu_mutex_release(&platform->railgate_lock); - up_write(&l->busy_lock); - - gk20a_channel_deterministic_unidle(g); - - return 0; -} - -/** - * gk20a_do_unidle() - wrap up for __gk20a_do_unidle() - */ -static int gk20a_do_unidle(void *_g) -{ - struct gk20a *g = (struct gk20a *)_g; - - return __gk20a_do_unidle(g); -} -#endif - -void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, - struct resource **out) -{ - struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); - - if (!r) - return NULL; - if (out) - *out = r; - return devm_ioremap_resource(&dev->dev, r); -} - -static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_stall(g); -} - -static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_nonstall(g); -} - -static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_thread_stall(g); -} - -void gk20a_remove_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct sim_nvgpu_linux *sim_linux; - - tegra_unregister_idle_unidle(gk20a_do_idle); - - nvgpu_kfree(g, g->dbg_regops_tmp_buf); - - nvgpu_remove_channel_support_linux(l); - - if (g->pmu.remove_support) - g->pmu.remove_support(&g->pmu); - - if (g->gr.remove_support) - g->gr.remove_support(&g->gr); - - if (g->mm.remove_ce_support) - g->mm.remove_ce_support(&g->mm); - - if (g->fifo.remove_support) - g->fifo.remove_support(&g->fifo); - - if (g->mm.remove_support) - g->mm.remove_support(&g->mm); - - if (g->sim) { - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - if (g->sim->remove_support) - g->sim->remove_support(g); - if (sim_linux->remove_support_linux) - sim_linux->remove_support_linux(g); - } - - /* free mappings to registers, etc */ - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - nvgpu_remove_usermode_support(g); - - nvgpu_free_enabled_flags(g); -} - -static int gk20a_init_support(struct platform_device *dev) -{ - int err = -ENOMEM; - struct gk20a *g = get_gk20a(&dev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); - - l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, - &l->reg_mem); - if (IS_ERR(l->regs)) { - nvgpu_err(g, "failed to remap gk20a registers"); - err = PTR_ERR(l->regs); - goto fail; - } - - l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, - &l->bar1_mem); - if (IS_ERR(l->bar1)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(l->bar1); - goto fail; - } - - err = nvgpu_init_sim_support_linux(g, dev); - if (err) - goto fail; - err = nvgpu_init_sim_support(g); - if (err) - goto fail_sim; - - nvgpu_init_usermode_support(g); - return 0; - -fail_sim: - nvgpu_remove_sim_support_linux(g); -fail: - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - return err; -} - -static int gk20a_pm_railgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - struct gk20a *g = get_gk20a(dev); - - /* if platform is already railgated, then just return */ - if (platform->is_railgated && platform->is_railgated(dev)) - return ret; - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_gate_start = jiffies; - - if 
(g->pstats.railgating_cycle_count >= 1) - g->pstats.total_rail_ungate_time_ms = - g->pstats.total_rail_ungate_time_ms + - jiffies_to_msecs(g->pstats.last_rail_gate_start - - g->pstats.last_rail_ungate_complete); -#endif - - if (platform->railgate) - ret = platform->railgate(dev); - if (ret) { - nvgpu_err(g, "failed to railgate platform, err=%d", ret); - return ret; - } - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_gate_complete = jiffies; -#endif - ret = tegra_fuse_clock_disable(); - if (ret) - nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); - - return ret; -} - -static int gk20a_pm_unrailgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - struct gk20a *g = get_gk20a(dev); - - ret = tegra_fuse_clock_enable(); - if (ret) { - nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); - return ret; - } -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_ungate_start = jiffies; - if (g->pstats.railgating_cycle_count >= 1) - g->pstats.total_rail_gate_time_ms = - g->pstats.total_rail_gate_time_ms + - jiffies_to_msecs(g->pstats.last_rail_ungate_start - - g->pstats.last_rail_gate_complete); - - g->pstats.railgating_cycle_count++; -#endif - - trace_gk20a_pm_unrailgate(dev_name(dev)); - - if (platform->unrailgate) { - nvgpu_mutex_acquire(&platform->railgate_lock); - ret = platform->unrailgate(dev); - nvgpu_mutex_release(&platform->railgate_lock); - } - -#ifdef CONFIG_DEBUG_FS - g->pstats.last_rail_ungate_complete = jiffies; -#endif - - return ret; -} - -/* - * Remove association of the driver with OS interrupt handler - */ -void nvgpu_free_irq(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - devm_free_irq(dev, g->irq_stall, g); - if (g->irq_stall != g->irq_nonstall) - devm_free_irq(dev, g->irq_nonstall, g); -} - -/* - * Idle the GPU in preparation of shutdown/remove. - * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW - * state to prevent further activity on the driver SW side. 
- * On driver removal quiesce() should be called after start_unload() - */ -int nvgpu_quiesce(struct gk20a *g) -{ - int err; - struct device *dev = dev_from_gk20a(g); - - if (g->power_on) { - err = gk20a_wait_for_idle(g); - if (err) { - nvgpu_err(g, "failed to idle GPU, err=%d", err); - return err; - } - - err = gk20a_fifo_disable_all_engine_activity(g, true); - if (err) { - nvgpu_err(g, - "failed to disable engine activity, err=%d", - err); - return err; - } - - err = gk20a_fifo_wait_engine_idle(g); - if (err) { - nvgpu_err(g, "failed to idle engines, err=%d", - err); - return err; - } - } - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_prepare_poweroff(dev); - else - err = gk20a_pm_prepare_poweroff(dev); - - if (err) - nvgpu_err(g, "failed to prepare for poweroff, err=%d", - err); - - return err; -} - -static void gk20a_pm_shutdown(struct platform_device *pdev) -{ - struct gk20a_platform *platform = platform_get_drvdata(pdev); - struct gk20a *g = platform->g; - int err; - - nvgpu_info(g, "shutting down"); - - /* vgpu has nothing to clean up currently */ - if (gk20a_gpu_is_virtual(&pdev->dev)) - return; - - if (!g->power_on) - goto finish; - - gk20a_driver_start_unload(g); - - /* If GPU is already railgated, - * just prevent more requests, and return */ - if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { - __pm_runtime_disable(&pdev->dev, false); - nvgpu_info(g, "already railgated, shut down complete"); - return; - } - - /* Prevent more requests by disabling Runtime PM */ - __pm_runtime_disable(&pdev->dev, false); - - err = nvgpu_quiesce(g); - if (err) - goto finish; - - err = gk20a_pm_railgate(&pdev->dev); - if (err) - nvgpu_err(g, "failed to railgate, err=%d", err); - -finish: - nvgpu_info(g, "shut down complete"); -} - -#ifdef CONFIG_PM -static int gk20a_pm_runtime_resume(struct device *dev) -{ - int err = 0; - - err = gk20a_pm_unrailgate(dev); - if (err) - goto fail; - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_finalize_poweron(dev); - else - err = gk20a_pm_finalize_poweron(dev); - if (err) - goto fail_poweron; - - return 0; - -fail_poweron: - gk20a_pm_railgate(dev); -fail: - return err; -} - -static int gk20a_pm_runtime_suspend(struct device *dev) -{ - int err = 0; - struct gk20a *g = get_gk20a(dev); - - if (gk20a_gpu_is_virtual(dev)) - err = vgpu_pm_prepare_poweroff(dev); - else - err = gk20a_pm_prepare_poweroff(dev); - if (err) { - nvgpu_err(g, "failed to power off, err=%d", err); - goto fail; - } - - err = gk20a_pm_railgate(dev); - if (err) - goto fail; - - return 0; - -fail: - gk20a_pm_finalize_poweron(dev); - pm_runtime_mark_last_busy(dev); - return err; -} - -static int gk20a_pm_suspend(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = get_gk20a(dev); - int ret = 0; - int idle_usage_count = 0; - - if (!g->power_on) { - if (!pm_runtime_enabled(dev)) - gk20a_pm_railgate(dev); - return 0; - } - - if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count) - return -EBUSY; - - ret = gk20a_pm_runtime_suspend(dev); - if (ret) - return ret; - - if (platform->suspend) - platform->suspend(dev); - - g->suspended = true; - - return 0; -} - -static int gk20a_pm_resume(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int ret = 0; - - if (!g->suspended) { - if (!pm_runtime_enabled(dev)) - gk20a_pm_unrailgate(dev); - return 0; - } - - ret = gk20a_pm_runtime_resume(dev); - - g->suspended = false; - - return ret; -} - -static const struct dev_pm_ops gk20a_pm_ops = { - .runtime_resume = 
gk20a_pm_runtime_resume, - .runtime_suspend = gk20a_pm_runtime_suspend, - .resume = gk20a_pm_resume, - .suspend = gk20a_pm_suspend, -}; -#endif - -static int gk20a_pm_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int err = 0; - - nvgpu_log_fn(g, " "); - - /* - * Initialise runtime PM. When railgating is disabled, - * set the autosuspend delay to a negative value, which - * disables runtime-PM autosuspend. - */ - if (g->railgate_delay && g->can_railgate) - pm_runtime_set_autosuspend_delay(dev, - g->railgate_delay); - else - pm_runtime_set_autosuspend_delay(dev, -1); - - pm_runtime_use_autosuspend(dev); - pm_runtime_enable(dev); - - return err; -} - -/* - * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. - */ -void gk20a_driver_start_unload(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); - - down_write(&l->busy_lock); - __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); - /* GR SW ready needs to be invalidated at this time with the busy lock - * held to prevent a race condition in the gr/mm code */ - g->gr.sw_ready = false; - g->sw_ready = false; - up_write(&l->busy_lock); - - if (g->is_virtual) - return; - - gk20a_wait_for_idle(g); - - nvgpu_wait_for_deferred_interrupts(g); - - if (l->nonstall_work_queue) { - cancel_work_sync(&l->nonstall_fn_work); - destroy_workqueue(l->nonstall_work_queue); - l->nonstall_work_queue = NULL; - } -} - -static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) -{ - gk20a_get_platform(&pdev->dev)->g = gk20a; -} - -static int nvgpu_read_fuse_overrides(struct gk20a *g) -{ - struct device_node *np = nvgpu_get_node(g); - u32 *fuses; - int count, i; - - if (!np) /* may be a PCIe device */ - return 0; - - count = of_property_count_elems_of_size(np, "fuse-overrides", 8); - if (count <= 0) - return count; - - fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); - if (!fuses) - return -ENOMEM; - of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); - for (i = 0; i < count; i++) { - u32 fuse, value; - - fuse = fuses[2 * i]; - value = fuses[2 * i + 1]; - switch (fuse) { - case GM20B_FUSE_OPT_TPC_DISABLE: - g->tpc_fs_mask_user = ~value; - break; - case GP10B_FUSE_OPT_ECC_EN: - g->gr.fecs_feature_override_ecc_val = value; - break; - default: - nvgpu_err(g, "ignoring unknown fuse override %08x", fuse); - break; - } - } - - nvgpu_kfree(g, fuses); - - return 0; -} - -static int gk20a_probe(struct platform_device *dev) -{ - struct nvgpu_os_linux *l = NULL; - struct gk20a *gk20a; - int err; - struct gk20a_platform *platform = NULL; - struct device_node *np; - - if (dev->dev.of_node) { - const struct of_device_id *match; - - match = of_match_device(tegra_gk20a_of_match, &dev->dev); - if (match) - platform = (struct gk20a_platform *)match->data; - } else - platform = (struct gk20a_platform *)dev->dev.platform_data; - - if (!platform) { - dev_err(&dev->dev, "no platform data\n"); - return -ENODATA; - } - - platform_set_drvdata(dev, platform); - - if (gk20a_gpu_is_virtual(&dev->dev)) - return vgpu_probe(dev); - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(&dev->dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - - hash_init(l->ecc_sysfs_stats_htable); - - gk20a = &l->g; - - nvgpu_log_fn(gk20a, " "); - - nvgpu_init_gk20a(gk20a); - set_gk20a(dev, gk20a); - l->dev = &dev->dev; - gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; - - nvgpu_kmem_init(gk20a); - - err = nvgpu_init_enabled_flags(gk20a); - if (err) - goto
return_err; - - np = nvgpu_get_node(gk20a); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); - } - - if (nvgpu_platform_is_simulation(gk20a)) - __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); - - gk20a->irq_stall = platform_get_irq(dev, 0); - gk20a->irq_nonstall = platform_get_irq(dev, 1); - if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { - err = -ENXIO; - goto return_err; - } - - err = devm_request_threaded_irq(&dev->dev, - gk20a->irq_stall, - gk20a_intr_isr_stall, - gk20a_intr_thread_stall, - 0, "gk20a_stall", gk20a); - if (err) { - dev_err(&dev->dev, - "failed to request stall intr irq @ %d\n", - gk20a->irq_stall); - goto return_err; - } - err = devm_request_irq(&dev->dev, - gk20a->irq_nonstall, - gk20a_intr_isr_nonstall, - 0, "gk20a_nonstall", gk20a); - if (err) { - dev_err(&dev->dev, - "failed to request non-stall intr irq @ %d\n", - gk20a->irq_nonstall); - goto return_err; - } - disable_irq(gk20a->irq_stall); - if (gk20a->irq_stall != gk20a->irq_nonstall) - disable_irq(gk20a->irq_nonstall); - - err = gk20a_init_support(dev); - if (err) - goto return_err; - - err = nvgpu_read_fuse_overrides(gk20a); - -#ifdef CONFIG_RESET_CONTROLLER - platform->reset_control = devm_reset_control_get(&dev->dev, NULL); - if (IS_ERR(platform->reset_control)) - platform->reset_control = NULL; -#endif - - err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); - if (err) - goto return_err; - - err = gk20a_pm_init(&dev->dev); - if (err) { - dev_err(&dev->dev, "pm init failed"); - goto return_err; - } - - gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); - - return 0; - -return_err: - nvgpu_free_enabled_flags(gk20a); - - /* - * Last since the above allocs may use data structures in here. 
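- * nvgpu_free_enabled_flags() above releases its memory through the kmem - * layer, and NVGPU_KMEM_FINI_FORCE_CLEANUP is expected to reclaim any - * allocations that are still outstanding at this point.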
- */ - nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); - - kfree(l); - - return err; -} - -int nvgpu_remove(struct device *dev, struct class *class) -{ - struct gk20a *g = get_gk20a(dev); -#ifdef CONFIG_NVGPU_SUPPORT_CDE - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); -#endif - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - nvgpu_log_fn(g, " "); - - err = nvgpu_quiesce(g); - WARN(err, "gpu failed to idle during driver removal"); - - if (nvgpu_mem_is_valid(&g->syncpt_mem)) - nvgpu_dma_free(g, &g->syncpt_mem); - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - if (platform->has_cde) - gk20a_cde_destroy(l); -#endif - -#ifdef CONFIG_GK20A_CTXSW_TRACE - gk20a_ctxsw_trace_cleanup(g); -#endif - - gk20a_sched_ctrl_cleanup(g); - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) - gk20a_scale_exit(dev); - - nvgpu_clk_arb_cleanup_arbiter(g); - - gk20a_user_deinit(dev, class); - - gk20a_debug_deinit(g); - - nvgpu_remove_sysfs(dev); - - if (platform->secure_buffer.destroy) - platform->secure_buffer.destroy(g, - &platform->secure_buffer); - - if (pm_runtime_enabled(dev)) - pm_runtime_disable(dev); - - if (platform->remove) - platform->remove(dev); - - nvgpu_log_fn(g, "removed"); - - return err; -} - -static int __exit gk20a_remove(struct platform_device *pdev) -{ - int err; - struct device *dev = &pdev->dev; - struct gk20a *g = get_gk20a(dev); - - if (gk20a_gpu_is_virtual(dev)) - return vgpu_remove(pdev); - - err = nvgpu_remove(dev, &nvgpu_class); - - set_gk20a(pdev, NULL); - gk20a_put(g); - - return err; -} - -static struct platform_driver gk20a_driver = { - .probe = gk20a_probe, - .remove = __exit_p(gk20a_remove), - .shutdown = gk20a_pm_shutdown, - .driver = { - .owner = THIS_MODULE, - .name = "gk20a", - .probe_type = PROBE_PREFER_ASYNCHRONOUS, -#ifdef CONFIG_OF - .of_match_table = tegra_gk20a_of_match, -#endif -#ifdef CONFIG_PM - .pm = &gk20a_pm_ops, -#endif - .suppress_bind_attrs = true, - } -}; - -struct class nvgpu_class = { - .owner = THIS_MODULE, - .name = CLASS_NAME, -}; - -static int __init gk20a_init(void) -{ - - int ret; - - ret = class_register(&nvgpu_class); - if (ret) - return ret; - - ret = nvgpu_pci_init(); - if (ret) - return ret; - - return platform_driver_register(&gk20a_driver); -} - -static void __exit gk20a_exit(void) -{ - nvgpu_pci_exit(); - platform_driver_unregister(&gk20a_driver); - class_unregister(&nvgpu_class); -} - -MODULE_LICENSE("GPL v2"); -module_init(gk20a_init); -module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h deleted file mode 100644 index ab4bca03..00000000 --- a/drivers/gpu/nvgpu/common/linux/module.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ -#ifndef __NVGPU_COMMON_LINUX_MODULE_H__ -#define __NVGPU_COMMON_LINUX_MODULE_H__ - -struct gk20a; -struct device; -struct nvgpu_os_linux; - -int gk20a_pm_finalize_poweron(struct device *dev); -int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l); -void gk20a_remove_support(struct gk20a *g); -void gk20a_driver_start_unload(struct gk20a *g); -int nvgpu_quiesce(struct gk20a *g); -int nvgpu_remove(struct device *dev, struct class *class); -void nvgpu_free_irq(struct gk20a *g); -struct device_node *nvgpu_get_node(struct gk20a *g); -void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, - struct resource **out); -extern struct class nvgpu_class; - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.c b/drivers/gpu/nvgpu/common/linux/module_usermode.c deleted file mode 100644 index 61cb4e87..00000000 --- a/drivers/gpu/nvgpu/common/linux/module_usermode.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -/* - * Locks out the driver from accessing GPU registers. This prevents access to - * these registers after the GPU has been clock or power gated. This should help - * find annoying bugs where register reads and writes are silently dropped - * after the GPU has been turned off. On older chips these reads and writes can - * also lock the entire CPU up. - */ -void nvgpu_lockout_usermode_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = NULL; -} - -/* - * Undoes nvgpu_lockout_usermode_registers(). - */ -void nvgpu_restore_usermode_registers(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = l->usermode_regs_saved; -} - -void nvgpu_remove_usermode_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (l->usermode_regs) { - l->usermode_regs = NULL; - } -} - -void nvgpu_init_usermode_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->usermode_regs = l->regs + usermode_cfg0_r(); - l->usermode_regs_saved = l->usermode_regs; -} diff --git a/drivers/gpu/nvgpu/common/linux/module_usermode.h b/drivers/gpu/nvgpu/common/linux/module_usermode.h deleted file mode 100644 index b17053ca..00000000 --- a/drivers/gpu/nvgpu/common/linux/module_usermode.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "os_linux.h" - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" -#include "platform_gk20a.h" - -static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - u64 ipa = sg_phys((struct scatterlist *)sgl); - - if (platform->phys_addr) - return platform->phys_addr(g, ipa); - - return ipa; -} - -int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) -{ - void *cpu_va; - pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? - PAGE_KERNEL : - pgprot_writecombine(PAGE_KERNEL); - - if (mem->aperture != APERTURE_SYSMEM) - return 0; - - /* - * WAR for bug 2040115: all sysmem buffers already have a coherent - * vmap(). The prot settings are left alone since eventually this - * should be deleted. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return 0; - - /* - * A CPU mapping is implicitly made for all SYSMEM DMA allocations that - * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make - * another CPU mapping. - */ - if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) - return 0; - - if (WARN_ON(mem->cpu_va)) { - nvgpu_warn(g, "nested"); - return -EBUSY; - } - - cpu_va = vmap(mem->priv.pages, - PAGE_ALIGN(mem->size) >> PAGE_SHIFT, - 0, prot); - - if (WARN_ON(!cpu_va)) - return -ENOMEM; - - mem->cpu_va = cpu_va; - return 0; -} - -void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) -{ - if (mem->aperture != APERTURE_SYSMEM) - return; - - /* - * WAR for bug 2040115: skip this since the map will be taken care of - * during the free in the DMA API. - */ - if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - return; - - /* - * Similar to nvgpu_mem_begin(), we don't need to unmap the CPU mapping - * already made by the DMA API.
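- * - * As a rough usage sketch (illustrative only), a SYSMEM buffer that was - * allocated with NVGPU_DMA_NO_KERNEL_MAPPING is accessed via the - * begin/end pairing: - * - *	if (nvgpu_mem_begin(g, mem) == 0) { - *		u32 v = nvgpu_mem_rd32(g, mem, 0); - * - *		nvgpu_mem_wr32(g, mem, 0, v); - *		nvgpu_mem_end(g, mem); - *	}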
- */ - if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) - return; - - vunmap(mem->cpu_va); - mem->cpu_va = NULL; -} - -static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, *dest_u32 = *arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - *dest_u32++ = gk20a_readl(g, r); - r += sizeof(u32); - } - - *arg = dest_u32; -} - -u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) -{ - u32 data = 0; - - if (mem->aperture == APERTURE_SYSMEM) { - u32 *ptr = mem->cpu_va; - - WARN_ON(!ptr); - data = ptr[w]; -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); -#endif - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 value; - u32 *p = &value; - - nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), - sizeof(u32), pramin_access_batch_rd_n, &p); - - data = value; - - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } - - return data; -} - -u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) -{ - WARN_ON(offset & 3); - return nvgpu_mem_rd32(g, mem, offset / sizeof(u32)); -} - -void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, - u32 offset, void *dest, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *src = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); - memcpy(dest, src, size); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", - src, *dest, size); -#endif - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 *dest_u32 = dest; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_rd_n, &dest_u32); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, *src_u32 = *arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - writel_relaxed(*src_u32++, l->regs + r); - r += sizeof(u32); - } - - *arg = src_u32; -} - -void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) -{ - if (mem->aperture == APERTURE_SYSMEM) { - u32 *ptr = mem->cpu_va; - - WARN_ON(!ptr); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); -#endif - ptr[w] = data; - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 value = data; - u32 *p = &value; - - nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), - sizeof(u32), pramin_access_batch_wr_n, &p); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) -{ - WARN_ON(offset & 3); - nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data); -} - -void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - void *src, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *dest = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... 
[%d bytes]", - dest, *src, size); -#endif - memcpy(dest, src, size); - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 *src_u32 = src; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_wr_n, &src_u32); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - u32 r = start, repeat = **arg; - - if (!l->regs) { - __gk20a_warn_on_no_regs(); - return; - } - - while (words--) { - writel_relaxed(repeat, l->regs + r); - r += sizeof(u32); - } -} - -void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - u32 c, u32 size) -{ - WARN_ON(offset & 3); - WARN_ON(size & 3); - WARN_ON(c & ~0xff); - - c &= 0xff; - - if (mem->aperture == APERTURE_SYSMEM) { - u8 *dest = (u8 *)mem->cpu_va + offset; - - WARN_ON(!mem->cpu_va); -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - if (size) - nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]", - dest, c, size); -#endif - memset(dest, c, size); - } else if (mem->aperture == APERTURE_VIDMEM) { - u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); - u32 *p = &repeat_value; - - nvgpu_pramin_access_batched(g, mem, offset, size, - pramin_access_batch_set, &p); - if (!mem->skip_wmb) - wmb(); - } else { - WARN_ON("Accessing unallocated nvgpu_mem"); - } -} - -/* - * Obtain a SYSMEM address from a Linux SGL. This should eventually go away - * and/or become private to this file once all bad usages of Linux SGLs are - * cleaned up in the driver. - */ -u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) -{ - if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || - !nvgpu_iommuable(g)) - return g->ops.mm.gpu_phys_addr(g, NULL, - __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); - - if (sg_dma_address(sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, NULL, - __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); - - if (sg_dma_address(sgl) == DMA_ERROR_CODE) - return 0; - - return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); -} - -/* - * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM - * allocation. - */ -static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) -{ - return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); -} - -/* - * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM - * allocation. - * - * Note: this API does not make sense to use for _VIDMEM_ buffers with greater - * than one scatterlist chunk. If there's more than one scatterlist chunk then - * the buffer will not be contiguous. As such the base address probably isn't - * very useful. This is true for SYSMEM as well, if there's no IOMMU. - * - * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's - * an IOMMU present and enabled for the GPU. - * - * %attrs can be NULL. If it is not NULL then it may be inspected to determine - * if the address needs to be modified before writing into a PTE. - */ -u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) -{ - struct nvgpu_page_alloc *alloc; - - if (mem->aperture == APERTURE_SYSMEM) - return nvgpu_mem_get_addr_sysmem(g, mem); - - /* - * Otherwise get the vidmem address. 
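- * VIDMEM buffers record their backing nvgpu_page_alloc in - * mem->vidmem_alloc; only a single-chunk allocation has one meaningful - * base address, hence the WARN_ON() below.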
- */ - alloc = mem->vidmem_alloc; - - /* This API should not be used with > 1 chunks */ - WARN_ON(alloc->nr_chunks != 1); - - return alloc->base; -} - -/* - * This should only be used on contiguous buffers regardless of whether - * there's an IOMMU present/enabled. This applies to both SYSMEM and - * VIDMEM. - */ -u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) -{ - /* - * For a VIDMEM buf, this is identical to simply get_addr() so just fall - * back to that. - */ - if (mem->aperture == APERTURE_VIDMEM) - return nvgpu_mem_get_addr(g, mem); - - return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); -} - -/* - * Be careful how you use this! You are responsible for correctly freeing this - * memory. - */ -int nvgpu_mem_create_from_mem(struct gk20a *g, - struct nvgpu_mem *dest, struct nvgpu_mem *src, - int start_page, int nr_pages) -{ - int ret; - u64 start = start_page * PAGE_SIZE; - u64 size = nr_pages * PAGE_SIZE; - dma_addr_t new_iova; - - if (src->aperture != APERTURE_SYSMEM) - return -EINVAL; - - /* Some silly things a caller might do... */ - if (size > src->size) - return -EINVAL; - if ((start + size) > src->size) - return -EINVAL; - - dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; - dest->aperture = src->aperture; - dest->skip_wmb = src->skip_wmb; - dest->size = size; - - /* - * Re-use the CPU mapping only if the mapping was made by the DMA API. - * - * Bug 2040115: the DMA API wrapper makes the mapping that we should - * re-use. - */ - if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || - nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) - dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); - - dest->priv.pages = src->priv.pages + start_page; - dest->priv.flags = src->priv.flags; - - new_iova = sg_dma_address(src->priv.sgt->sgl) ? - sg_dma_address(src->priv.sgt->sgl) + start : 0; - - /* - * Make a new SG table that is based only on the subset of pages that - * is passed to us. This table gets freed by the dma free routines. - */ - if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) - ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, - src->priv.pages + start_page, - new_iova, size); - else - ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, - new_iova, size); - - return ret; -} - -int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, - struct page **pages, int nr_pages) -{ - struct sg_table *sgt; - struct page **our_pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - - if (!our_pages) - return -ENOMEM; - - memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); - - if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, - nr_pages * PAGE_SIZE)) { - nvgpu_kfree(g, our_pages); - return -ENOMEM; - } - - /* - * If we are making an SGT from physical pages we can be reasonably - * certain that this should bypass the SMMU - thus we set the DMA (aka - * IOVA) address to 0. This tells the GMMU mapping code to not make a - * mapping directed to the SMMU. 
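- * (The nvgpu_sgt ops below honour this convention: sgl_gpu_addr() - * falls back to the physical address whenever sg_dma_address() is 0.)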
- */ - sg_dma_address(sgt->sgl) = 0; - - dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; - dest->aperture = APERTURE_SYSMEM; - dest->skip_wmb = 0; - dest->size = PAGE_SIZE * nr_pages; - - dest->priv.flags = 0; - dest->priv.pages = our_pages; - dest->priv.sgt = sgt; - - return 0; -} - -#ifdef CONFIG_TEGRA_GK20A_NVHOST -int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, - u64 src_phys, int nr_pages) -{ - struct page **pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - int i, ret = 0; - - if (!pages) - return -ENOMEM; - - for (i = 0; i < nr_pages; i++) - pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); - - ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); - nvgpu_kfree(g, pages); - - return ret; -} -#endif - -static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) -{ - return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); -} - -static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) -{ - return (u64)__nvgpu_sgl_phys(g, sgl); -} - -static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) -{ - return (u64)sg_dma_address((struct scatterlist *)sgl); -} - -static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) -{ - return (u64)((struct scatterlist *)sgl)->length; -} - -static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, - struct nvgpu_sgl *sgl, - struct nvgpu_gmmu_attrs *attrs) -{ - if (sg_dma_address((struct scatterlist *)sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, attrs, - __nvgpu_sgl_phys(g, sgl)); - - if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) - return 0; - - return nvgpu_mem_iommu_translate(g, - sg_dma_address((struct scatterlist *)sgl)); -} - -static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, - struct nvgpu_sgt *sgt) -{ - if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) - return false; - return true; -} - -static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) -{ - /* - * Free this SGT. All we do is free the passed SGT. The actual Linux - * SGT/SGL needs to be freed separately. 
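- * - * As a rough sketch of the expected create/use/free pairing (error - * handling elided): - * - *	struct nvgpu_sgt *sgt = nvgpu_linux_sgt_create(g, mem->priv.sgt); - * - *	... walk sgt via sgt->ops ... - * - *	sgt->ops->sgt_free(g, sgt); - * - * The underlying struct sg_table remains owned by the caller.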
- */ - nvgpu_kfree(g, sgt); -} - -static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { - .sgl_next = nvgpu_mem_linux_sgl_next, - .sgl_phys = nvgpu_mem_linux_sgl_phys, - .sgl_dma = nvgpu_mem_linux_sgl_dma, - .sgl_length = nvgpu_mem_linux_sgl_length, - .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, - .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, - .sgt_free = nvgpu_mem_linux_sgl_free, -}; - -static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( - struct gk20a *g, - struct scatterlist *linux_sgl) -{ - struct nvgpu_page_alloc *vidmem_alloc; - - vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); - if (!vidmem_alloc) - return NULL; - - return &vidmem_alloc->sgt; -} - -struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) -{ - struct nvgpu_sgt *nvgpu_sgt; - struct scatterlist *linux_sgl = sgt->sgl; - - if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) - return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); - - nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); - if (!nvgpu_sgt) - return NULL; - - nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); - - nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; - nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; - - return nvgpu_sgt; -} - -struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, - struct nvgpu_mem *mem) -{ - return nvgpu_linux_sgt_create(g, mem->priv.sgt); -} diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c deleted file mode 100644 index 6ab60248..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvhost.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include - -#include "nvhost_priv.h" - -#include "gk20a/gk20a.h" -#include "os_linux.h" -#include "module.h" - -int nvgpu_get_nvhost_dev(struct gk20a *g) -{ - struct device_node *np = nvgpu_get_node(g); - struct platform_device *host1x_pdev = NULL; - const __be32 *host1x_ptr; - - host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); - if (host1x_ptr) { - struct device_node *host1x_node = - of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); - - host1x_pdev = of_find_device_by_node(host1x_node); - if (!host1x_pdev) { - nvgpu_warn(g, "host1x device not available"); - return -EPROBE_DEFER; - } - - } else { - if (g->has_syncpoints) { - nvgpu_warn(g, "host1x reference not found. 
assuming no syncpoints support"); - g->has_syncpoints = false; - } - return 0; - } - - g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); - if (!g->nvhost_dev) - return -ENOMEM; - - g->nvhost_dev->host1x_pdev = host1x_pdev; - - return 0; -} - -void nvgpu_free_nvhost_dev(struct gk20a *g) -{ - nvgpu_kfree(g, g->nvhost_dev); -} - -int nvgpu_nvhost_module_busy_ext( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); -} - -void nvgpu_nvhost_module_idle_ext( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - nvhost_module_idle_ext(nvhost_dev->host1x_pdev); -} - -void nvgpu_nvhost_debug_dump_device( - struct nvgpu_nvhost_dev *nvhost_dev) -{ - nvhost_debug_dump_device(nvhost_dev->host1x_pdev); -} - -const char *nvgpu_nvhost_syncpt_get_name( - struct nvgpu_nvhost_dev *nvhost_dev, int id) -{ - return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); -} - -bool nvgpu_nvhost_syncpt_is_valid_pt_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); -} - -int nvgpu_nvhost_syncpt_is_expired_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) -{ - return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, - id, thresh); -} - -u32 nvgpu_nvhost_syncpt_incr_max_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) -{ - return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); -} - -int nvgpu_nvhost_intr_register_notifier( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, - void (*callback)(void *, int), void *private_data) -{ - return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, - id, thresh, - callback, private_data); -} - -void nvgpu_nvhost_syncpt_set_min_eq_max_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); -} - -void nvgpu_nvhost_syncpt_put_ref_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); -} - -u32 nvgpu_nvhost_get_syncpt_host_managed( - struct nvgpu_nvhost_dev *nvhost_dev, - u32 param, const char *syncpt_name) -{ - return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, - param, syncpt_name); -} - -u32 nvgpu_nvhost_get_syncpt_client_managed( - struct nvgpu_nvhost_dev *nvhost_dev, - const char *syncpt_name) -{ - return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, - syncpt_name); -} - -int nvgpu_nvhost_syncpt_wait_timeout_ext( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, - u32 thresh, u32 timeout, u32 *value, struct timespec *ts) -{ - return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, - id, thresh, timeout, value, ts); -} - -int nvgpu_nvhost_syncpt_read_ext_check( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) -{ - return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); -} - -u32 nvgpu_nvhost_syncpt_read_maxval( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); -} - -void nvgpu_nvhost_syncpt_set_safe_state( - struct nvgpu_nvhost_dev *nvhost_dev, u32 id) -{ - u32 val; - - /* - * Add large number of increments to current value - * so that all waiters on this syncpoint are released - * - * We don't expect any case where more than 0x10000 increments - * are pending - */ - val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); - val += 0x10000; - - nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); - nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, 
val); -} - -int nvgpu_nvhost_create_symlink(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - int err = 0; - - if (g->nvhost_dev && - (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { - err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, - &dev->kobj, - dev_name(dev)); - } - - return err; -} - -void nvgpu_nvhost_remove_symlink(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (g->nvhost_dev && - (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { - sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, - dev_name(dev)); - } -} - -#ifdef CONFIG_SYNC -u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) -{ - return nvhost_sync_pt_id(pt); -} - -u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) -{ - return nvhost_sync_pt_thresh(pt); -} - -struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) -{ - return nvhost_sync_fdget(fd); -} - -int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) -{ - return nvhost_sync_num_pts(fence); -} - -struct sync_fence *nvgpu_nvhost_sync_create_fence( - struct nvgpu_nvhost_dev *nvhost_dev, - u32 id, u32 thresh, const char *name) -{ - struct nvhost_ctrl_sync_fence_info pt = { - .id = id, - .thresh = thresh, - }; - - return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); -} -#endif /* CONFIG_SYNC */ - -#ifdef CONFIG_TEGRA_T19X_GRHOST -int nvgpu_nvhost_syncpt_unit_interface_get_aperture( - struct nvgpu_nvhost_dev *nvhost_dev, - u64 *base, size_t *size) -{ - return nvhost_syncpt_unit_interface_get_aperture( - nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); -} - -u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) -{ - return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); -} - -int nvgpu_nvhost_syncpt_init(struct gk20a *g) -{ - int err = 0; - - if (!g->has_syncpoints) - return -ENOSYS; - - err = nvgpu_get_nvhost_dev(g); - if (err) { - nvgpu_err(g, "host1x device not available"); - g->has_syncpoints = false; - return -ENOSYS; - } - - err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( - g->nvhost_dev, - &g->syncpt_unit_base, - &g->syncpt_unit_size); - if (err) { - nvgpu_err(g, "Failed to get syncpt interface"); - g->has_syncpoints = false; - return -ENOSYS; - } - - g->syncpt_size = - nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); - nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", - g->syncpt_unit_base, g->syncpt_unit_size, - g->syncpt_size); - - return 0; -} -#endif diff --git a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h b/drivers/gpu/nvgpu/common/linux/nvhost_priv.h deleted file mode 100644 index c03390a7..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvhost_priv.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef __NVGPU_NVHOST_PRIV_H__ -#define __NVGPU_NVHOST_PRIV_H__ - -struct nvgpu_nvhost_dev { - struct platform_device *host1x_pdev; -}; - -#endif /* __NVGPU_NVHOST_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/common/linux/nvlink.c b/drivers/gpu/nvgpu/common/linux/nvlink.c deleted file mode 100644 index c93514c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/nvlink.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include "module.h" - -#ifdef CONFIG_TEGRA_NVLINK -int nvgpu_nvlink_read_dt_props(struct gk20a *g) -{ - struct device_node *np; - struct nvlink_device *ndev = g->nvlink.priv; - u32 local_dev_id; - u32 local_link_id; - u32 remote_dev_id; - u32 remote_link_id; - bool is_master; - - /* Parse DT */ - np = nvgpu_get_node(g); - if (!np) - goto fail; - - np = of_get_child_by_name(np, "nvidia,nvlink"); - if (!np) - goto fail; - - np = of_get_child_by_name(np, "endpoint"); - if (!np) - goto fail; - - /* Parse DT structure to detect endpoint topology */ - of_property_read_u32(np, "local_dev_id", &local_dev_id); - of_property_read_u32(np, "local_link_id", &local_link_id); - of_property_read_u32(np, "remote_dev_id", &remote_dev_id); - of_property_read_u32(np, "remote_link_id", &remote_link_id); - is_master = of_property_read_bool(np, "is_master"); - - /* Check that we are in dGPU mode */ - if (local_dev_id != NVLINK_ENDPT_GV100) { - nvgpu_err(g, "Local nvlink device is not dGPU"); - return -EINVAL; - } - - ndev->is_master = is_master; - ndev->device_id = local_dev_id; - ndev->link.link_id = local_link_id; - ndev->link.remote_dev_info.device_id = remote_dev_id; - ndev->link.remote_dev_info.link_id = remote_link_id; - - return 0; - -fail: - nvgpu_info(g, "nvlink endpoint not found or invalid in DT"); - return -ENODEV; -} -#endif /* CONFIG_TEGRA_NVLINK */ - -void nvgpu_mss_nvlink_init_credits(struct gk20a *g) -{ - /* MSS_NVLINK_1_BASE */ - void __iomem *soc1 = ioremap(0x01f20010, 4096); - /* MSS_NVLINK_2_BASE */ - void __iomem *soc2 = ioremap(0x01f40010, 4096); - /* MSS_NVLINK_3_BASE */ - void __iomem *soc3 = ioremap(0x01f60010, 4096); - /* MSS_NVLINK_4_BASE */ - void __iomem *soc4 = ioremap(0x01f80010, 4096); - u32 val; - - nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits"); - - val = readl_relaxed(soc1); - writel_relaxed(val, soc1); - val = readl_relaxed(soc1 + 4); - writel_relaxed(val, soc1 + 4); - - val = readl_relaxed(soc2); - writel_relaxed(val, soc2); - val = readl_relaxed(soc2 + 4); - writel_relaxed(val, soc2 + 4); - - val = readl_relaxed(soc3); - writel_relaxed(val, soc3); - val = readl_relaxed(soc3 + 4); - writel_relaxed(val, soc3 + 4); - - val = readl_relaxed(soc4); - writel_relaxed(val, soc4); - val = readl_relaxed(soc4 + 4); - writel_relaxed(val, soc4 + 4); -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c deleted file mode 100644 index
9be8c6c0..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#include -#include -#include - -#include "gk20a/gk20a.h" - -#include "../drivers/staging/android/sync.h" - -inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) -{ - struct sync_fence *fence = (struct sync_fence *)s->priv; - return fence; -} - -static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) -{ - fence_out->priv = NULL; - fence_out->g = NULL; - fence_out->ops = NULL; -} - -void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, - struct gk20a *g, const struct nvgpu_os_fence_ops *fops, - struct sync_fence *fence) -{ - fence_out->g = g; - fence_out->ops = fops; - fence_out->priv = (void *)fence; -} - -void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) -{ - struct sync_fence *fence = nvgpu_get_sync_fence(s); - - sync_fence_put(fence); - - nvgpu_os_fence_clear(s); -} - -void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) -{ - struct sync_fence *fence = nvgpu_get_sync_fence(s); - - sync_fence_get(fence); - sync_fence_install(fence, fd); -} - -int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - int err = -ENOSYS; - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); -#endif - - if (err) - err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); - - if (err) - nvgpu_err(c->g, "error obtaining fence from fd %d", fd); - - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c deleted file mode 100644 index 25832417..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include -#include -#include -#include - -#include "gk20a/channel_sync_gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "sync_sema_android.h" - -#include "../drivers/staging/android/sync.h" - -int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, - struct priv_cmd_entry *wait_cmd, - struct channel_gk20a *c, - int max_wait_cmds) -{ - int err; - int wait_cmd_size; - int num_wait_cmds; - int i; - struct nvgpu_semaphore *sema; - struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); - - wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); - - num_wait_cmds = sync_fence->num_fences; - if (num_wait_cmds == 0) - return 0; - - if (max_wait_cmds && num_wait_cmds > max_wait_cmds) - return -EINVAL; - - err = gk20a_channel_alloc_priv_cmdbuf(c, - wait_cmd_size * num_wait_cmds, - wait_cmd); - if (err) { - nvgpu_err(c->g, "not enough priv cmd buffer space"); - return err; - } - - for (i = 0; i < num_wait_cmds; i++) { - struct fence *f = sync_fence->cbs[i].sync_pt; - struct sync_pt *pt = sync_pt_from_fence(f); - - sema = gk20a_sync_pt_sema(pt); - gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd, - wait_cmd_size, i); - } - - return 0; -} - -static const struct nvgpu_os_fence_ops sema_ops = { - .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, - .drop_ref = nvgpu_os_fence_android_drop_ref, - .install_fence = nvgpu_os_fence_android_install_fd, -}; - -int nvgpu_os_fence_sema_create( - struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, - struct nvgpu_semaphore *sema) -{ - struct sync_fence *fence; - - fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", - nvgpu_semaphore_gpu_ro_va(sema)); - - if (!fence) { - nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", - (u32)nvgpu_semaphore_gpu_ro_va(sema)); - - return -ENOMEM; - } - - nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); - - return 0; -} - -int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - struct sync_fence *fence = gk20a_sync_fence_fdget(fd); - - if (!fence) - return -EINVAL; - - nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c deleted file mode 100644 index d7a72fcd..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/channel_gk20a.h" -#include "gk20a/channel_sync_gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "../drivers/staging/android/sync.h" - -int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, - struct priv_cmd_entry *wait_cmd, - struct channel_gk20a *c, - int max_wait_cmds) -{ - int err; - int wait_cmd_size; - int num_wait_cmds; - int i; - u32 wait_id; - struct sync_pt *pt; - - struct sync_fence *sync_fence = (struct sync_fence *)s->priv; - - if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) - return -EINVAL; - - /* validate syncpt ids */ - for (i = 0; i < sync_fence->num_fences; i++) { - pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); - wait_id = nvgpu_nvhost_sync_pt_id(pt); - if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( - c->g->nvhost_dev, wait_id)) { - return -EINVAL; - } - } - - num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); - if (num_wait_cmds == 0) - return 0; - - wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); - err = gk20a_channel_alloc_priv_cmdbuf(c, - wait_cmd_size * num_wait_cmds, wait_cmd); - if (err) { - nvgpu_err(c->g, - "not enough priv cmd buffer space"); - return err; - } - - for (i = 0; i < sync_fence->num_fences; i++) { - struct fence *f = sync_fence->cbs[i].sync_pt; - struct sync_pt *pt = sync_pt_from_fence(f); - u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); - u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); - - err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value, - wait_cmd, wait_cmd_size, i, true); - } - - WARN_ON(i != num_wait_cmds); - - return 0; -} - -static const struct nvgpu_os_fence_ops syncpt_ops = { - .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, - .drop_ref = nvgpu_os_fence_android_drop_ref, - .install_fence = nvgpu_os_fence_android_install_fd, -}; - -int nvgpu_os_fence_syncpt_create( - struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, - struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) -{ - struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( - nvhost_dev, id, thresh, "fence"); - - if (!fence) { - nvgpu_err(c->g, "error constructing fence %s", "fence"); - return -ENOMEM; - } - - nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); - - return 0; -} - -int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, - struct channel_gk20a *c, int fd) -{ - struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); - - if (!fence) - return -ENOMEM; - - nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); - - return 0; -} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h deleted file mode 100644 index 4dcce322..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_linux.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef NVGPU_OS_LINUX_H -#define NVGPU_OS_LINUX_H - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "cde.h" -#include "sched.h" - -struct nvgpu_os_linux_ops { - struct { - void (*get_program_numbers)(struct gk20a *g, - u32 block_height_log2, - u32 shader_parameter, - int *hprog, int *vprog); - bool (*need_scatter_buffer)(struct gk20a *g); - int (*populate_scatter_buffer)(struct gk20a *g, - struct sg_table *sgt, - size_t surface_size, - void *scatter_buffer_ptr, - size_t scatter_buffer_size); - } cde; -}; - -struct nvgpu_os_linux { - struct gk20a g; - struct device *dev; - - struct { - struct cdev cdev; - struct device *node; - } channel; - - struct { - struct cdev cdev; - struct device *node; - } ctrl; - - struct { - struct cdev cdev; - struct device *node; - } as_dev; - - struct { - struct cdev cdev; - struct device *node; - } dbg; - - struct { - struct cdev cdev; - struct device *node; - } prof; - - struct { - struct cdev cdev; - struct device *node; - } tsg; - - struct { - struct cdev cdev; - struct device *node; - } ctxsw; - - struct { - struct cdev cdev; - struct device *node; - } sched; - - dev_t cdev_region; - - struct devfreq *devfreq; - - struct device_dma_parameters dma_parms; - - atomic_t hw_irq_stall_count; - atomic_t hw_irq_nonstall_count; - - struct nvgpu_cond sw_irq_stall_last_handled_wq; - atomic_t sw_irq_stall_last_handled; - - atomic_t nonstall_ops; - - struct nvgpu_cond sw_irq_nonstall_last_handled_wq; - atomic_t sw_irq_nonstall_last_handled; - - struct work_struct nonstall_fn_work; - struct workqueue_struct *nonstall_work_queue; - - struct resource *reg_mem; - void __iomem *regs; - void __iomem *regs_saved; - - struct resource *bar1_mem; - void __iomem *bar1; - void __iomem *bar1_saved; - - void __iomem *usermode_regs; - void __iomem *usermode_regs_saved; - - struct nvgpu_os_linux_ops ops; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; - struct dentry *debugfs_alias; - - struct dentry *debugfs_ltc_enabled; - struct dentry *debugfs_timeouts_enabled; - struct dentry *debugfs_gr_idle_timeout_default; - struct dentry *debugfs_disable_bigpage; - struct dentry *debugfs_gr_default_attrib_cb_size; - - struct dentry *debugfs_timeslice_low_priority_us; - struct dentry *debugfs_timeslice_medium_priority_us; - struct dentry *debugfs_timeslice_high_priority_us; - struct dentry *debugfs_runlist_interleave; - struct dentry *debugfs_allocators; - struct dentry *debugfs_xve; - struct dentry *debugfs_kmem; - struct dentry *debugfs_hal; - - struct dentry *debugfs_force_preemption_cilp; - struct dentry *debugfs_force_preemption_gfxp; - struct dentry *debugfs_dump_ctxsw_stats; -#endif - DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); - - struct gk20a_cde_app cde_app; - - struct rw_semaphore busy_lock; - - struct gk20a_sched_ctrl sched_ctrl; - - bool init_done; -}; - -static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) -{ - return container_of(g, struct nvgpu_os_linux, g); -} - -static inline struct device *dev_from_gk20a(struct gk20a *g) -{ - return nvgpu_os_linux_from_gk20a(g)->dev; -} - -#define INTERFACE_NAME "nvhost%s-gpu" - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/os_sched.c b/drivers/gpu/nvgpu/common/linux/os_sched.c deleted file mode 100644 index 586b35eb..00000000 --- a/drivers/gpu/nvgpu/common/linux/os_sched.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -int nvgpu_current_tid(struct gk20a *g) -{ - return current->pid; -} - -int nvgpu_current_pid(struct gk20a *g) -{ - return current->tgid; -} diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c deleted file mode 100644 index 1011b441..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ /dev/null @@ -1,861 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "clk/clk.h" -#include "clk/clk_mclk.h" -#include "module.h" -#include "intr.h" -#include "sysfs.h" -#include "os_linux.h" -#include "platform_gk20a.h" -#include - -#include "pci.h" -#include "pci_usermode.h" - -#include "os_linux.h" -#include "driver_common.h" - -#define PCI_INTERFACE_NAME "card-%s%%s" - -static int nvgpu_pci_tegra_probe(struct device *dev) -{ - return 0; -} - -static int nvgpu_pci_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - return 0; -} - -static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) -{ - return false; -} - -static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) -{ - long ret = (long)rate; - - if (rate == UINT_MAX) - ret = BOOT_GPC2CLK_MHZ * 1000000UL; - - return ret; -} - -static struct gk20a_platform nvgpu_pci_device[] = { - { /* DEVICE=0x1c35 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86063000, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c36 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = 
nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86062d00, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c37 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86063000, - .hardcode_sw_threshold = true, - .ina3221_dcb_index = 0, - .ina3221_i2c_address = 0x84, - .ina3221_i2c_port = 0x2, - }, - { /* DEVICE=0x1c75 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = true, - .enable_elpg = true, - .enable_elcg = false, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x86065300, - .hardcode_sw_threshold = false, - .ina3221_dcb_index = 1, - .ina3221_i2c_address = 0x80, - .ina3221_i2c_port = 0x1, - }, - { /* DEVICE=PG503 SKU 201 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88001e00, - .hardcode_sw_threshold = false, - .run_preos = true, - }, - { /* DEVICE=PG503 SKU 200 ES */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - 
.can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88001e00, - .hardcode_sw_threshold = false, - .run_preos = true, - }, - { - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x88000126, - .hardcode_sw_threshold = false, - .run_preos = true, - .has_syncpoints = true, - }, - { /* SKU250 */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_mscg = false, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x1, - .hardcode_sw_threshold = false, - .run_preos = true, - .has_syncpoints = true, - }, - { /* SKU 0x1e3f */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - /* - * WAR: PCIE X1 is very slow, set to very high value till nvlink is up - */ - .ch_wdt_timeout_ms = 30000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x1, - .hardcode_sw_threshold = false, - .unified_memory = false, - }, - { /* 0x1eba */ - /* ptimer src frequency in hz */ - .ptimer_src_freq = 31250000, - - .probe = nvgpu_pci_tegra_probe, - .remove = nvgpu_pci_tegra_remove, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = false, - .can_elpg_init = false, - .enable_elpg = false, - .enable_elcg = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_mscg = false, - .can_slcg = false, - .can_blcg 
= false, - .can_elcg = false, - - .disable_aspm = true, - - /* power management callbacks */ - .is_railgated = nvgpu_pci_tegra_is_railgated, - .clk_round_rate = nvgpu_pci_clk_round_rate, - - .ch_wdt_timeout_ms = 7000, - - .honors_aperture = true, - .dma_mask = DMA_BIT_MASK(40), - .vbios_min_version = 0x90040109, - .hardcode_sw_threshold = false, - .has_syncpoints = true, - }, -}; - -static struct pci_device_id nvgpu_pci_table[] = { - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 0, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 1, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 2, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 3, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 4, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 5, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 6, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 7, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 8, - }, - { - PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba), - .class = PCI_BASE_CLASS_DISPLAY << 16, - .class_mask = 0xff << 16, - .driver_data = 9, - }, - {} -}; - -static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - irqreturn_t ret_stall; - irqreturn_t ret_nonstall; - - ret_stall = nvgpu_intr_stall(g); - ret_nonstall = nvgpu_intr_nonstall(g); - -#if defined(CONFIG_PCI_MSI) - /* Send MSI EOI */ - if (g->ops.xve.rearm_msi && g->msi_enabled) - g->ops.xve.rearm_msi(g); -#endif - - return (ret_stall == IRQ_NONE) ? 
ret_nonstall : IRQ_WAKE_THREAD; -} - -static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) -{ - struct gk20a *g = dev_id; - - return nvgpu_intr_thread_stall(g); -} - -static int nvgpu_pci_init_support(struct pci_dev *pdev) -{ - int err = 0; - struct gk20a *g = get_gk20a(&pdev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - l->regs = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (IS_ERR(l->regs)) { - nvgpu_err(g, "failed to remap gk20a registers"); - err = PTR_ERR(l->regs); - goto fail; - } - - l->bar1 = ioremap(pci_resource_start(pdev, 1), - pci_resource_len(pdev, 1)); - if (IS_ERR(l->bar1)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(l->bar1); - goto fail; - } - - err = nvgpu_init_sim_support_linux_pci(g); - if (err) - goto fail; - err = nvgpu_init_sim_support_pci(g); - if (err) - goto fail_sim; - - nvgpu_pci_init_usermode_support(l); - - return 0; - - fail_sim: - nvgpu_remove_sim_support_linux_pci(g); - fail: - if (l->regs) { - iounmap(l->regs); - l->regs = NULL; - } - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } - - return err; -} - -static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) -{ - if (mode) - *mode = S_IRUGO | S_IWUGO; - return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); -} - -static struct class nvgpu_pci_class = { - .owner = THIS_MODULE, - .name = "nvidia-pci-gpu", - .devnode = nvgpu_pci_devnode, -}; - -#ifdef CONFIG_PM -static int nvgpu_pci_pm_runtime_resume(struct device *dev) -{ - return gk20a_pm_finalize_poweron(dev); -} - -static int nvgpu_pci_pm_runtime_suspend(struct device *dev) -{ - return 0; -} - -static const struct dev_pm_ops nvgpu_pci_pm_ops = { - .runtime_resume = nvgpu_pci_pm_runtime_resume, - .runtime_suspend = nvgpu_pci_pm_runtime_suspend, - .resume = nvgpu_pci_pm_runtime_resume, - .suspend = nvgpu_pci_pm_runtime_suspend, -}; -#endif - -static int nvgpu_pci_pm_init(struct device *dev) -{ -#ifdef CONFIG_PM - struct gk20a *g = get_gk20a(dev); - - if (!g->can_railgate) { - pm_runtime_disable(dev); - } else { - if (g->railgate_delay) - pm_runtime_set_autosuspend_delay(dev, - g->railgate_delay); - - /* - * Runtime PM for PCI devices is disabled by default, - * so we need to enable it first - */ - pm_runtime_use_autosuspend(dev); - pm_runtime_put_noidle(dev); - pm_runtime_allow(dev); - } -#endif - return 0; -} - -static int nvgpu_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *pent) -{ - struct gk20a_platform *platform = NULL; - struct nvgpu_os_linux *l; - struct gk20a *g; - int err; - char nodefmt[64]; - struct device_node *np; - - /* make sure driver_data is a sane index */ - if (pent->driver_data >= sizeof(nvgpu_pci_device) / - sizeof(nvgpu_pci_device[0])) { - return -EINVAL; - } - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(&pdev->dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - - hash_init(l->ecc_sysfs_stats_htable); - - g = &l->g; - nvgpu_init_gk20a(g); - - nvgpu_kmem_init(g); - - /* Allocate memory to hold platform data*/ - platform = (struct gk20a_platform *)nvgpu_kzalloc( g, - sizeof(struct gk20a_platform)); - if (!platform) { - dev_err(&pdev->dev, "couldn't allocate platform data"); - err = -ENOMEM; - goto err_free_l; - } - - /* copy detected device data to allocated platform space*/ - memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data], - sizeof(struct gk20a_platform)); - - pci_set_drvdata(pdev, platform); - - err = nvgpu_init_enabled_flags(g); - if (err) - goto 
err_free_platform; - - platform->g = g; - l->dev = &pdev->dev; - - np = nvgpu_get_node(g); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); - } - - err = pci_enable_device(pdev); - if (err) - goto err_free_platform; - pci_set_master(pdev); - - g->pci_vendor_id = pdev->vendor; - g->pci_device_id = pdev->device; - g->pci_subsystem_vendor_id = pdev->subsystem_vendor; - g->pci_subsystem_device_id = pdev->subsystem_device; - g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub - g->pci_revision = pdev->revision; - - g->ina3221_dcb_index = platform->ina3221_dcb_index; - g->ina3221_i2c_address = platform->ina3221_i2c_address; - g->ina3221_i2c_port = platform->ina3221_i2c_port; - g->hardcode_sw_threshold = platform->hardcode_sw_threshold; - -#if defined(CONFIG_PCI_MSI) - err = pci_enable_msi(pdev); - if (err) { - nvgpu_err(g, - "MSI could not be enabled, falling back to legacy"); - g->msi_enabled = false; - } else - g->msi_enabled = true; -#endif - - g->irq_stall = pdev->irq; - g->irq_nonstall = pdev->irq; - if (g->irq_stall < 0) { - err = -ENXIO; - goto err_disable_msi; - } - - err = devm_request_threaded_irq(&pdev->dev, - g->irq_stall, - nvgpu_pci_isr, - nvgpu_pci_intr_thread, -#if defined(CONFIG_PCI_MSI) - g->msi_enabled ? 0 : -#endif - IRQF_SHARED, "nvgpu", g); - if (err) { - nvgpu_err(g, - "failed to request irq @ %d", g->irq_stall); - goto err_disable_msi; - } - disable_irq(g->irq_stall); - - err = nvgpu_pci_init_support(pdev); - if (err) - goto err_free_irq; - - if (strchr(dev_name(&pdev->dev), '%')) { - nvgpu_err(g, "illegal character in device name"); - err = -EINVAL; - goto err_free_irq; - } - - snprintf(nodefmt, sizeof(nodefmt), - PCI_INTERFACE_NAME, dev_name(&pdev->dev)); - - err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); - if (err) - goto err_free_irq; - - err = nvgpu_pci_pm_init(&pdev->dev); - if (err) { - nvgpu_err(g, "pm init failed"); - goto err_free_irq; - } - - err = nvgpu_nvlink_probe(g); - /* - * ENODEV is a legal error which means there is no NVLINK - * any other error is fatal - */ - if (err) { - if (err != -ENODEV) { - nvgpu_err(g, "fatal error probing nvlink, bailing out"); - goto err_free_irq; - } - /* Enable Semaphore SHIM on nvlink only for now. 
*/ - __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); - g->has_syncpoints = false; - } else { - err = nvgpu_nvhost_syncpt_init(g); - if (err) { - if (err != -ENOSYS) { - nvgpu_err(g, "syncpt init failed"); - goto err_free_irq; - } - } - } - - g->mm.has_physical_mode = false; - - return 0; - -err_free_irq: - nvgpu_free_irq(g); -err_disable_msi: -#if defined(CONFIG_PCI_MSI) - if (g->msi_enabled) - pci_disable_msi(pdev); -#endif -err_free_platform: - nvgpu_kfree(g, platform); -err_free_l: - kfree(l); - return err; -} - -static void nvgpu_pci_remove(struct pci_dev *pdev) -{ - struct gk20a *g = get_gk20a(&pdev->dev); - struct device *dev = dev_from_gk20a(g); - int err; - - /* no support yet for unbind if DGPU is in VGPU mode */ - if (gk20a_gpu_is_virtual(dev)) - return; - - nvgpu_nvlink_remove(g); - - gk20a_driver_start_unload(g); - err = nvgpu_quiesce(g); - /* TODO: handle failure to idle */ - WARN(err, "gpu failed to idle during driver removal"); - - nvgpu_free_irq(g); - - nvgpu_remove(dev, &nvgpu_pci_class); - -#if defined(CONFIG_PCI_MSI) - if (g->msi_enabled) - pci_disable_msi(pdev); - else { - /* IRQ does not need to be enabled in MSI as the line is not - * shared - */ - enable_irq(g->irq_stall); - } -#endif - - /* free allocated platform data space */ - nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); - - gk20a_get_platform(&pdev->dev)->g = NULL; - gk20a_put(g); -} - -static struct pci_driver nvgpu_pci_driver = { - .name = "nvgpu", - .id_table = nvgpu_pci_table, - .probe = nvgpu_pci_probe, - .remove = nvgpu_pci_remove, -#ifdef CONFIG_PM - .driver.pm = &nvgpu_pci_pm_ops, -#endif -}; - -int __init nvgpu_pci_init(void) -{ - int ret; - - ret = class_register(&nvgpu_pci_class); - if (ret) - return ret; - - return pci_register_driver(&nvgpu_pci_driver); -} - -void __exit nvgpu_pci_exit(void) -{ - pci_unregister_driver(&nvgpu_pci_driver); - class_unregister(&nvgpu_pci_class); -} diff --git a/drivers/gpu/nvgpu/common/linux/pci.h b/drivers/gpu/nvgpu/common/linux/pci.h deleted file mode 100644 index cc6b77b1..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef NVGPU_PCI_H -#define NVGPU_PCI_H - -#ifdef CONFIG_GK20A_PCI -int nvgpu_pci_init(void); -void nvgpu_pci_exit(void); -#else -static inline int nvgpu_pci_init(void) { return 0; } -static inline void nvgpu_pci_exit(void) {} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.c b/drivers/gpu/nvgpu/common/linux/pci_usermode.c deleted file mode 100644 index f474bd10..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci_usermode.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
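
For reference, the pci.h removed just above is a textbook Kconfig stub: with CONFIG_GK20A_PCI enabled the header declares the real entry points, otherwise it supplies empty static inlines so callers (here the module init path) never need an #ifdef of their own. A generic sketch of the pattern, using a hypothetical CONFIG_FOO feature:

#ifdef CONFIG_FOO
int foo_init(void);
void foo_exit(void);
#else
/* Feature compiled out: the stubs vanish entirely after inlining. */
static inline int foo_init(void) { return 0; }
static inline void foo_exit(void) {}
#endif
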
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) -{ - l->usermode_regs = l->regs + usermode_cfg0_r(); - l->usermode_regs_saved = l->usermode_regs; -} diff --git a/drivers/gpu/nvgpu/common/linux/pci_usermode.h b/drivers/gpu/nvgpu/common/linux/pci_usermode.h deleted file mode 100644 index 25a08d28..00000000 --- a/drivers/gpu/nvgpu/common/linux/pci_usermode.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __NVGPU_PCI_USERMODE_H__ -#define __NVGPU_PCI_USERMODE_H__ - -struct nvgpu_os_linux; - -void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c deleted file mode 100644 index 0fe1c8d2..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
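
The whole of the pci_usermode.c removed above is one assignment: the usermode register window is not a separate PCI BAR, it is a fixed offset (given by the generated usermode_cfg0_r() accessor) inside the already-ioremapped BAR0. A minimal sketch of the same idea; the offset value and context struct here are illustrative only:

struct mmio_ctx {
	void __iomem *regs;		/* BAR0 mapping */
	void __iomem *usermode_regs;	/* alias into BAR0 */
};

#define USERMODE_OFFSET 0x810000	/* illustrative; real offset comes from usermode_cfg0_r() */

static void init_usermode(struct mmio_ctx *c)
{
	c->usermode_regs = c->regs + USERMODE_OFFSET;
}
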
- */
-
-#include
-
-#include
-#include
-#include
-
-#include "os_linux.h"
-
-#include "gk20a/gk20a.h"
-
-#include "platform_gk20a.h"
-#include "platform_gk20a_tegra.h"
-#include "gp10b/platform_gp10b.h"
-#include "platform_gp10b_tegra.h"
-#include "platform_ecc_sysfs.h"
-
-static u32 gen_ecc_hash_key(char *str)
-{
-	int i = 0;
-	u32 hash_key = 0x811c9dc5;
-
-	while (str[i]) {
-		hash_key *= 0x1000193;
-		hash_key ^= (u32)(str[i]);
-		i++;
-	}
-
-	return hash_key;
-}
-
-static ssize_t ecc_stat_show(struct device *dev,
-			     struct device_attribute *attr,
-			     char *buf)
-{
-	const char *ecc_stat_full_name = attr->attr.name;
-	const char *ecc_stat_base_name;
-	unsigned int hw_unit;
-	unsigned int subunit;
-	struct gk20a_ecc_stat *ecc_stat;
-	u32 hash_key;
-	struct gk20a *g = get_gk20a(dev);
-	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
-
-	if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
-		   &subunit) == 2) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
-		hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
-	} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
-	} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
-		ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
-	} else {
-		return snprintf(buf,
-				PAGE_SIZE,
-				"Error: Invalid ECC stat name!\n");
-	}
-
-	hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
-
-	hash_for_each_possible(l->ecc_sysfs_stats_htable,
-			       ecc_stat,
-			       hash_node,
-			       hash_key) {
-		if (hw_unit >= ecc_stat->count)
-			continue;
-		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
-			return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
-	}
-
-	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
-}
-
-int nvgpu_gr_ecc_stat_create(struct device *dev,
-			     int is_l2, char *ecc_stat_name,
-			     struct gk20a_ecc_stat *ecc_stat)
-{
-	struct gk20a *g = get_gk20a(dev);
-	char *ltc_unit_name = "ltc";
-	char *gr_unit_name = "gpc0_tpc";
-	char *lts_unit_name = "lts";
-	int num_hw_units = 0;
-	int num_subunits = 0;
-
-	if (is_l2 == 1)
-		num_hw_units = g->ltc_count;
-	else if (is_l2 == 2) {
-		num_hw_units = g->ltc_count;
-		num_subunits = g->gr.slices_per_ltc;
-	} else
-		num_hw_units = g->gr.tpc_count;
-
-	return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
-				     is_l2 ? ltc_unit_name : gr_unit_name,
-				     num_subunits ? lts_unit_name : NULL,
-				     ecc_stat_name,
-				     ecc_stat);
-}
-
-int nvgpu_ecc_stat_create(struct device *dev,
-			  int num_hw_units, int num_subunits,
-			  char *ecc_unit_name, char *ecc_subunit_name,
-			  char *ecc_stat_name,
-			  struct gk20a_ecc_stat *ecc_stat)
-{
-	int error = 0;
-	struct gk20a *g = get_gk20a(dev);
-	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
-	int hw_unit = 0;
-	int subunit = 0;
-	int element = 0;
-	u32 hash_key = 0;
-	struct device_attribute *dev_attr_array;
-
-	int num_elements = num_subunits ?
num_subunits * num_hw_units : - num_hw_units; - - /* Allocate arrays */ - dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) * - num_elements); - ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements); - ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) { - ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) * - ECC_STAT_NAME_MAX_SIZE); - } - ecc_stat->count = num_elements; - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit*num_subunits + subunit; - - snprintf(ecc_stat->names[element], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_subunit_name, - subunit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[element].attr); - dev_attr_array[element].attr.name = - ecc_stat->names[element]; - dev_attr_array[element].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[element].show = ecc_stat_show; - dev_attr_array[element].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[element]); - - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - - /* Fill in struct device_attribute members */ - snprintf(ecc_stat->names[hw_unit], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[hw_unit].attr); - dev_attr_array[hw_unit].attr.name = - ecc_stat->names[hw_unit]; - dev_attr_array[hw_unit].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[hw_unit].show = ecc_stat_show; - dev_attr_array[hw_unit].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[hw_unit]); - } - } - - /* Add hash table entry */ - hash_key = gen_ecc_hash_key(ecc_stat_name); - hash_add(l->ecc_sysfs_stats_htable, - &ecc_stat->hash_node, - hash_key); - - ecc_stat->attr_array = dev_attr_array; - - return error; -} - -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - int num_hw_units = 0; - int num_subunits = 0; - - if (is_l2 == 1) - num_hw_units = g->ltc_count; - else if (is_l2 == 2) { - num_hw_units = g->ltc_count; - num_subunits = g->gr.slices_per_ltc; - } else - num_hw_units = g->gr.tpc_count; - - nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat); -} - -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - struct device_attribute *dev_attr_array = ecc_stat->attr_array; - int hw_unit = 0; - int subunit = 0; - int element = 0; - int num_elements = num_subunits ? 
num_subunits * num_hw_units : - num_hw_units; - - /* Remove sysfs files */ - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit * num_subunits + subunit; - - device_remove_file(dev, - &dev_attr_array[element]); - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) - device_remove_file(dev, &dev_attr_array[hw_unit]); - } - - /* Remove hash table entry */ - hash_del(&ecc_stat->hash_node); - - /* Free arrays */ - nvgpu_kfree(g, ecc_stat->counters); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) - nvgpu_kfree(g, ecc_stat->names[hw_unit]); - - nvgpu_kfree(g, ecc_stat->names); - nvgpu_kfree(g, dev_attr_array); -} diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h deleted file mode 100644 index d29f7bd3..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _NVGPU_PLATFORM_SYSFS_H_ -#define _NVGPU_PLATFORM_SYSFS_H_ - -#include "gp10b/gr_gp10b.h" - -#define ECC_STAT_NAME_MAX_SIZE 100 - -int nvgpu_gr_ecc_stat_create(struct device *dev, - int is_l2, char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -int nvgpu_ecc_stat_create(struct device *dev, - int num_hw_units, int num_subunits, - char *ecc_unit_name, char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat); -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a.h deleted file mode 100644 index 9a99b7fe..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * GK20A Platform (SoC) Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
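
A note on the hashing used by the platform_ecc_sysfs.c removed above: gen_ecc_hash_key() is 32-bit FNV-1 (multiply by the FNV prime, then XOR in each byte), with offset basis 0x811c9dc5 and prime 0x1000193. Creation (nvgpu_ecc_stat_create) and lookup (ecc_stat_show) must hash the same stat base name so both land in the same hashtable bucket. A standalone restatement:

#include <stdint.h>

static uint32_t ecc_hash_key(const char *str)
{
	uint32_t key = 0x811c9dc5;	/* FNV-1 32-bit offset basis */

	while (*str) {
		key *= 0x1000193;	/* FNV-1 32-bit prime */
		key ^= (uint32_t)*str++;
	}
	return key;
}
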
- */
-
-#ifndef _GK20A_PLATFORM_H_
-#define _GK20A_PLATFORM_H_
-
-#include
-
-#include
-
-#include "gk20a/gk20a.h"
-
-#define GK20A_CLKS_MAX 4
-
-struct gk20a;
-struct channel_gk20a;
-struct gr_ctx_buffer_desc;
-struct gk20a_scale_profile;
-
-struct secure_page_buffer {
-	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
-	size_t size;
-	dma_addr_t phys;
-	size_t used;
-};
-
-struct gk20a_platform {
-	/* Populated by the gk20a driver before probing the platform. */
-	struct gk20a *g;
-
-	/* Should be populated at probe. */
-	bool can_railgate_init;
-
-	/* Should be populated at probe. */
-	bool can_elpg_init;
-
-	/* Should be populated at probe. */
-	bool has_syncpoints;
-
-	/* channel limit after which to start aggressive sync destroy */
-	unsigned int aggressive_sync_destroy_thresh;
-
-	/* flag to set sync destroy aggressiveness */
-	bool aggressive_sync_destroy;
-
-	/* set if ASPM should be disabled on boot; only makes sense for PCI */
-	bool disable_aspm;
-
-	/* Set if the platform can unify the small/large address spaces. */
-	bool unify_address_spaces;
-
-	/* Clock configuration is stored here. Platform probe is responsible
-	 * for filling this data. */
-	struct clk *clk[GK20A_CLKS_MAX];
-	int num_clks;
-	int maxmin_clk_id;
-
-#ifdef CONFIG_RESET_CONTROLLER
-	/* Reset control for device */
-	struct reset_control *reset_control;
-#endif
-
-	/* Delay before rail gated */
-	int railgate_delay_init;
-
-	/* init value for slowdown factor */
-	u8 ldiv_slowdown_factor_init;
-
-	/* Second Level Clock Gating: true = enable false = disable */
-	bool enable_slcg;
-
-	/* Block Level Clock Gating: true = enable false = disable */
-	bool enable_blcg;
-
-	/* Engine Level Clock Gating: true = enable false = disable */
-	bool enable_elcg;
-
-	/* Should be populated at probe. */
-	bool can_slcg;
-
-	/* Should be populated at probe. */
-	bool can_blcg;
-
-	/* Should be populated at probe. */
-	bool can_elcg;
-
-	/* Engine Level Power Gating: true = enable false = disable */
-	bool enable_elpg;
-
-	/* Adaptive ELPG: true = enable false = disable */
-	bool enable_aelpg;
-
-	/* PMU Perfmon: true = enable false = disable */
-	bool enable_perfmon;
-
-	/* Memory System Clock Gating: true = enable false = disable */
-	bool enable_mscg;
-
-	/* Timeout for per-channel watchdog (in ms) */
-	u32 ch_wdt_timeout_ms;
-
-	/* Disable big page support */
-	bool disable_bigpage;
-
-	/*
-	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
-	 * This flag can be used to force CAR reset case instead of rail gate
-	 */
-	bool force_reset_in_do_idle;
-
-	/* guest/vm id, needed for IPA to PA translation */
-	int vmid;
-
-	/* Initialize the platform interface of the gk20a driver.
-	 *
-	 * The platform implementation of this function must
-	 * - set the power and clocks of the gk20a device to a known
-	 *   state, and
-	 * - populate the gk20a_platform structure (a pointer to the
-	 *   structure can be obtained by calling gk20a_get_platform).
-	 *
-	 * After this function is finished, the driver will initialise
-	 * pm runtime and genpd based on the platform configuration.
-	 */
-	int (*probe)(struct device *dev);
-
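
The probe contract documented above is worth restating as code: the callback receives the struct device, reaches its gk20a_platform through drvdata, and fills in the "populated at probe" fields. The callback signature and field names below are real; the body is purely illustrative.

static int my_board_probe(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	/* 1. bring power and clocks to a known state (board specific) */

	/* 2. populate the platform structure */
	platform->can_railgate_init = false;
	platform->can_elpg_init = false;
	platform->has_syncpoints = true;

	return 0;	/* the driver then sets up runtime pm / genpd */
}
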
-	/* Second stage initialisation - called once all power management
-	 * initialisations are done.
-	 */
-	int (*late_probe)(struct device *dev);
-
-	/* Remove device after power management has been done
-	 */
-	int (*remove)(struct device *dev);
-
-	/* Poweron platform dependencies */
-	int (*busy)(struct device *dev);
-
-	/* Powerdown platform dependencies */
-	void (*idle)(struct device *dev);
-
-	/* Preallocated VPR buffer for kernel */
-	size_t secure_buffer_size;
-	struct secure_page_buffer secure_buffer;
-
-	/* Device is going to be suspended */
-	int (*suspend)(struct device *);
-
-	/* Called to turn off the device */
-	int (*railgate)(struct device *dev);
-
-	/* Called to turn on the device */
-	int (*unrailgate)(struct device *dev);
-	struct nvgpu_mutex railgate_lock;
-
-	/* Called to check state of device */
-	bool (*is_railgated)(struct device *dev);
-
-	/* get supported frequency list */
-	int (*get_clk_freqs)(struct device *pdev,
-			     unsigned long **freqs, int *num_freqs);
-
-	/* clk related supported functions */
-	long (*clk_round_rate)(struct device *dev,
-			       unsigned long rate);
-
-	/* Called to register GPCPLL with common clk framework */
-	int (*clk_register)(struct gk20a *g);
-
-	/* platform specific scale init quirks */
-	void (*initscale)(struct device *dev);
-
-	/* Postscale callback is called after frequency change */
-	void (*postscale)(struct device *dev,
-			  unsigned long freq);
-
-	/* Pre callback is called before frequency change */
-	void (*prescale)(struct device *dev);
-
-	/* Devfreq governor name. If scaling is enabled, we request
-	 * this governor to be used in scaling */
-	const char *devfreq_governor;
-
-	/* Quality of service notifier callback. If this is set, the scaling
-	 * routines will register a callback to QoS. Each time we receive
-	 * a new value, this callback gets called. */
-	int (*qos_notify)(struct notifier_block *nb,
-			  unsigned long n, void *p);
-
-	/* Called as part of debug dump. If the gpu gets hung, this function
-	 * is responsible for delivering all necessary debug data of other
-	 * hw units which may interact with the gpu without direct supervision
-	 * of the CPU.
-	 */
-	void (*dump_platform_dependencies)(struct device *dev);
-
-	/* Defined when SMMU stage-2 is enabled, and we need to use physical
-	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
-	 * configuration, when dGPU is in pass-through mode.
-	 */
-	u64 (*phys_addr)(struct gk20a *g, u64 ipa);
-
-	/* Callbacks to assert/deassert GPU reset */
-	int (*reset_assert)(struct device *dev);
-	int (*reset_deassert)(struct device *dev);
-	struct clk *clk_reset;
-	struct dvfs_rail *gpu_rail;
-
-	bool virtual_dev;
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-	void *vgpu_priv;
-#endif
-	/* source frequency for ptimer in hz */
-	u32 ptimer_src_freq;
-
-#ifdef CONFIG_NVGPU_SUPPORT_CDE
-	bool has_cde;
-#endif
-
-	/* soc name for finding firmware files */
-	const char *soc_name;
-
-	/* false if vidmem aperture actually points to sysmem */
-	bool honors_aperture;
-	/* unified or split memory with separate vidmem? */
-	bool unified_memory;
-
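
For reference, DMA_BIT_MASK(n) expands to ((1ULL << (n)) - 1) for n < 64, so the 34-bit default mentioned in the comment below is (1ULL << 34) - 1 = 0x3ffffffff, and the DMA_BIT_MASK(40) used by the PCI platform entries earlier in this patch is 0xffffffffff.
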
-	/*
-	 * DMA mask for Linux (both coh and non-coh). If not set defaults to
-	 * 0x3ffffffff (i.e. a 34-bit mask).
-	 */
-	u64 dma_mask;
-
-	/* minimum supported VBIOS version */
-	u32 vbios_min_version;
-
-	/* true if we run preos microcode on this board */
-	bool run_preos;
-
-	/* true if we need to program sw threshold for
-	 * power limits
-	 */
-	bool hardcode_sw_threshold;
-
-	/* i2c device index, port and address for INA3221 */
-	u32 ina3221_dcb_index;
-	u32 ina3221_i2c_address;
-	u32 ina3221_i2c_port;
-
-	/* stream id to use */
-	u32 ltc_streamid;
-
-	/* scaling rate */
-	unsigned long cached_rate;
-};
-
-static inline struct gk20a_platform *gk20a_get_platform(
-	struct device *dev)
-{
-	return (struct gk20a_platform *)dev_get_drvdata(dev);
-}
-
-#ifdef CONFIG_TEGRA_GK20A
-extern struct gk20a_platform gm20b_tegra_platform;
-extern struct gk20a_platform gp10b_tegra_platform;
-extern struct gk20a_platform gv11b_tegra_platform;
-#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
-extern struct gk20a_platform vgpu_tegra_platform;
-extern struct gk20a_platform gv11b_vgpu_tegra_platform;
-#endif
-#endif
-
-int gk20a_tegra_busy(struct device *dev);
-void gk20a_tegra_idle(struct device *dev);
-void gk20a_tegra_debug_dump(struct device *pdev);
-
-static inline struct gk20a *get_gk20a(struct device *dev)
-{
-	return gk20a_get_platform(dev)->g;
-}
-static inline struct gk20a *gk20a_from_dev(struct device *dev)
-{
-	if (!dev)
-		return NULL;
-
-	return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
-}
-static inline bool gk20a_gpu_is_virtual(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	return platform->virtual_dev;
-}
-
-static inline int support_gk20a_pmu(struct device *dev)
-{
-	if (IS_ENABLED(CONFIG_GK20A_PMU)) {
-		/* gPMU is not supported for vgpu */
-		return !gk20a_gpu_is_virtual(dev);
-	}
-
-	return 0;
-}
-
-#endif
diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
deleted file mode 100644
index af55e5b6..00000000
--- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.c
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * GK20A Tegra Platform Interface
- *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
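
The inline helpers at the end of the platform_gk20a.h removed above all reduce to one chain: device -> drvdata -> gk20a_platform -> gk20a. A condensed restatement (the NULL check on the platform pointer is extra caution here, not present in the originals):

static inline struct gk20a *dev_to_gk20a_sketch(struct device *dev)
{
	struct gk20a_platform *p = dev ? dev_get_drvdata(dev) : NULL;

	return p ? p->g : NULL;
}
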
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CONFIG_TEGRA_DVFS) -#include -#endif -#include -#include -#include -#if defined(CONFIG_COMMON_CLK) -#include -#endif -#ifdef CONFIG_TEGRA_BWMGR -#include -#endif - -#include -#include - -#include -#include -#include -#include - -#include - -#include "gk20a/gk20a.h" -#include "gm20b/clk_gm20b.h" - -#include "scale.h" -#include "platform_gk20a.h" -#include "clk.h" -#include "os_linux.h" - -#include "../../../arch/arm/mach-tegra/iomap.h" -#include - -#define TEGRA_GK20A_BW_PER_FREQ 32 -#define TEGRA_GM20B_BW_PER_FREQ 64 -#define TEGRA_DDR3_BW_PER_FREQ 16 -#define TEGRA_DDR4_BW_PER_FREQ 16 -#define MC_CLIENT_GPU 34 -#define PMC_GPU_RG_CNTRL_0 0x2d4 - -#ifdef CONFIG_COMMON_CLK -#define GPU_RAIL_NAME "vdd-gpu" -#else -#define GPU_RAIL_NAME "vdd_gpu" -#endif - -extern struct device tegra_vpr_dev; - -#ifdef CONFIG_TEGRA_BWMGR -struct gk20a_emc_params { - unsigned long bw_ratio; - unsigned long freq_last_set; - struct tegra_bwmgr_client *bwmgr_cl; -}; -#else -struct gk20a_emc_params { - unsigned long bw_ratio; - unsigned long freq_last_set; -}; -#endif - -#define MHZ_TO_HZ(x) ((x) * 1000000) -#define HZ_TO_MHZ(x) ((x) / 1000000) - -static void gk20a_tegra_secure_page_destroy(struct gk20a *g, - struct secure_page_buffer *secure_buffer) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); - dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, - (void *)(uintptr_t)secure_buffer->phys, - secure_buffer->phys, __DMA_ATTR(attrs)); - - secure_buffer->destroy = NULL; -} - -static int gk20a_tegra_secure_alloc(struct gk20a *g, - struct gr_ctx_buffer_desc *desc, - size_t size) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct secure_page_buffer *secure_buffer = &platform->secure_buffer; - dma_addr_t phys; - struct sg_table *sgt; - struct page *page; - int err = 0; - size_t aligned_size = PAGE_ALIGN(size); - - if (nvgpu_mem_is_valid(&desc->mem)) - return 0; - - /* We ran out of preallocated memory */ - if (secure_buffer->used + aligned_size > secure_buffer->size) { - nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", - size, secure_buffer->used, secure_buffer->size); - return -ENOMEM; - } - - phys = secure_buffer->phys + secure_buffer->used; - - sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); - if (!sgt) { - nvgpu_err(platform->g, "failed to allocate memory"); - return -ENOMEM; - } - err = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (err) { - nvgpu_err(platform->g, "failed to allocate sg_table"); - goto fail_sgt; - } - page = phys_to_page(phys); - sg_set_page(sgt->sgl, page, size, 0); - /* This bypasses SMMU for VPR during gmmu_map. 
*/ - sg_dma_address(sgt->sgl) = 0; - - desc->destroy = NULL; - - desc->mem.priv.sgt = sgt; - desc->mem.size = size; - desc->mem.aperture = APERTURE_SYSMEM; - - secure_buffer->used += aligned_size; - - return err; - -fail_sgt: - nvgpu_kfree(platform->g, sgt); - return err; -} - -/* - * gk20a_tegra_get_emc_rate() - * - * This function returns the minimum emc clock based on gpu frequency - */ - -static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, - struct gk20a_emc_params *emc_params) -{ - unsigned long gpu_freq, gpu_fmax_at_vmin; - unsigned long emc_rate, emc_scale; - - gpu_freq = clk_get_rate(g->clk.tegra_clk); - gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( - clk_get_parent(g->clk.tegra_clk)); - - /* When scaling emc, account for the gpu load when the - * gpu frequency is less than or equal to fmax@vmin. */ - if (gpu_freq <= gpu_fmax_at_vmin) - emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); - else - emc_scale = g->emc3d_ratio; - - emc_rate = - (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; - - return MHZ_TO_HZ(emc_rate); -} - -/* - * gk20a_tegra_prescale(profile, freq) - * - * This function informs EDP about changed constraints. - */ - -static void gk20a_tegra_prescale(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - u32 avg = 0; - - nvgpu_pmu_load_norm(g, &avg); - tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); -} - -/* - * gk20a_tegra_calibrate_emc() - * - */ - -static void gk20a_tegra_calibrate_emc(struct device *dev, - struct gk20a_emc_params *emc_params) -{ - enum tegra_chipid cid = tegra_get_chip_id(); - long gpu_bw, emc_bw; - - /* store gpu bw based on soc */ - switch (cid) { - case TEGRA210: - gpu_bw = TEGRA_GM20B_BW_PER_FREQ; - break; - case TEGRA124: - case TEGRA132: - gpu_bw = TEGRA_GK20A_BW_PER_FREQ; - break; - default: - gpu_bw = 0; - break; - } - - /* TODO detect DDR type. - * Okay for now since DDR3 and DDR4 have the same BW ratio */ - emc_bw = TEGRA_DDR3_BW_PER_FREQ; - - /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq - * NOTE the ratio must come out as an integer */ - emc_params->bw_ratio = (gpu_bw / emc_bw); -} - -#ifdef CONFIG_TEGRA_BWMGR -#ifdef CONFIG_TEGRA_DVFS -static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) -{ - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *params; - unsigned long rate; - - if (!profile || !profile->private_data) - return; - - params = (struct gk20a_emc_params *)profile->private_data; - rate = (enb) ? 
params->freq_last_set : 0; - tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); -} -#endif - -static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - unsigned long emc_rate; - - if (!profile || !profile->private_data) - return; - - emc_params = profile->private_data; - emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); - - if (emc_rate > tegra_bwmgr_get_max_emc_rate()) - emc_rate = tegra_bwmgr_get_max_emc_rate(); - - emc_params->freq_last_set = emc_rate; - if (platform->is_railgated && platform->is_railgated(dev)) - return; - - tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, - TEGRA_BWMGR_SET_EMC_FLOOR); - -} - -#endif - -#if defined(CONFIG_TEGRA_DVFS) -/* - * gk20a_tegra_is_railgated() - * - * Check status of gk20a power rail - */ - -static bool gk20a_tegra_is_railgated(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - bool ret = false; - - if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); - - return ret; -} - -/* - * gm20b_tegra_railgate() - * - * Gate (disable) gm20b power rail - */ - -static int gm20b_tegra_railgate(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret = 0; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || - !tegra_dvfs_is_rail_up(platform->gpu_rail)) - return 0; - - tegra_mc_flush(MC_CLIENT_GPU); - - udelay(10); - - /* enable clamp */ - tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); - tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); - - udelay(10); - - platform->reset_assert(dev); - - udelay(10); - - /* - * GPCPLL is already disabled before entering this function; reference - * clocks are enabled until now - disable them just before rail gating - */ - clk_disable_unprepare(platform->clk_reset); - clk_disable_unprepare(platform->clk[0]); - clk_disable_unprepare(platform->clk[1]); - if (platform->clk[3]) - clk_disable_unprepare(platform->clk[3]); - - udelay(10); - - tegra_soctherm_gpu_tsens_invalidate(1); - - if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { - ret = tegra_dvfs_rail_power_down(platform->gpu_rail); - if (ret) - goto err_power_off; - } else - pr_info("No GPU regulator?\n"); - -#ifdef CONFIG_TEGRA_BWMGR - gm20b_bwmgr_set_rate(platform, false); -#endif - - return 0; - -err_power_off: - nvgpu_err(platform->g, "Could not railgate GPU"); - return ret; -} - - -/* - * gm20b_tegra_unrailgate() - * - * Ungate (enable) gm20b power rail - */ - -static int gm20b_tegra_unrailgate(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - int ret = 0; - bool first = false; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - return 0; - - ret = tegra_dvfs_rail_power_up(platform->gpu_rail); - if (ret) - return ret; - -#ifdef CONFIG_TEGRA_BWMGR - gm20b_bwmgr_set_rate(platform, true); -#endif - - tegra_soctherm_gpu_tsens_invalidate(0); - - if (!platform->clk_reset) { - platform->clk_reset = clk_get(dev, "gpu_gate"); - if (IS_ERR(platform->clk_reset)) { - nvgpu_err(g, "fail to get gpu reset clk"); - goto err_clk_on; - } - } - - if (!first) { - ret = clk_prepare_enable(platform->clk_reset); - if (ret) { - nvgpu_err(g, "could not turn on gpu_gate"); - goto err_clk_on; - } - - ret = clk_prepare_enable(platform->clk[0]); - if (ret) 
{ - nvgpu_err(g, "could not turn on gpu pll"); - goto err_clk_on; - } - ret = clk_prepare_enable(platform->clk[1]); - if (ret) { - nvgpu_err(g, "could not turn on pwr clock"); - goto err_clk_on; - } - - if (platform->clk[3]) { - ret = clk_prepare_enable(platform->clk[3]); - if (ret) { - nvgpu_err(g, "could not turn on fuse clock"); - goto err_clk_on; - } - } - } - - udelay(10); - - platform->reset_assert(dev); - - udelay(10); - - tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); - tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); - - udelay(10); - - clk_disable(platform->clk_reset); - platform->reset_deassert(dev); - clk_enable(platform->clk_reset); - - /* Flush MC after boot/railgate/SC7 */ - tegra_mc_flush(MC_CLIENT_GPU); - - udelay(10); - - tegra_mc_flush_done(MC_CLIENT_GPU); - - udelay(10); - - return 0; - -err_clk_on: - tegra_dvfs_rail_power_down(platform->gpu_rail); - - return ret; -} -#endif - - -static struct { - char *name; - unsigned long default_rate; -} tegra_gk20a_clocks[] = { - {"gpu_ref", UINT_MAX}, - {"pll_p_out5", 204000000}, - {"emc", UINT_MAX}, - {"fuse", UINT_MAX}, -}; - - - -/* - * gk20a_tegra_get_clocks() - * - * This function finds clocks in tegra platform and populates - * the clock information to gk20a platform data. - */ - -static int gk20a_tegra_get_clocks(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - char devname[16]; - unsigned int i; - int ret = 0; - - BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); - - snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); - - platform->num_clks = 0; - for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { - long rate = tegra_gk20a_clocks[i].default_rate; - struct clk *c; - - c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); - if (IS_ERR(c)) { - ret = PTR_ERR(c); - goto err_get_clock; - } - rate = clk_round_rate(c, rate); - clk_set_rate(c, rate); - platform->clk[i] = c; - if (i == 0) - platform->cached_rate = rate; - } - platform->num_clks = i; - - return 0; - -err_get_clock: - - while (i--) - clk_put(platform->clk[i]); - return ret; -} - -#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) -static int gm20b_tegra_reset_assert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - - if (!platform->reset_control) { - WARN(1, "Reset control not initialized\n"); - return -ENOSYS; - } - - return reset_control_assert(platform->reset_control); -} - -static int gm20b_tegra_reset_deassert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - - if (!platform->reset_control) { - WARN(1, "Reset control not initialized\n"); - return -ENOSYS; - } - - return reset_control_deassert(platform->reset_control); -} -#endif - -static void gk20a_tegra_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - struct gk20a *g = platform->g; - - if (!profile) - return; - - if (profile->private_data) - return; - - emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); - if (!emc_params) - return; - - emc_params->freq_last_set = -1; - gk20a_tegra_calibrate_emc(dev, emc_params); - -#ifdef CONFIG_TEGRA_BWMGR - emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); - if (!emc_params->bwmgr_cl) { - nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); - return; - } -#endif - - profile->private_data = emc_params; -} - -static void gk20a_tegra_scale_exit(struct 
device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; - - if (!profile) - return; - - emc_params = profile->private_data; -#ifdef CONFIG_TEGRA_BWMGR - tegra_bwmgr_unregister(emc_params->bwmgr_cl); -#endif - - nvgpu_kfree(platform->g, profile->private_data); -} - -void gk20a_tegra_debug_dump(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - nvgpu_nvhost_debug_dump_device(g->nvhost_dev); -#endif -} - -int gk20a_tegra_busy(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); -#endif - return 0; -} - -void gk20a_tegra_idle(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - if (g->nvhost_dev) - nvgpu_nvhost_module_idle_ext(g->nvhost_dev); -#endif -} - -int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) -{ - struct gk20a *g = platform->g; - struct secure_page_buffer *secure_buffer = &platform->secure_buffer; - DEFINE_DMA_ATTRS(attrs); - dma_addr_t iova; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - return 0; - - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); - (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, - GFP_KERNEL, __DMA_ATTR(attrs)); - /* Some platforms disable VPR. In that case VPR allocations always - * fail. Just disable VPR usage in nvgpu in that case. */ - if (dma_mapping_error(&tegra_vpr_dev, iova)) - return 0; - - secure_buffer->size = platform->secure_buffer_size; - secure_buffer->phys = iova; - secure_buffer->destroy = gk20a_tegra_secure_page_destroy; - - g->ops.secure_alloc = gk20a_tegra_secure_alloc; - __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); - - return 0; -} - -#ifdef CONFIG_COMMON_CLK -static struct clk *gk20a_clk_get(struct gk20a *g) -{ - if (!g->clk.tegra_clk) { - struct clk *clk; - char clk_dev_id[32]; - struct device *dev = dev_from_gk20a(g); - - snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); - - clk = clk_get_sys(clk_dev_id, "gpu"); - if (IS_ERR(clk)) { - nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", - clk_dev_id); - return NULL; - } - g->clk.tegra_clk = clk; - } - - return g->clk.tegra_clk; -} - -static int gm20b_clk_prepare_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_clk_prepare(clk); -} - -static void gm20b_clk_unprepare_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - gm20b_clk_unprepare(clk); -} - -static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_clk_is_prepared(clk); -} - -static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_recalc_rate(clk, parent_rate); -} - -static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return gm20b_gpcclk_set_rate(clk, rate, parent_rate); -} - -static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct clk_gk20a *clk = to_clk_gk20a(hw); - return 
gm20b_round_rate(clk, rate, parent_rate); -} - -static const struct clk_ops gm20b_clk_ops = { - .prepare = gm20b_clk_prepare_ops, - .unprepare = gm20b_clk_unprepare_ops, - .is_prepared = gm20b_clk_is_prepared_ops, - .recalc_rate = gm20b_recalc_rate_ops, - .set_rate = gm20b_gpcclk_set_rate_ops, - .round_rate = gm20b_round_rate_ops, -}; - -static int gm20b_register_gpcclk(struct gk20a *g) -{ - const char *parent_name = "pllg_ref"; - struct clk_gk20a *clk = &g->clk; - struct clk_init_data init; - struct clk *c; - int err = 0; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return -ENOSYS; - - err = gm20b_init_clk_setup_sw(g); - if (err) - return err; - - init.name = "gpcclk"; - init.ops = &gm20b_clk_ops; - init.parent_names = &parent_name; - init.num_parents = 1; - init.flags = 0; - - /* Data in .init is copied by clk_register(), so stack variable OK */ - clk->hw.init = &init; - c = clk_register(dev_from_gk20a(g), &clk->hw); - if (IS_ERR(c)) { - nvgpu_err(g, "Failed to register GPCPLL clock"); - return -EINVAL; - } - - clk->g = g; - clk_register_clkdev(c, "gpcclk", "gpcclk"); - - return err; -} -#endif /* CONFIG_COMMON_CLK */ - -static int gk20a_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct device_node *np = dev->of_node; - bool joint_xpu_rail = false; - int ret; - struct gk20a *g = platform->g; - -#ifdef CONFIG_COMMON_CLK - /* DVFS is not guaranteed to be initialized at the time of probe on - * kernels with Common Clock Framework enabled. - */ - if (!platform->gpu_rail) { - platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); - if (!platform->gpu_rail) { - nvgpu_log_info(g, "deferring probe no gpu_rail"); - return -EPROBE_DEFER; - } - } - - if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { - nvgpu_log_info(g, "deferring probe gpu_rail not ready"); - return -EPROBE_DEFER; - } -#endif - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; -#endif - -#ifdef CONFIG_OF - joint_xpu_rail = of_property_read_bool(of_chosen, - "nvidia,tegra-joint_xpu_rail"); -#endif - - if (joint_xpu_rail) { - nvgpu_log_info(g, "XPU rails are joint\n"); - platform->g->can_railgate = false; - } - - platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; - if (tegra_get_chip_id() == TEGRA210) { - /* WAR for bug 1547668: Disable railgating and scaling - irrespective of platform data if the rework was not made. 
*/ - np = of_find_node_by_path("/gpu-dvfs-rework"); - if (!(np && of_device_is_available(np))) { - platform->devfreq_governor = ""; - dev_warn(dev, "board does not support scaling"); - } - platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; - if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) - platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; - } - - if (tegra_get_chip_id() == TEGRA132) - platform->soc_name = "tegra13x"; - - gk20a_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - ret = gk20a_tegra_init_secure_alloc(platform); - if (ret) - return ret; - - if (platform->clk_register) { - ret = platform->clk_register(platform->g); - if (ret) - return ret; - } - - return 0; -} - -static int gk20a_tegra_late_probe(struct device *dev) -{ - return 0; -} - -static int gk20a_tegra_remove(struct device *dev) -{ - /* deinitialise tegra specific scaling quirks */ - gk20a_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static int gk20a_tegra_suspend(struct device *dev) -{ - tegra_edp_notify_gpu_load(0, 0); - return 0; -} - -#if defined(CONFIG_COMMON_CLK) -static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return rate; - - return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); -} - -static int gk20a_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - - /* make sure the clock is available */ - if (!gk20a_clk_get(g)) - return -ENOSYS; - - return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), - freqs, num_freqs); -} -#endif - -struct gk20a_platform gm20b_tegra_platform = { - .has_syncpoints = true, - .aggressive_sync_destroy_thresh = 64, - - /* power management configuration */ - .railgate_delay_init = 500, - .can_railgate_init = true, - .can_elpg_init = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_elcg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_elpg = true, - .enable_aelpg = true, - .enable_perfmon = true, - .ptimer_src_freq = 19200000, - - .force_reset_in_do_idle = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gk20a_tegra_probe, - .late_probe = gk20a_tegra_late_probe, - .remove = gk20a_tegra_remove, - /* power management callbacks */ - .suspend = gk20a_tegra_suspend, - -#if defined(CONFIG_TEGRA_DVFS) - .railgate = gm20b_tegra_railgate, - .unrailgate = gm20b_tegra_unrailgate, - .is_railgated = gk20a_tegra_is_railgated, -#endif - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - -#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) - .reset_assert = gm20b_tegra_reset_assert, - .reset_deassert = gm20b_tegra_reset_deassert, -#else - .reset_assert = gk20a_tegra_reset_assert, - .reset_deassert = gk20a_tegra_reset_deassert, -#endif - -#if defined(CONFIG_COMMON_CLK) - .clk_round_rate = gk20a_round_clk_rate, - .get_clk_freqs = gk20a_clk_get_freqs, -#endif - -#ifdef CONFIG_COMMON_CLK - .clk_register = gm20b_register_gpcclk, -#endif - - /* frequency scaling configuration */ - .initscale = gk20a_tegra_scale_init, - .prescale = gk20a_tegra_prescale, -#ifdef CONFIG_TEGRA_BWMGR - .postscale = gm20b_tegra_postscale, -#endif - .devfreq_governor = "nvhost_podgov", - .qos_notify = gk20a_scale_qos_notify, - - 
.dump_platform_dependencies = gk20a_tegra_debug_dump, - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - .has_cde = true, -#endif - - .soc_name = "tegra21x", - - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(34), - - .secure_buffer_size = 335872, -}; diff --git a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h deleted file mode 100644 index f7d50406..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gk20a_tegra.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * GK20A Platform (SoC) Interface - * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ -#define _NVGPU_PLATFORM_GK20A_TEGRA_H_ - -struct gk20a_platform; - -int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c deleted file mode 100644 index fce16653..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c +++ /dev/null @@ -1,607 +0,0 @@ -/* - * GP10B Tegra Platform Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include "os_linux.h" - -#include "clk.h" - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "platform_ecc_sysfs.h" -#include "platform_gk20a_tegra.h" -#include "gp10b/platform_gp10b.h" -#include "platform_gp10b_tegra.h" -#include "scale.h" - -/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ -#define GP10B_FREQ_SELECT_STEP 8 -/* Max number of freq supported in h/w */ -#define GP10B_MAX_SUPPORTED_FREQS 120 -static unsigned long -gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; - -#define TEGRA_GP10B_BW_PER_FREQ 64 -#define TEGRA_DDR4_BW_PER_FREQ 16 - -#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) - -#define GPCCLK_INIT_RATE 1000000000 - -static struct { - char *name; - unsigned long default_rate; -} tegra_gp10b_clocks[] = { - {"gpu", GPCCLK_INIT_RATE}, - {"gpu_sys", 204000000} }; - -/* - * gp10b_tegra_get_clocks() - * - * This function finds clocks in tegra platform and populates - * the clock information to gp10b platform data. 
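/*
 * Worked example of the EMC scaling arithmetic these ratios feed (see
 * gp10b_tegra_postscale() below): EMC_BW_RATIO = 64 / 16 = 4, and the
 * division by 1000 in the postscale path suggests g->emc3d_ratio is
 * expressed in per-mille (an assumption; the unit is not defined in
 * this file). For gpcclk = 1 GHz and emc3d_ratio = 750:
 *
 *   emc_rate = (1000000000 * 4 * 750) / 1000 = 3 GHz
 *
 * so the requested EMC floor tracks the GPU clock linearly until it
 * is capped at tegra_bwmgr_get_max_emc_rate().
 */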
- */ - -int gp10b_tegra_get_clocks(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - unsigned int i; - - platform->num_clks = 0; - for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { - long rate = tegra_gp10b_clocks[i].default_rate; - struct clk *c; - - c = clk_get(dev, tegra_gp10b_clocks[i].name); - if (IS_ERR(c)) { - nvgpu_err(platform->g, "cannot get clock %s", - tegra_gp10b_clocks[i].name); - } else { - clk_set_rate(c, rate); - platform->clk[i] = c; - if (i == 0) - platform->cached_rate = rate; - } - } - platform->num_clks = i; - - if (platform->clk[0]) { - i = tegra_bpmp_dvfs_get_clk_id(dev->of_node, - tegra_gp10b_clocks[0].name); - if (i > 0) - platform->maxmin_clk_id = i; - } - - return 0; -} - -void gp10b_tegra_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct tegra_bwmgr_client *bwmgr_handle; - - if (!profile) - return; - - if ((struct tegra_bwmgr_client *)profile->private_data) - return; - - bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); - if (!bwmgr_handle) - return; - - profile->private_data = (void *)bwmgr_handle; -} - -static void gp10b_tegra_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (profile) - tegra_bwmgr_unregister( - (struct tegra_bwmgr_client *)profile->private_data); -} - -static int gp10b_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); -#ifdef CONFIG_TEGRA_GK20A_NVHOST - int ret; - - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; -#endif - - ret = gk20a_tegra_init_secure_alloc(platform); - if (ret) - return ret; - - platform->disable_bigpage = !device_is_iommuable(dev); - - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; - - gp10b_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - - return 0; -} - -static int gp10b_tegra_late_probe(struct device *dev) -{ - return 0; -} - -static int gp10b_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - /* deinitialise tegra specific scaling quirks */ - gp10b_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static bool gp10b_tegra_is_railgated(struct device *dev) -{ - bool ret = false; - - if (tegra_bpmp_running()) - ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); - - return ret; -} - -static int gp10b_tegra_railgate(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - /* remove emc frequency floor */ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - 0, TEGRA_BWMGR_SET_EMC_FLOOR); - - if (tegra_bpmp_running() && - tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { - int i; - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_disable_unprepare(platform->clk[i]); - } - tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); - } - return 0; -} - -static int gp10b_tegra_unrailgate(struct device *dev) -{ 
- int ret = 0; - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (tegra_bpmp_running()) { - int i; - ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_prepare_enable(platform->clk[i]); - } - } - - /* to start with set emc frequency floor to max rate*/ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - tegra_bwmgr_get_max_emc_rate(), - TEGRA_BWMGR_SET_EMC_FLOOR); - return ret; -} - -static int gp10b_tegra_suspend(struct device *dev) -{ - return 0; -} - -int gp10b_tegra_reset_assert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - int ret = 0; - - if (!platform->reset_control) - return -EINVAL; - - ret = reset_control_assert(platform->reset_control); - - return ret; -} - -int gp10b_tegra_reset_deassert(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - int ret = 0; - - if (!platform->reset_control) - return -EINVAL; - - ret = reset_control_deassert(platform->reset_control); - - return ret; -} - -void gp10b_tegra_prescale(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - u32 avg = 0; - - nvgpu_log_fn(g, " "); - - nvgpu_pmu_load_norm(g, &avg); - - nvgpu_log_fn(g, "done"); -} - -void gp10b_tegra_postscale(struct device *pdev, - unsigned long freq) -{ - struct gk20a_platform *platform = gk20a_get_platform(pdev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a *g = get_gk20a(pdev); - unsigned long emc_rate; - - nvgpu_log_fn(g, " "); - if (profile && !platform->is_railgated(pdev)) { - unsigned long emc_scale; - - if (freq <= gp10b_freq_table[0]) - emc_scale = 0; - else - emc_scale = g->emc3d_ratio; - - emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; - - if (emc_rate > tegra_bwmgr_get_max_emc_rate()) - emc_rate = tegra_bwmgr_get_max_emc_rate(); - - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); - } - nvgpu_log_fn(g, "done"); -} - -long gp10b_round_clk_rate(struct device *dev, unsigned long rate) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - unsigned long *freq_table = profile->devfreq_profile.freq_table; - int max_states = profile->devfreq_profile.max_state; - int i; - - for (i = 0; i < max_states; ++i) - if (freq_table[i] >= rate) - return freq_table[i]; - - return freq_table[max_states - 1]; -} - -int gp10b_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - unsigned long max_rate; - unsigned long new_rate = 0, prev_rate = 0; - int i = 0, freq_counter = 0; - - max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); - - /* - * Walk the h/w frequency table and only select - * GP10B_FREQ_SELECT_STEP'th frequencies and - * add MAX freq to last - */ - for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { - prev_rate = new_rate; - new_rate = clk_round_rate(platform->clk[0], prev_rate + 1); - - if (i % GP10B_FREQ_SELECT_STEP == 0 || - new_rate == max_rate) { - gp10b_freq_table[freq_counter++] = new_rate; - - if (new_rate == max_rate) - break; - } - } - - WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS); - - /* Fill freq table */ - *freqs = gp10b_freq_table; - *num_freqs = freq_counter; - - nvgpu_log_info(g, "min rate: %ld max rate: %ld 
num_of_freq %d\n", - gp10b_freq_table[0], max_rate, *num_freqs); - - return 0; -} - -struct gk20a_platform gp10b_tegra_platform = { - .has_syncpoints = true, - - /* power management configuration */ - .railgate_delay_init = 500, - - /* ldiv slowdown factor */ - .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, - - /* power management configuration */ - .can_railgate_init = true, - .enable_elpg = true, - .can_elpg_init = true, - .enable_blcg = true, - .enable_slcg = true, - .enable_elcg = true, - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_aelpg = true, - .enable_perfmon = true, - - /* ptimer src frequency in hz*/ - .ptimer_src_freq = 31250000, - - .ch_wdt_timeout_ms = 5000, - - .probe = gp10b_tegra_probe, - .late_probe = gp10b_tegra_late_probe, - .remove = gp10b_tegra_remove, - - /* power management callbacks */ - .suspend = gp10b_tegra_suspend, - .railgate = gp10b_tegra_railgate, - .unrailgate = gp10b_tegra_unrailgate, - .is_railgated = gp10b_tegra_is_railgated, - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - - .dump_platform_dependencies = gk20a_tegra_debug_dump, - -#ifdef CONFIG_NVGPU_SUPPORT_CDE - .has_cde = true, -#endif - - .clk_round_rate = gp10b_round_clk_rate, - .get_clk_freqs = gp10b_clk_get_freqs, - - /* frequency scaling configuration */ - .initscale = gp10b_tegra_scale_init, - .prescale = gp10b_tegra_prescale, - .postscale = gp10b_tegra_postscale, - .devfreq_governor = "nvhost_podgov", - - .qos_notify = gk20a_scale_qos_notify, - - .reset_assert = gp10b_tegra_reset_assert, - .reset_deassert = gp10b_tegra_reset_deassert, - - .force_reset_in_do_idle = false, - - .soc_name = "tegra18x", - - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(36), - - .ltc_streamid = TEGRA_SID_GPUB, - - .secure_buffer_size = 401408, -}; - -void gr_gp10b_create_sysfs(struct gk20a *g) -{ - int error = 0; - struct device *dev = dev_from_gk20a(g); - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. 
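/*
 * A generic, self-contained sketch of the create-once guard described
 * here: creation is skipped when a previous GR init already published
 * the attributes. The real code below keys off an allocated counters
 * array; this sketch uses an explicit flag instead, and the "example"
 * attribute name, show routine, and flag are hypothetical.
 */
#include <linux/device.h>

static bool example_attr_created;

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 0);	/* placeholder counter value */
}
static DEVICE_ATTR_RO(example);

static int example_create_sysfs_once(struct device *dev)
{
	int err;

	if (example_attr_created)	/* GR re-init: nothing to do */
		return 0;

	err = device_create_file(dev, &dev_attr_example);
	if (!err)
		example_attr_created = true;
	return err;
}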
*/ - if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) - return; - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_single_err_count", - &g->ecc.gr.sm_lrf_single_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_double_err_count", - &g->ecc.gr.sm_lrf_double_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sec_count", - &g->ecc.gr.sm_shm_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sed_count", - &g->ecc.gr.sm_shm_sed_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_ded_count", - &g->ecc.gr.sm_shm_ded_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe0_count", - &g->ecc.gr.tex_total_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe0_count", - &g->ecc.gr.tex_total_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe0_count", - &g->ecc.gr.tex_unique_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe0_count", - &g->ecc.gr.tex_unique_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe1_count", - &g->ecc.gr.tex_total_sec_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe1_count", - &g->ecc.gr.tex_total_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe1_count", - &g->ecc.gr.tex_unique_sec_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe1_count", - &g->ecc.gr.tex_unique_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_sec_count", - &g->ecc.ltc.l2_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_ded_count", - &g->ecc.ltc.l2_ded_count); - - if (error) - dev_err(dev, "Failed to create sysfs attributes!\n"); -} - -void gr_gp10b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_lrf_single_err_count.counters) - return; - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_single_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_double_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sed_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_ded_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_ded_count); -} diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h deleted file mode 100644 index 6de90275..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _PLATFORM_GP10B_TEGRA_H_ -#define _PLATFORM_GP10B_TEGRA_H_ - -#include "gp10b/gr_gp10b.h" -#include "platform_ecc_sysfs.h" - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c deleted file mode 100644 index bf66762b..00000000 --- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c +++ /dev/null @@ -1,588 +0,0 @@ -/* - * GV11B Tegra Platform Interface - * - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "clk.h" -#include "scale.h" - -#include "gp10b/platform_gp10b.h" -#include "platform_gp10b_tegra.h" -#include "platform_ecc_sysfs.h" - -#include "os_linux.h" -#include "platform_gk20a_tegra.h" -#include "gv11b/gr_gv11b.h" - -static void gv11b_tegra_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - if (profile) - tegra_bwmgr_unregister( - (struct tegra_bwmgr_client *)profile->private_data); -} - -static int gv11b_tegra_probe(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - int err; - - err = nvgpu_nvhost_syncpt_init(platform->g); - if (err) { - if (err != -ENOSYS) - return err; - } - - err = gk20a_tegra_init_secure_alloc(platform); - if (err) - return err; - - platform->disable_bigpage = !device_is_iommuable(dev); - - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close - = false; - - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; - - gp10b_tegra_get_clocks(dev); - nvgpu_linux_init_clk_support(platform->g); - - return 0; -} - -static int gv11b_tegra_late_probe(struct device *dev) -{ - return 0; -} - - -static int gv11b_tegra_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - - gv11b_tegra_scale_exit(dev); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_free_nvhost_dev(get_gk20a(dev)); -#endif - - return 0; -} - -static bool gv11b_tegra_is_railgated(struct device *dev) -{ - bool 
ret = false; -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a *g = get_gk20a(dev); - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); - - nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } -#endif - return ret; -} - -static int gv11b_tegra_railgate(struct device *dev) -{ -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a *g = get_gk20a(dev); - int i; - - /* remove emc frequency floor */ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - 0, TEGRA_BWMGR_SET_EMC_FLOOR); - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { - nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); - return 0; - } - nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_disable_unprepare(platform->clk[i]); - } - nvgpu_log(g, gpu_dbg_info, "powergate_partition"); - tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } -#endif - return 0; -} - -static int gv11b_tegra_unrailgate(struct device *dev) -{ - int ret = 0; -#ifdef TEGRA194_POWER_DOMAIN_GPU - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - int i; - - if (tegra_bpmp_running()) { - nvgpu_log(g, gpu_dbg_info, "bpmp running"); - ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); - if (ret) { - nvgpu_log(g, gpu_dbg_info, - "unpowergate partition failed"); - return ret; - } - nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); - for (i = 0; i < platform->num_clks; i++) { - if (platform->clk[i]) - clk_prepare_enable(platform->clk[i]); - } - } else { - nvgpu_log(g, gpu_dbg_info, "bpmp not running"); - } - - /* to start with set emc frequency floor to max rate*/ - if (profile) - tegra_bwmgr_set_emc( - (struct tegra_bwmgr_client *)profile->private_data, - tegra_bwmgr_get_max_emc_rate(), - TEGRA_BWMGR_SET_EMC_FLOOR); -#endif - return ret; -} - -static int gv11b_tegra_suspend(struct device *dev) -{ - return 0; -} - -struct gk20a_platform gv11b_tegra_platform = { - .has_syncpoints = true, - - /* ptimer src frequency in hz*/ - .ptimer_src_freq = 31250000, - - .ch_wdt_timeout_ms = 5000, - - .probe = gv11b_tegra_probe, - .late_probe = gv11b_tegra_late_probe, - .remove = gv11b_tegra_remove, - .railgate_delay_init = 500, - .can_railgate_init = true, - - .can_slcg = true, - .can_blcg = true, - .can_elcg = true, - .enable_slcg = true, - .enable_blcg = true, - .enable_elcg = true, - .enable_perfmon = true, - - /* power management configuration */ - .enable_elpg = true, - .can_elpg_init = true, - .enable_aelpg = true, - - /* power management callbacks */ - .suspend = gv11b_tegra_suspend, - .railgate = gv11b_tegra_railgate, - .unrailgate = gv11b_tegra_unrailgate, - .is_railgated = gv11b_tegra_is_railgated, - - .busy = gk20a_tegra_busy, - .idle = gk20a_tegra_idle, - - .clk_round_rate = gp10b_round_clk_rate, - .get_clk_freqs = gp10b_clk_get_freqs, - - /* frequency scaling configuration */ - .initscale = gp10b_tegra_scale_init, - .prescale = gp10b_tegra_prescale, - .postscale = gp10b_tegra_postscale, - 
.devfreq_governor = "nvhost_podgov", - - .qos_notify = gk20a_scale_qos_notify, - - .dump_platform_dependencies = gk20a_tegra_debug_dump, - - .soc_name = "tegra19x", - - .honors_aperture = true, - .unified_memory = true, - .dma_mask = DMA_BIT_MASK(36), - - .reset_assert = gp10b_tegra_reset_assert, - .reset_deassert = gp10b_tegra_reset_deassert, - - .secure_buffer_size = 667648, -}; - -void gr_gv11b_create_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - int error = 0; - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. */ - if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL) - return; - - gr_gp10b_create_sysfs(g); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_corrected_err_count", - &g->ecc.gr.sm_cbu_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_uncorrected_err_count", - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_data_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_corrected_err_count", - &g->ecc.gr.sm_icache_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_uncorrected_err_count", - &g->ecc.gr.sm_icache_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_corrected_err_count", - &g->ecc.gr.gcc_l15_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_uncorrected_err_count", - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_uncorrected_err_count", - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_corrected_err_count", - &g->ecc.ltc.l2_cache_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_uncorrected_err_count", - &g->ecc.gr.fecs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_corrected_err_count", - &g->ecc.gr.fecs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_uncorrected_err_count", - &g->ecc.gr.gpccs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_corrected_err_count", - &g->ecc.gr.gpccs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_uncorrected_err_count", - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_corrected_err_count", - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_uncorrected_err_count", - 
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_corrected_err_count", - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_uncorrected_err_count", - &g->ecc.pmu.pmu_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_corrected_err_count", - &g->ecc.pmu.pmu_corrected_err_count); - - if (error) - dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); -} - -void gr_gv11b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters) - return; - gr_gp10b_remove_sysfs(g); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - 
&g->ecc.pmu.pmu_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.pmu.pmu_corrected_err_count); -} diff --git a/drivers/gpu/nvgpu/common/linux/rwsem.c b/drivers/gpu/nvgpu/common/linux/rwsem.c deleted file mode 100644 index 297ddf11..00000000 --- a/drivers/gpu/nvgpu/common/linux/rwsem.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) -{ - init_rwsem(&rwsem->rwsem); -} - -void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) -{ - up_read(&rwsem->rwsem); -} - -void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) -{ - down_read(&rwsem->rwsem); -} - -void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) -{ - up_write(&rwsem->rwsem); -} - -void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) -{ - down_write(&rwsem->rwsem); -} diff --git a/drivers/gpu/nvgpu/common/linux/scale.c b/drivers/gpu/nvgpu/common/linux/scale.c deleted file mode 100644 index 84ac1cfd..00000000 --- a/drivers/gpu/nvgpu/common/linux/scale.c +++ /dev/null @@ -1,428 +0,0 @@ -/* - * gk20a clock scaling profile - * - * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include - -#include -#include - -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "scale.h" -#include "os_linux.h" - -/* - * gk20a_scale_qos_notify() - * - * This function is called when the minimum QoS requirement for the device - * has changed. The function calls postscaling callback if it is defined. 
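/*
 * Usage sketch for the nvgpu_rwsem wrappers in rwsem.c above: they
 * exist so OS-independent nvgpu code can take reader/writer locks
 * without touching Linux headers directly. The header path and the
 * example_db structure are assumptions for illustration.
 */
#include <nvgpu/rwsem.h>

struct example_db {
	struct nvgpu_rwsem lock;
	unsigned long entries;
};

static void example_db_init(struct example_db *db)
{
	nvgpu_rwsem_init(&db->lock);
	db->entries = 0;
}

static unsigned long example_db_count(struct example_db *db)
{
	unsigned long n;

	nvgpu_rwsem_down_read(&db->lock);	/* concurrent readers OK */
	n = db->entries;
	nvgpu_rwsem_up_read(&db->lock);
	return n;
}

static void example_db_add(struct example_db *db)
{
	nvgpu_rwsem_down_write(&db->lock);	/* exclusive writer */
	db->entries++;
	nvgpu_rwsem_up_write(&db->lock);
}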
- */ - -#if defined(CONFIG_COMMON_CLK) -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a *g = get_gk20a(profile->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return NOTIFY_OK; - - mutex_lock(&devfreq->lock); - /* check for pm_qos min and max frequency requirement */ - profile->qos_min_freq = - (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; - profile->qos_max_freq = - (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; - - if (profile->qos_min_freq > profile->qos_max_freq) { - nvgpu_err(g, - "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", - profile->qos_min_freq, profile->qos_max_freq); - profile->qos_min_freq = profile->qos_max_freq; - } - - update_devfreq(devfreq); - mutex_unlock(&devfreq->lock); - - return NOTIFY_OK; -} -#else -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a_platform *platform = dev_get_drvdata(profile->dev); - struct gk20a *g = get_gk20a(profile->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - unsigned long freq; - - if (!platform->postscale) - return NOTIFY_OK; - - /* get the frequency requirement. if devfreq is enabled, check if it - * has higher demand than qos */ - freq = platform->clk_round_rate(profile->dev, - (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); - if (l->devfreq) - freq = max(l->devfreq->previous_freq, freq); - - /* Update gpu load because we may scale the emc target - * if the gpu load changed. */ - nvgpu_pmu_load_update(g); - platform->postscale(profile->dev, freq); - - return NOTIFY_OK; -} -#endif - -/* - * gk20a_scale_make_freq_table(profile) - * - * This function initialises the frequency table for the given device profile - */ - -static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) -{ - struct gk20a_platform *platform = dev_get_drvdata(profile->dev); - int num_freqs, err; - unsigned long *freqs; - - if (platform->get_clk_freqs) { - /* get gpu frequency table */ - err = platform->get_clk_freqs(profile->dev, &freqs, - &num_freqs); - if (err) - return -ENOSYS; - } else - return -ENOSYS; - - profile->devfreq_profile.freq_table = (unsigned long *)freqs; - profile->devfreq_profile.max_state = num_freqs; - - return 0; -} - -/* - * gk20a_scale_target(dev, *freq, flags) - * - * This function scales the clock - */ - -static int gk20a_scale_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_scale_profile *profile = g->scale_profile; - struct devfreq *devfreq = l->devfreq; - unsigned long local_freq = *freq; - unsigned long rounded_rate; - unsigned long min_freq = 0, max_freq = 0; - - /* - * Calculate floor and cap frequency values - * - * Policy : - * We have two APIs to clip the frequency - * 1. devfreq - * 2. 
pm_qos - * - * To calculate floor (min) freq, we select MAX of floor frequencies - * requested from both APIs - * To get cap (max) freq, we select MIN of max frequencies - * - * In case we have conflict (min_freq > max_freq) after above - * steps, we ensure that max_freq wins over min_freq - */ - min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq); - max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq); - - if (min_freq > max_freq) - min_freq = max_freq; - - /* Clip requested frequency */ - if (local_freq < min_freq) - local_freq = min_freq; - - if (local_freq > max_freq) - local_freq = max_freq; - - /* set the final frequency */ - rounded_rate = platform->clk_round_rate(dev, local_freq); - - /* Check for duplicate request */ - if (rounded_rate == g->last_freq) - return 0; - - if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) - *freq = rounded_rate; - else { - g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); - *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - } - - g->last_freq = *freq; - - /* postscale will only scale emc (dram clock) if evaluating - * gk20a_tegra_get_emc_rate() produces a new or different emc - * target because the load or_and gpufreq has changed */ - if (platform->postscale) - platform->postscale(dev, rounded_rate); - - return 0; -} - -/* - * update_load_estimate_gpmu(profile) - * - * Update load estimate using gpmu. The gpmu value is normalised - * based on the time it was asked last time. - */ - -static void update_load_estimate_gpmu(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - unsigned long dt; - u32 busy_time; - ktime_t t; - - t = ktime_get(); - dt = ktime_us_delta(t, profile->last_event_time); - - profile->dev_stat.total_time = dt; - profile->last_event_time = t; - nvgpu_pmu_load_norm(g, &busy_time); - profile->dev_stat.busy_time = (busy_time * dt) / 1000; -} - -/* - * gk20a_scale_suspend(dev) - * - * This function informs devfreq of suspend - */ - -void gk20a_scale_suspend(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return; - - devfreq_suspend_device(devfreq); -} - -/* - * gk20a_scale_resume(dev) - * - * This functions informs devfreq of resume - */ - -void gk20a_scale_resume(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct devfreq *devfreq = l->devfreq; - - if (!devfreq) - return; - - g->last_freq = 0; - devfreq_resume_device(devfreq); -} - -/* - * gk20a_scale_get_dev_status(dev, *stat) - * - * This function queries the current device status. 
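/*
 * The frequency-clipping policy implemented by gk20a_scale_target()
 * above, restated as a self-contained helper: the effective floor is
 * the MAX of the devfreq and pm_qos floors, the effective cap is the
 * MIN of the two caps, and on conflict the cap wins. Uses the
 * kernel's min/max/clamp helpers; "example_clip_freq" is a
 * hypothetical name.
 */
#include <linux/kernel.h>

static unsigned long example_clip_freq(unsigned long requested,
				       unsigned long devfreq_min,
				       unsigned long devfreq_max,
				       unsigned long qos_min,
				       unsigned long qos_max)
{
	unsigned long min_freq = max(devfreq_min, qos_min);
	unsigned long max_freq = min(devfreq_max, qos_max);

	if (min_freq > max_freq)	/* conflicting limits: cap wins */
		min_freq = max_freq;

	return clamp(requested, min_freq, max_freq);
}

/*
 * e.g. requested = 900 MHz with devfreq allowing [100, 1000] MHz but
 * QoS capping at 600 MHz: floor = 100, cap = 600, result = 600 MHz.
 */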
- */ - -static int gk20a_scale_get_dev_status(struct device *dev, - struct devfreq_dev_status *stat) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - struct gk20a_platform *platform = dev_get_drvdata(dev); - - /* update the software shadow */ - nvgpu_pmu_load_update(g); - - /* inform edp about new constraint */ - if (platform->prescale) - platform->prescale(dev); - - /* Make sure there are correct values for the current frequency */ - profile->dev_stat.current_frequency = - g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - - /* Update load estimate */ - update_load_estimate_gpmu(dev); - - /* Copy the contents of the current device status */ - *stat = profile->dev_stat; - - /* Finally, clear out the local values */ - profile->dev_stat.total_time = 0; - profile->dev_stat.busy_time = 0; - - return 0; -} - -/* - * get_cur_freq(struct device *dev, unsigned long *freq) - * - * This function gets the current GPU clock rate. - */ - -static int get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct gk20a *g = get_gk20a(dev); - *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); - return 0; -} - - -/* - * gk20a_scale_init(dev) - */ - -void gk20a_scale_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_scale_profile *profile; - int err; - - if (g->scale_profile) - return; - - if (!platform->devfreq_governor && !platform->qos_notify) - return; - - profile = nvgpu_kzalloc(g, sizeof(*profile)); - - profile->dev = dev; - profile->dev_stat.busy = false; - - /* Create frequency table */ - err = gk20a_scale_make_freq_table(profile); - if (err || !profile->devfreq_profile.max_state) - goto err_get_freqs; - - profile->qos_min_freq = 0; - profile->qos_max_freq = UINT_MAX; - - /* Store device profile so we can access it if devfreq governor - * init needs that */ - g->scale_profile = profile; - - if (platform->devfreq_governor) { - struct devfreq *devfreq; - - profile->devfreq_profile.initial_freq = - profile->devfreq_profile.freq_table[0]; - profile->devfreq_profile.target = gk20a_scale_target; - profile->devfreq_profile.get_dev_status = - gk20a_scale_get_dev_status; - profile->devfreq_profile.get_cur_freq = get_cur_freq; - profile->devfreq_profile.polling_ms = 25; - - devfreq = devfreq_add_device(dev, - &profile->devfreq_profile, - platform->devfreq_governor, NULL); - - if (IS_ERR(devfreq)) - devfreq = NULL; - - l->devfreq = devfreq; - } - - /* Should we register QoS callback for this device? 
*/ - if (platform->qos_notify) { - profile->qos_notify_block.notifier_call = - platform->qos_notify; - - pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - } - - return; - -err_get_freqs: - nvgpu_kfree(g, profile); -} - -void gk20a_scale_exit(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a *g = platform->g; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - - if (platform->qos_notify) { - pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - } - - if (platform->devfreq_governor) { - err = devfreq_remove_device(l->devfreq); - l->devfreq = NULL; - } - - nvgpu_kfree(g, g->scale_profile); - g->scale_profile = NULL; -} - -/* - * gk20a_scale_hw_init(dev) - * - * Initialize hardware portion of the device - */ - -void gk20a_scale_hw_init(struct device *dev) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct gk20a_scale_profile *profile = platform->g->scale_profile; - - /* make sure that scaling has bee initialised */ - if (!profile) - return; - - profile->dev_stat.total_time = 0; - profile->last_event_time = ktime_get(); -} diff --git a/drivers/gpu/nvgpu/common/linux/scale.h b/drivers/gpu/nvgpu/common/linux/scale.h deleted file mode 100644 index c1e6fe86..00000000 --- a/drivers/gpu/nvgpu/common/linux/scale.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * gk20a clock scaling profile - * - * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef GK20A_SCALE_H -#define GK20A_SCALE_H - -#include - -struct clk; - -struct gk20a_scale_profile { - struct device *dev; - ktime_t last_event_time; - struct devfreq_dev_profile devfreq_profile; - struct devfreq_dev_status dev_stat; - struct notifier_block qos_notify_block; - unsigned long qos_min_freq; - unsigned long qos_max_freq; - void *private_data; -}; - -/* Initialization and de-initialization for module */ -void gk20a_scale_init(struct device *); -void gk20a_scale_exit(struct device *); -void gk20a_scale_hw_init(struct device *dev); - -#if defined(CONFIG_GK20A_DEVFREQ) -/* - * call when performing submit to notify scaling mechanism that the module is - * in use - */ -void gk20a_scale_notify_busy(struct device *); -void gk20a_scale_notify_idle(struct device *); - -void gk20a_scale_suspend(struct device *); -void gk20a_scale_resume(struct device *); -int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p); -#else -static inline void gk20a_scale_notify_busy(struct device *dev) {} -static inline void gk20a_scale_notify_idle(struct device *dev) {} -static inline void gk20a_scale_suspend(struct device *dev) {} -static inline void gk20a_scale_resume(struct device *dev) {} -static inline int gk20a_scale_qos_notify(struct notifier_block *nb, - unsigned long n, void *p) -{ - return -ENOSYS; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c deleted file mode 100644 index 2ad5aabf..00000000 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "sched.h" -#include "os_linux.h" -#include "ioctl_tsg.h" - -#include -#include - -ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, - size_t size, loff_t *off) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - struct nvgpu_sched_event_arg event = { 0 }; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, - "filp=%p buf=%p size=%zu", filp, buf, size); - - if (size < sizeof(event)) - return -EINVAL; - size = sizeof(event); - - nvgpu_mutex_acquire(&sched->status_lock); - while (!sched->status) { - nvgpu_mutex_release(&sched->status_lock); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, - sched->status, 0); - if (err) - return err; - nvgpu_mutex_acquire(&sched->status_lock); - } - - event.reserved = 0; - event.status = sched->status; - - if (copy_to_user(buf, &event, size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - - sched->status = 0; - - nvgpu_mutex_release(&sched->status_lock); - - return size; -} - -unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - unsigned int mask = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->status_lock); - poll_wait(filp, &sched->readout_wq.wq, wait); - if (sched->status) - mask |= POLLIN | POLLRDNORM; - nvgpu_mutex_release(&sched->status_lock); - - return mask; -} - -static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_args *arg) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", - arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - nvgpu_mutex_acquire(&sched->status_lock); - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - sched->active_tsg_bitmap, sched->bitmap_size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_args *arg) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", - arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - nvgpu_mutex_acquire(&sched->status_lock); - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - sched->recent_tsg_bitmap, sched->bitmap_size)) { - nvgpu_mutex_release(&sched->status_lock); - return -EFAULT; - } - - memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_get_tsgs_by_pid_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u64 *bitmap; - unsigned int tsgid; - /* pid at user level corresponds to kernel tgid */ - pid_t tgid = (pid_t)arg->pid; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", - (pid_t)arg->pid, arg->size, arg->buffer); - - if ((arg->size < sched->bitmap_size) 
|| (!arg->buffer)) { - arg->size = sched->bitmap_size; - return -ENOSPC; - } - - bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size); - if (!bitmap) - return -ENOMEM; - - nvgpu_mutex_acquire(&sched->status_lock); - for (tsgid = 0; tsgid < f->num_channels; tsgid++) { - if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { - tsg = &f->tsg[tsgid]; - if (tsg->tgid == tgid) - NVGPU_SCHED_SET(tsgid, bitmap); - } - } - nvgpu_mutex_release(&sched->status_lock); - - if (copy_to_user((void __user *)(uintptr_t)arg->buffer, - bitmap, sched->bitmap_size)) - err = -EFAULT; - - nvgpu_kfree(sched->g, bitmap); - - return err; -} - -static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_get_params_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ - arg->runlist_interleave = tsg->interleave_level; - arg->timeslice = tsg->timeslice_us; - - arg->graphics_preempt_mode = - tsg->gr_ctx.graphics_preempt_mode; - arg->compute_preempt_mode = - tsg->gr_ctx.compute_preempt_mode; - - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return 0; -} - -static int gk20a_sched_dev_ioctl_tsg_set_timeslice( - struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_timeslice_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - err = gk20a_busy(g); - if (err) - goto done; - - err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); - - gk20a_idle(g); - -done: - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return err; -} - -static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( - struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_runlist_interleave_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - err = gk20a_busy(g); - if (err) - goto done; - - err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); - - gk20a_idle(g); - -done: - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return err; -} - -static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = true; - nvgpu_mutex_release(&sched->control_lock); - return 0; -} - -static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = false; - nvgpu_mutex_release(&sched->control_lock); - 
return 0; -} - -static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_api_version_args *args) -{ - struct gk20a *g = sched->g; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); - - args->version = NVGPU_SCHED_API_VERSION; - return 0; -} - -static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_refcount_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - tsg = &f->tsg[tsgid]; - if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) - return -ENXIO; - - nvgpu_mutex_acquire(&sched->status_lock); - if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - nvgpu_warn(g, "tsgid=%d already referenced", tsgid); - /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ - nvgpu_mutex_release(&sched->status_lock); - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - return -ENXIO; - } - - /* keep reference on TSG, will be released on - * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close - */ - NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); - nvgpu_mutex_release(&sched->status_lock); - - return 0; -} - -static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, - struct nvgpu_sched_tsg_refcount_args *arg) -{ - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - u32 tsgid = arg->tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); - - if (tsgid >= f->num_channels) - return -EINVAL; - - nvgpu_speculation_barrier(); - - nvgpu_mutex_acquire(&sched->status_lock); - if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - nvgpu_mutex_release(&sched->status_lock); - nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); - return -ENXIO; - } - NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); - nvgpu_mutex_release(&sched->status_lock); - - tsg = &f->tsg[tsgid]; - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - - return 0; -} - -int gk20a_sched_dev_open(struct inode *inode, struct file *filp) -{ - struct nvgpu_os_linux *l = container_of(inode->i_cdev, - struct nvgpu_os_linux, sched.cdev); - struct gk20a *g; - struct gk20a_sched_ctrl *sched; - int err = 0; - - g = gk20a_get(&l->g); - if (!g) - return -ENODEV; - sched = &l->sched_ctrl; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); - - if (!sched->sw_ready) { - err = gk20a_busy(g); - if (err) - goto free_ref; - - gk20a_idle(g); - } - - if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { - err = -EBUSY; - goto free_ref; - } - - memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, - sched->bitmap_size); - memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); - - filp->private_data = sched; - nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); - -free_ref: - if (err) - gk20a_put(g); - return err; -} - -long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; - int err = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); - - if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || - (_IOC_NR(cmd) == 0) || - (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || - (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) - return -EINVAL; - - memset(buf, 0, sizeof(buf)); - if (_IOC_DIR(cmd) & 
_IOC_WRITE) { - if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) - return -EFAULT; - } - - switch (cmd) { - case NVGPU_SCHED_IOCTL_GET_TSGS: - err = gk20a_sched_dev_ioctl_get_tsgs(sched, - (struct nvgpu_sched_get_tsgs_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: - err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched, - (struct nvgpu_sched_get_tsgs_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: - err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched, - (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: - err = gk20a_sched_dev_ioctl_get_params(sched, - (struct nvgpu_sched_tsg_get_params_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: - err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched, - (struct nvgpu_sched_tsg_timeslice_args *)buf); - break; - case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: - err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched, - (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); - break; - case NVGPU_SCHED_IOCTL_LOCK_CONTROL: - err = gk20a_sched_dev_ioctl_lock_control(sched); - break; - case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: - err = gk20a_sched_dev_ioctl_unlock_control(sched); - break; - case NVGPU_SCHED_IOCTL_GET_API_VERSION: - err = gk20a_sched_dev_ioctl_get_api_version(sched, - (struct nvgpu_sched_api_version_args *)buf); - break; - case NVGPU_SCHED_IOCTL_GET_TSG: - err = gk20a_sched_dev_ioctl_get_tsg(sched, - (struct nvgpu_sched_tsg_refcount_args *)buf); - break; - case NVGPU_SCHED_IOCTL_PUT_TSG: - err = gk20a_sched_dev_ioctl_put_tsg(sched, - (struct nvgpu_sched_tsg_refcount_args *)buf); - break; - default: - nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); - err = -ENOTTY; - } - - /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on - * purpose with NULL buffer and/or zero size to discover TSG bitmap - * size. We need to update user arguments in this case too, even - * if we return an error. 
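Concretely, the two-call size discovery described here looks like the following from user space. This is a hedged sketch: the uapi header location and device-node handling are assumptions, while the buffer/size fields of struct nvgpu_sched_get_tsgs_args and the -ENOSPC round trip come straight from the handlers above.

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed location of the sched ioctl uapi */

    static uint64_t *query_tsg_bitmap(int fd, uint32_t *size)
    {
        struct nvgpu_sched_get_tsgs_args args = { .buffer = 0, .size = 0 };
        uint64_t *bitmap;

        /* First call: NULL buffer. The kernel writes the required size
         * back into args.size and fails the ioctl with ENOSPC. */
        if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) == 0 ||
            errno != ENOSPC)
            return NULL;

        bitmap = calloc(1, args.size);
        if (!bitmap)
            return NULL;

        /* Second call: retry with a buffer of the advertised size. */
        args.buffer = (uintptr_t)bitmap;
        if (ioctl(fd, NVGPU_SCHED_IOCTL_GET_TSGS, &args) != 0) {
            free(bitmap);
            return NULL;
        }

        *size = args.size;
        return bitmap;
    }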
- */ - if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { - if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) - err = -EFAULT; - } - - return err; -} - -int gk20a_sched_dev_release(struct inode *inode, struct file *filp) -{ - struct gk20a_sched_ctrl *sched = filp->private_data; - struct gk20a *g = sched->g; - struct fifo_gk20a *f = &g->fifo; - struct tsg_gk20a *tsg; - unsigned int tsgid; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); - - /* release any reference to TSGs */ - for (tsgid = 0; tsgid < f->num_channels; tsgid++) { - if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { - tsg = &f->tsg[tsgid]; - nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); - } - } - - /* unlock control */ - nvgpu_mutex_acquire(&sched->control_lock); - sched->control_locked = false; - nvgpu_mutex_release(&sched->control_lock); - - nvgpu_mutex_release(&sched->busy_lock); - gk20a_put(g); - return 0; -} - -void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - if (!sched->sw_ready) { - err = gk20a_busy(g); - if (err) { - WARN_ON(err); - return; - } - - gk20a_idle(g); - } - - nvgpu_mutex_acquire(&sched->status_lock); - NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); - NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); - sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; - nvgpu_mutex_release(&sched->status_lock); - nvgpu_cond_signal_interruptible(&sched->readout_wq); -} - -void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); - - nvgpu_mutex_acquire(&sched->status_lock); - NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); - - /* clear recent_tsg_bitmap as well: if app manager did not - * notice that TSG was previously added, no need to notify it - * if the TSG has been released in the meantime. If the - * TSG gets reallocated, app manager will be notified as usual. 
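On the consuming side, gk20a_sched_ctrl_tsg_added() above wakes readout_wq so that a user-space manager blocked on the sched node can react to newly opened TSGs. A sketch of such a consumer, built on the gk20a_sched_dev_poll() hook declared in sched.h; the poll-then-ioctl sequence is an assumption for illustration, not code from this driver.

    #include <poll.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed location of the sched ioctl uapi */

    static void watch_tsg_events(int fd)
    {
        struct nvgpu_sched_get_tsgs_args args = { .buffer = 0, .size = 0 };
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        for (;;) {
            if (poll(&pfd, 1, -1) < 0)
                break;                    /* interrupted or fd went away */
            if (pfd.revents & POLLIN)
                /* ENOSPC is expected here with a NULL buffer: it only
                 * reports the bitmap size, as described above. */
                ioctl(fd, NVGPU_SCHED_IOCTL_GET_RECENT_TSGS, &args);
        }
    }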
- */ - NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); - - /* do not set event_pending, we only want to notify app manager - * when TSGs are added, so that it can apply sched params - */ - nvgpu_mutex_release(&sched->status_lock); -} - -int gk20a_sched_ctrl_init(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - struct fifo_gk20a *f = &g->fifo; - int err; - - if (sched->sw_ready) - return 0; - - sched->g = g; - sched->bitmap_size = roundup(f->num_channels, 64) / 8; - sched->status = 0; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", - g, sched, sched->bitmap_size); - - sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->active_tsg_bitmap) - return -ENOMEM; - - sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->recent_tsg_bitmap) { - err = -ENOMEM; - goto free_active; - } - - sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); - if (!sched->ref_tsg_bitmap) { - err = -ENOMEM; - goto free_recent; - } - - nvgpu_cond_init(&sched->readout_wq); - - err = nvgpu_mutex_init(&sched->status_lock); - if (err) - goto free_ref; - - err = nvgpu_mutex_init(&sched->control_lock); - if (err) - goto free_status_lock; - - err = nvgpu_mutex_init(&sched->busy_lock); - if (err) - goto free_control_lock; - - sched->sw_ready = true; - - return 0; - -free_control_lock: - nvgpu_mutex_destroy(&sched->control_lock); -free_status_lock: - nvgpu_mutex_destroy(&sched->status_lock); -free_ref: - nvgpu_kfree(g, sched->ref_tsg_bitmap); -free_recent: - nvgpu_kfree(g, sched->recent_tsg_bitmap); -free_active: - nvgpu_kfree(g, sched->active_tsg_bitmap); - - return err; -} - -void gk20a_sched_ctrl_cleanup(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct gk20a_sched_ctrl *sched = &l->sched_ctrl; - - nvgpu_kfree(g, sched->active_tsg_bitmap); - nvgpu_kfree(g, sched->recent_tsg_bitmap); - nvgpu_kfree(g, sched->ref_tsg_bitmap); - sched->active_tsg_bitmap = NULL; - sched->recent_tsg_bitmap = NULL; - sched->ref_tsg_bitmap = NULL; - - nvgpu_mutex_destroy(&sched->status_lock); - nvgpu_mutex_destroy(&sched->control_lock); - nvgpu_mutex_destroy(&sched->busy_lock); - - sched->sw_ready = false; -} diff --git a/drivers/gpu/nvgpu/common/linux/sched.h b/drivers/gpu/nvgpu/common/linux/sched.h deleted file mode 100644 index a699bbea..00000000 --- a/drivers/gpu/nvgpu/common/linux/sched.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
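gk20a_sched_ctrl_init() above sizes the three TSG bitmaps as roundup(f->num_channels, 64) / 8: the channel count is rounded up to a whole number of 64-bit words, then converted from bits to bytes. A stand-alone check of that arithmetic; roundup() is re-declared with the kernel's definition so the snippet builds outside the kernel.

    #include <assert.h>

    #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

    int main(void)
    {
        assert(roundup(511, 64) / 8 == 64);  /* 511 channels -> 8 u64 words */
        assert(roundup(512, 64) / 8 == 64);  /* exact multiple: same size   */
        assert(roundup(513, 64) / 8 == 72);  /* one more channel -> 9 words */
        return 0;
    }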
- */ -#ifndef __NVGPU_SCHED_H -#define __NVGPU_SCHED_H - -struct gk20a; -struct gpu_ops; -struct tsg_gk20a; -struct poll_table_struct; - -struct gk20a_sched_ctrl { - struct gk20a *g; - - struct nvgpu_mutex control_lock; - bool control_locked; - bool sw_ready; - struct nvgpu_mutex status_lock; - struct nvgpu_mutex busy_lock; - - u64 status; - - size_t bitmap_size; - u64 *active_tsg_bitmap; - u64 *recent_tsg_bitmap; - u64 *ref_tsg_bitmap; - - struct nvgpu_cond readout_wq; -}; - -int gk20a_sched_dev_release(struct inode *inode, struct file *filp); -int gk20a_sched_dev_open(struct inode *inode, struct file *filp); -long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); -ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); -unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); - -void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); -void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); -int gk20a_sched_ctrl_init(struct gk20a *); - -void gk20a_sched_ctrl_cleanup(struct gk20a *g); - -#endif /* __NVGPU_SCHED_H */ diff --git a/drivers/gpu/nvgpu/common/linux/sim.c b/drivers/gpu/nvgpu/common/linux/sim.c deleted file mode 100644 index 8e964f39..00000000 --- a/drivers/gpu/nvgpu/common/linux/sim.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
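The open/release/ioctl/read/poll prototypes above are the sched node's file hooks. For orientation, a sketch of how such hooks are typically wired into a file_operations table; the actual table lives in the driver's ioctl plumbing elsewhere, so the instance below is illustrative only.

    static const struct file_operations gk20a_sched_fops = {
        .owner = THIS_MODULE,
        .open = gk20a_sched_dev_open,
        .release = gk20a_sched_dev_release,
        .unlocked_ioctl = gk20a_sched_dev_ioctl,
        .read = gk20a_sched_dev_read,
        .poll = gk20a_sched_dev_poll,
    };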
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include "gk20a/gk20a.h" -#include "platform_gk20a.h" -#include "os_linux.h" -#include "module.h" - -void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) -{ - struct sim_nvgpu_linux *sim_linux = - container_of(sim, struct sim_nvgpu_linux, sim); - - writel(v, sim_linux->regs + r); -} - -u32 sim_readl(struct sim_nvgpu *sim, u32 r) -{ - struct sim_nvgpu_linux *sim_linux = - container_of(sim, struct sim_nvgpu_linux, sim); - - return readl(sim_linux->regs + r); -} - -void nvgpu_remove_sim_support_linux(struct gk20a *g) -{ - struct sim_nvgpu_linux *sim_linux; - - if (!g->sim) - return; - - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - if (sim_linux->regs) { - sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); - iounmap(sim_linux->regs); - sim_linux->regs = NULL; - } - nvgpu_kfree(g, sim_linux); - g->sim = NULL; -} - -int nvgpu_init_sim_support_linux(struct gk20a *g, - struct platform_device *dev) -{ - struct sim_nvgpu_linux *sim_linux; - int err = -ENOMEM; - - if (!nvgpu_platform_is_simulation(g)) - return 0; - - sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); - if (!sim_linux) - return err; - g->sim = &sim_linux->sim; - g->sim->g = g; - sim_linux->regs = nvgpu_ioremap_resource(dev, - GK20A_SIM_IORESOURCE_MEM, - &sim_linux->reg_mem); - if (IS_ERR(sim_linux->regs)) { - nvgpu_err(g, "failed to remap gk20a sim regs"); - err = PTR_ERR(sim_linux->regs); - goto fail; - } - sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; - return 0; - -fail: - nvgpu_remove_sim_support_linux(g); - return err; -} diff --git a/drivers/gpu/nvgpu/common/linux/sim_pci.c b/drivers/gpu/nvgpu/common/linux/sim_pci.c deleted file mode 100644 index d37767b7..00000000 --- a/drivers/gpu/nvgpu/common/linux/sim_pci.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
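sim_writel() and sim_readl() above recover the Linux-specific wrapper from the embedded common struct with container_of(). A stand-alone illustration of that embedding pattern; the struct names here are invented for the demo.

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct sim_common { int id; };            /* OS-independent part */

    struct sim_linux_wrap {                   /* OS-specific wrapper */
        void *regs;
        struct sim_common sim;                /* embedded common struct */
    };

    int main(void)
    {
        struct sim_linux_wrap wrap = { .regs = NULL, .sim = { .id = 42 } };
        struct sim_common *common = &wrap.sim;

        /* Given only the embedded member, recover the enclosing struct. */
        struct sim_linux_wrap *back =
            container_of(common, struct sim_linux_wrap, sim);

        printf("id=%d same=%d\n", back->sim.id, back == &wrap);
        return 0;
    }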
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "gk20a/gk20a.h" -#include "os_linux.h" -#include "module.h" - -static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) -{ - u32 cfg; - bool is_simulation = false; - - cfg = nvgpu_readl(g, sim_base + sim_config_r()); - if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) - is_simulation = true; - - return is_simulation; -} - -void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) -{ - struct sim_nvgpu_linux *sim_linux; - bool is_simulation; - - is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); - - if (!is_simulation) { - return; - } - - if (!g->sim) { - nvgpu_warn(g, "sim_gk20a not allocated"); - return; - } - sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); - - if (sim_linux->regs) { - sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); - sim_linux->regs = NULL; - } - nvgpu_kfree(g, sim_linux); - g->sim = NULL; -} - -int nvgpu_init_sim_support_linux_pci(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - struct sim_nvgpu_linux *sim_linux; - int err = -ENOMEM; - bool is_simulation; - - is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); - __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); - - if (!is_simulation) - return 0; - - sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); - if (!sim_linux) - return err; - g->sim = &sim_linux->sim; - g->sim->g = g; - sim_linux->regs = l->regs + sim_r(); - sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/soc.c b/drivers/gpu/nvgpu/common/linux/soc.c deleted file mode 100644 index 1b27d6f1..00000000 --- a/drivers/gpu/nvgpu/common/linux/soc.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#ifdef CONFIG_TEGRA_HV_MANAGER -#include -#endif - -#include -#include "os_linux.h" -#include "platform_gk20a.h" - -bool nvgpu_platform_is_silicon(struct gk20a *g) -{ - return tegra_platform_is_silicon(); -} - -bool nvgpu_platform_is_simulation(struct gk20a *g) -{ - return tegra_platform_is_vdk(); -} - -bool nvgpu_platform_is_fpga(struct gk20a *g) -{ - return tegra_platform_is_fpga(); -} - -bool nvgpu_is_hypervisor_mode(struct gk20a *g) -{ - return is_tegra_hypervisor_mode(); -} - -bool nvgpu_is_bpmp_running(struct gk20a *g) -{ - return tegra_bpmp_running(); -} - -bool nvgpu_is_soc_t194_a01(struct gk20a *g) -{ - return ((tegra_get_chip_id() == TEGRA194 && - tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? - true : false); -} - -#ifdef CONFIG_TEGRA_HV_MANAGER -/* When nvlink is enabled on dGPU, we need to use physical memory addresses. - * There is no SMMU translation. However, the device initially enumerates as a - * PCIe device. As such, when allocation memory for this PCIe device, the DMA - * framework ends up allocating memory using SMMU (if enabled in device tree). 
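(This comment continues below; the hook it motivates is installed at the bottom of the file by nvgpu_init_soc_vars().) As a hedged sketch, a consumer of platform->phys_addr might look like the following; the wrapper function is hypothetical, only the hook itself comes from this file.

    static u64 demo_ipa_to_pa(struct gk20a *g, u64 ipa)
    {
        struct gk20a_platform *platform =
            gk20a_get_platform(dev_from_gk20a(g));

        /* No hypervisor: no stage-2 translation, so IPA == PA. */
        if (!platform->phys_addr)
            return ipa;

        return platform->phys_addr(g, ipa);
    }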
- * As a result, when we switch to nvlink, we need to use underlying physical - * addresses, even if memory mappings exist in SMMU. - * In addition, when stage-2 SMMU translation is enabled (for instance when HV - * is enabled), the addresses we get from dma_alloc are IPAs. We need to - * convert them to PA. - */ -static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) -{ - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct hyp_ipa_pa_info info; - int err; - u64 pa = 0ULL; - - err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); - if (err < 0) { - /* WAR for bug 2096877 - * hyp_read_ipa_pa_info only looks up RAM mappings. - * assume one to one IPA:PA mapping for syncpt aperture - */ - u64 start = g->syncpt_unit_base; - u64 end = g->syncpt_unit_base + g->syncpt_unit_size; - if ((ipa >= start) && (ipa < end)) { - pa = ipa; - nvgpu_log(g, gpu_dbg_map_v, - "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", - ipa, platform->vmid, pa); - } else { - nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", - ipa, platform->vmid, err); - } - } else { - pa = info.base + info.offset; - nvgpu_log(g, gpu_dbg_map_v, - "ipa=%llx vmid=%d -> pa=%llx " - "base=%llx offset=%llx size=%llx\n", - ipa, platform->vmid, pa, info.base, - info.offset, info.size); - } - return pa; -} -#endif - -int nvgpu_init_soc_vars(struct gk20a *g) -{ -#ifdef CONFIG_TEGRA_HV_MANAGER - struct device *dev = dev_from_gk20a(g); - struct gk20a_platform *platform = gk20a_get_platform(dev); - int err; - - if (nvgpu_is_hypervisor_mode(g)) { - err = hyp_read_gid(&platform->vmid); - if (err) { - nvgpu_err(g, "failed to read vmid"); - return err; - } - platform->phys_addr = nvgpu_tegra_hv_ipa_pa; - } -#endif - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c deleted file mode 100644 index fad21351..00000000 --- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Semaphore Sync Framework Integration - * - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "../common/linux/channel.h" - -#include "../drivers/staging/android/sync.h" - -#include "sync_sema_android.h" - -static const struct sync_timeline_ops gk20a_sync_timeline_ops; - -struct gk20a_sync_timeline { - struct sync_timeline obj; - u32 max; - u32 min; -}; - -/** - * The sync framework dups pts when merging fences. We share a single - * refcounted gk20a_sync_pt for each duped pt. - */ -struct gk20a_sync_pt { - struct gk20a *g; - struct nvgpu_ref refcount; - u32 thresh; - struct nvgpu_semaphore *sema; - struct gk20a_sync_timeline *obj; - - /* - * Use a spin lock here since it will have better performance - * than a mutex - there should be very little contention on this - * lock. 
- */ - struct nvgpu_spinlock lock; -}; - -struct gk20a_sync_pt_inst { - struct sync_pt pt; - struct gk20a_sync_pt *shared; -}; - -/** - * Compares sync pt values a and b, both of which will trigger either before - * or after ref (i.e. a and b trigger before ref, or a and b trigger after - * ref). Supplying ref allows us to handle wrapping correctly. - * - * Returns -1 if a < b (a triggers before b) - * 0 if a = b (a and b trigger at the same time) - * 1 if a > b (b triggers before a) - */ -static int __gk20a_sync_pt_compare_ref( - u32 ref, - u32 a, - u32 b) -{ - /* - * We normalize both a and b by subtracting ref from them. - * Denote the normalized values by a_n and b_n. Note that because - * of wrapping, a_n and/or b_n may be negative. - * - * The normalized values a_n and b_n satisfy: - * - a positive value triggers before a negative value - * - a smaller positive value triggers before a greater positive value - * - a smaller negative value (greater in absolute value) triggers - * before a greater negative value (smaller in absolute value). - * - * Thus we can just stick to unsigned arithmetic and compare - * (u32)a_n to (u32)b_n. - * - * Just to reiterate the possible cases: - * - * 1A) ...ref..a....b.... - * 1B) ...ref..b....a.... - * 2A) ...b....ref..a.... b_n < 0 - * 2B) ...a....ref..b.... a_n > 0 - * 3A) ...a....b....ref.. a_n < 0, b_n < 0 - * 3B) ...b....a....ref.. a_n < 0, b_n < 0 - */ - u32 a_n = a - ref; - u32 b_n = b - ref; - if (a_n < b_n) - return -1; - else if (a_n > b_n) - return 1; - else - return 0; -} - -static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) -{ - struct gk20a_sync_pt_inst *pti = - container_of(pt, struct gk20a_sync_pt_inst, pt); - return pti->shared; -} -static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) -{ - if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) - return NULL; - return (struct gk20a_sync_timeline *)obj; -} - -static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) -{ - struct gk20a_sync_pt *pt = - container_of(ref, struct gk20a_sync_pt, refcount); - struct gk20a *g = pt->g; - - if (pt->sema) - nvgpu_semaphore_put(pt->sema); - nvgpu_kfree(g, pt); -} - -static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( - struct gk20a *g, - struct gk20a_sync_timeline *obj, - struct nvgpu_semaphore *sema) -{ - struct gk20a_sync_pt *shared; - - shared = nvgpu_kzalloc(g, sizeof(*shared)); - if (!shared) - return NULL; - - nvgpu_ref_init(&shared->refcount); - shared->g = g; - shared->obj = obj; - shared->sema = sema; - shared->thresh = ++obj->max; /* sync framework has a lock */ - - nvgpu_spinlock_init(&shared->lock); - - nvgpu_semaphore_get(sema); - - return shared; -} - -static struct sync_pt *gk20a_sync_pt_create_inst( - struct gk20a *g, - struct gk20a_sync_timeline *obj, - struct nvgpu_semaphore *sema) -{ - struct gk20a_sync_pt_inst *pti; - - pti = (struct gk20a_sync_pt_inst *) - sync_pt_create(&obj->obj, sizeof(*pti)); - if (!pti) - return NULL; - - pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); - if (!pti->shared) { - sync_pt_free(&pti->pt); - return NULL; - } - return &pti->pt; -} - -static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - if (pt) - nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); -} - -static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt_inst *pti; - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - - pti = (struct gk20a_sync_pt_inst *) -
sync_pt_create(&pt->obj->obj, sizeof(*pti)); - if (!pti) - return NULL; - pti->shared = pt; - nvgpu_ref_get(&pt->refcount); - return &pti->pt; -} - -/* - * This function must be able to run on the same sync_pt concurrently. This - * requires a lock to protect access to the sync_pt's internal data structures - * which are modified as a side effect of calling this function. - */ -static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - struct gk20a_sync_timeline *obj = pt->obj; - bool signaled = true; - - nvgpu_spinlock_acquire(&pt->lock); - if (!pt->sema) - goto done; - - /* Acquired == not released yet == active == not signaled. */ - signaled = !nvgpu_semaphore_is_acquired(pt->sema); - - if (signaled) { - /* Update min if necessary. */ - if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, - obj->min) == 1) - obj->min = pt->thresh; - - /* Release the semaphore to the pool. */ - nvgpu_semaphore_put(pt->sema); - pt->sema = NULL; - } -done: - nvgpu_spinlock_release(&pt->lock); - - return signaled; -} - -static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) -{ - bool a_expired; - bool b_expired; - struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a); - struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b); - - if (WARN_ON(pt_a->obj != pt_b->obj)) - return 0; - - /* Early out */ - if (a == b) - return 0; - - a_expired = gk20a_sync_pt_has_signaled(a); - b_expired = gk20a_sync_pt_has_signaled(b); - if (a_expired && !b_expired) { - /* Easy, a was earlier */ - return -1; - } else if (!a_expired && b_expired) { - /* Easy, b was earlier */ - return 1; - } - - /* Both a and b are expired (trigger before min) or not - * expired (trigger after min), so we can use min - * as a reference value for __gk20a_sync_pt_compare_ref.
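The same normalization can be checked in isolation. A stand-alone test of the wrap-safe ordering rule implemented by __gk20a_sync_pt_compare_ref(): subtracting the reference point first lets plain unsigned comparison give the right answer even after the 32-bit counters wrap.

    #include <assert.h>
    #include <stdint.h>

    static int compare_ref(uint32_t ref, uint32_t a, uint32_t b)
    {
        uint32_t a_n = a - ref;
        uint32_t b_n = b - ref;

        return (a_n < b_n) ? -1 : (a_n > b_n) ? 1 : 0;
    }

    int main(void)
    {
        /* No wrap: both triggered after ref (case 1A above). */
        assert(compare_ref(100, 101, 102) == -1);

        /* a sits just below ref, so in normalized unsigned terms it is
         * far in the future; b, just above ref, triggers first. */
        assert(compare_ref(100, 99, 101) == 1);

        /* Wrap across 2^32: 0xFFFFFFFF triggers before 0x00000001
         * when ref is 0xFFFFFFF0. */
        assert(compare_ref(0xFFFFFFF0u, 0xFFFFFFFFu, 0x1u) == -1);
        return 0;
    }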
- */ - return __gk20a_sync_pt_compare_ref(pt_a->obj->min, - pt_a->thresh, pt_b->thresh); -} - -static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) -{ - return obj->min; -} - -static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, - char *str, int size) -{ - struct gk20a_sync_timeline *obj = - (struct gk20a_sync_timeline *)timeline; - snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); -} - -static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, - char *str, int size) -{ - struct nvgpu_semaphore *s = pt->sema; - - snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", - s->location.pool->page_idx, - nvgpu_semaphore_get_value(s), - nvgpu_semaphore_read(s)); -} - -static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, - int size) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); - - if (pt->sema) { - gk20a_sync_pt_value_str_for_sema(pt, str, size); - return; - } - - snprintf(str, size, "%d", pt->thresh); -} - -static const struct sync_timeline_ops gk20a_sync_timeline_ops = { - .driver_name = "nvgpu_semaphore", - .dup = gk20a_sync_pt_dup_inst, - .has_signaled = gk20a_sync_pt_has_signaled, - .compare = gk20a_sync_pt_compare, - .free_pt = gk20a_sync_pt_free_inst, - .timeline_value_str = gk20a_sync_timeline_value_str, - .pt_value_str = gk20a_sync_pt_value_str, -}; - -/* Public API */ - -struct sync_fence *gk20a_sync_fence_fdget(int fd) -{ - struct sync_fence *fence = sync_fence_fdget(fd); - int i; - - if (!fence) - return NULL; - - for (i = 0; i < fence->num_fences; i++) { - struct fence *pt = fence->cbs[i].sync_pt; - struct sync_pt *spt = sync_pt_from_fence(pt); - struct sync_timeline *t; - - if (spt == NULL) { - sync_fence_put(fence); - return NULL; - } - - t = sync_pt_parent(spt); - if (t->ops != &gk20a_sync_timeline_ops) { - sync_fence_put(fence); - return NULL; - } - } - - return fence; -} - -struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) -{ - struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); - struct nvgpu_semaphore *sema; - - nvgpu_spinlock_acquire(&pt->lock); - sema = pt->sema; - if (sema) - nvgpu_semaphore_get(sema); - nvgpu_spinlock_release(&pt->lock); - - return sema; -} - -void gk20a_sync_timeline_signal(struct sync_timeline *timeline) -{ - sync_timeline_signal(timeline, 0); -} - -void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) -{ - sync_timeline_destroy(timeline); -} - -struct sync_timeline *gk20a_sync_timeline_create( - const char *name) -{ - struct gk20a_sync_timeline *obj; - - obj = (struct gk20a_sync_timeline *) - sync_timeline_create(&gk20a_sync_timeline_ops, - sizeof(struct gk20a_sync_timeline), - name); - if (!obj) - return NULL; - obj->max = 0; - obj->min = 0; - return &obj->obj; -} - -struct sync_fence *gk20a_sync_fence_create( - struct channel_gk20a *c, - struct nvgpu_semaphore *sema, - const char *fmt, ...) 
-{ - char name[30]; - va_list args; - struct sync_pt *pt; - struct sync_fence *fence; - struct gk20a *g = c->g; - - struct nvgpu_channel_linux *os_channel_priv = c->os_priv; - struct nvgpu_os_fence_framework *fence_framework = NULL; - struct gk20a_sync_timeline *timeline = NULL; - - fence_framework = &os_channel_priv->fence_framework; - - timeline = to_gk20a_timeline(fence_framework->timeline); - - pt = gk20a_sync_pt_create_inst(g, timeline, sema); - if (pt == NULL) - return NULL; - - va_start(args, fmt); - vsnprintf(name, sizeof(name), fmt, args); - va_end(args); - - fence = sync_fence_create(name, pt); - if (fence == NULL) { - sync_pt_free(pt); - return NULL; - } - return fence; -} diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h deleted file mode 100644 index 4fca7bed..00000000 --- a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Semaphore Sync Framework Integration - * - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _GK20A_SYNC_H_ -#define _GK20A_SYNC_H_ - -struct sync_timeline; -struct sync_fence; -struct sync_pt; -struct nvgpu_semaphore; -struct fence; - -#ifdef CONFIG_SYNC -struct sync_timeline *gk20a_sync_timeline_create(const char *name); -void gk20a_sync_timeline_destroy(struct sync_timeline *); -void gk20a_sync_timeline_signal(struct sync_timeline *); -struct sync_fence *gk20a_sync_fence_create( - struct channel_gk20a *c, - struct nvgpu_semaphore *, - const char *fmt, ...); -struct sync_fence *gk20a_sync_fence_fdget(int fd); -struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); -#else -static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} -static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} -static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) -{ - return NULL; -} -static inline struct sync_timeline *gk20a_sync_timeline_create( - const char *name) { - return NULL; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.c b/drivers/gpu/nvgpu/common/linux/sysfs.c deleted file mode 100644 index e5995bb8..00000000 --- a/drivers/gpu/nvgpu/common/linux/sysfs.c +++ /dev/null @@ -1,1205 +0,0 @@ -/* - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
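When CONFIG_SYNC is disabled, sync_sema_android.h above degrades to static-inline no-ops so callers need no #ifdefs of their own. A cut-down illustration of that compile-out pattern; the feature and function names are invented for the demo.

    struct demo_obj;

    #ifdef CONFIG_DEMO_FEATURE
    struct demo_obj *demo_create(const char *name);
    void demo_destroy(struct demo_obj *obj);
    #else
    /* Feature compiled out: callers transparently get cheap no-ops. */
    static inline struct demo_obj *demo_create(const char *name)
    {
        return NULL;
    }
    static inline void demo_destroy(struct demo_obj *obj)
    {
    }
    #endif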
- */ - -#include -#include -#include - -#include -#include - -#include "sysfs.h" -#include "platform_gk20a.h" -#include "gk20a/pmu_gk20a.h" -#include "gk20a/gr_gk20a.h" -#include "gv11b/gr_gv11b.h" - -#define PTIMER_FP_FACTOR 1000000 - -#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) - -static ssize_t elcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - if (val) { - g->elcg_enabled = true; - gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); - } else { - g->elcg_enabled = false; - gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); - } - - gk20a_idle(g); - - nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t elcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); - -static ssize_t blcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) - g->blcg_enabled = true; - else - g->blcg_enabled = false; - - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.clock_gating.blcg_bus_load_gating_prod) - g->ops.clock_gating.blcg_bus_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ce_load_gating_prod) - g->ops.clock_gating.blcg_ce_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) - g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_fb_load_gating_prod) - g->ops.clock_gating.blcg_fb_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_fifo_load_gating_prod) - g->ops.clock_gating.blcg_fifo_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_gr_load_gating_prod) - g->ops.clock_gating.blcg_gr_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_ltc_load_gating_prod) - g->ops.clock_gating.blcg_ltc_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_pmu_load_gating_prod) - g->ops.clock_gating.blcg_pmu_load_gating_prod(g, - g->blcg_enabled); - if (g->ops.clock_gating.blcg_xbar_load_gating_prod) - g->ops.clock_gating.blcg_xbar_load_gating_prod(g, - g->blcg_enabled); - gk20a_idle(g); - - nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t blcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); -} - - -static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); - -static ssize_t slcg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) - g->slcg_enabled = true; - else - g->slcg_enabled = false; - - /* - * TODO: slcg_therm_load_gating is not enabled anywhere during - * init. Therefore, it would be incongruous to add it here. 
Once - * it is added to init, we should add it here too. - */ - err = gk20a_busy(g); - if (err) - return err; - - if (g->ops.clock_gating.slcg_bus_load_gating_prod) - g->ops.clock_gating.slcg_bus_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ce2_load_gating_prod) - g->ops.clock_gating.slcg_ce2_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) - g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) - g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_fb_load_gating_prod) - g->ops.clock_gating.slcg_fb_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_fifo_load_gating_prod) - g->ops.clock_gating.slcg_fifo_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_gr_load_gating_prod) - g->ops.clock_gating.slcg_gr_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_ltc_load_gating_prod) - g->ops.clock_gating.slcg_ltc_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_perf_load_gating_prod) - g->ops.clock_gating.slcg_perf_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_priring_load_gating_prod) - g->ops.clock_gating.slcg_priring_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_pmu_load_gating_prod) - g->ops.clock_gating.slcg_pmu_load_gating_prod(g, - g->slcg_enabled); - if (g->ops.clock_gating.slcg_xbar_load_gating_prod) - g->ops.clock_gating.slcg_xbar_load_gating_prod(g, - g->slcg_enabled); - gk20a_idle(g); - - nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t slcg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 
1 : 0); -} - -static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); - -static ssize_t ptimer_scale_factor_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - u32 scaling_factor_fp; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / - ((u32)(src_freq_hz) / - (u32)(PTIMER_FP_FACTOR)); - res = snprintf(buf, - PAGE_SIZE, - "%u.%u\n", - scaling_factor_fp / PTIMER_FP_FACTOR, - scaling_factor_fp % PTIMER_FP_FACTOR); - - return res; - -} - -static DEVICE_ATTR(ptimer_scale_factor, - S_IRUGO, - ptimer_scale_factor_show, - NULL); - -static ssize_t ptimer_ref_freq_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); - - return res; - -} - -static DEVICE_ATTR(ptimer_ref_freq, - S_IRUGO, - ptimer_ref_freq_show, - NULL); - -static ssize_t ptimer_src_freq_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - u32 src_freq_hz = platform->ptimer_src_freq; - ssize_t res; - - if (!src_freq_hz) { - nvgpu_err(g, "reference clk_m rate is not set correctly"); - return -EINVAL; - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); - - return res; - -} - -static DEVICE_ATTR(ptimer_src_freq, - S_IRUGO, - ptimer_src_freq_show, - NULL); - - -#if defined(CONFIG_PM) -static ssize_t railgate_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long railgate_enable = 0; - /* dev is guaranteed to be valid here. Ok to de-reference */ - struct gk20a *g = get_gk20a(dev); - int err; - - if (kstrtoul(buf, 10, &railgate_enable) < 0) - return -EINVAL; - - if (railgate_enable && !g->can_railgate) { - g->can_railgate = true; - pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); - } else if (railgate_enable == 0 && g->can_railgate) { - g->can_railgate = false; - pm_runtime_set_autosuspend_delay(dev, -1); - } - /* wake-up system to make rail-gating setting effective */ - err = gk20a_busy(g); - if (err) - return err; - gk20a_idle(g); - - nvgpu_info(g, "railgate is %s.", g->can_railgate ? - "enabled" : "disabled"); - - return count; -} - -static ssize_t railgate_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 
1 : 0); -} - -static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, - railgate_enable_store); -#endif - -static ssize_t railgate_delay_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - int railgate_delay = 0, ret = 0; - struct gk20a *g = get_gk20a(dev); - int err; - - if (!g->can_railgate) { - nvgpu_info(g, "does not support power-gating"); - return count; - } - - ret = sscanf(buf, "%d", &railgate_delay); - if (ret == 1 && railgate_delay >= 0) { - g->railgate_delay = railgate_delay; - pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); - } else - nvgpu_err(g, "Invalid powergate delay"); - - /* wake-up system to make rail-gating delay effective immediately */ - err = gk20a_busy(g); - if (err) - return err; - gk20a_idle(g); - - return count; -} -static ssize_t railgate_delay_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); -} -static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, - railgate_delay_store); - -static ssize_t is_railgated_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a_platform *platform = dev_get_drvdata(dev); - bool is_railgated = 0; - - if (platform->is_railgated) - is_railgated = platform->is_railgated(dev); - - return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); -} -static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); - -static ssize_t counters_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - u32 busy_cycles, total_cycles; - ssize_t res; - - nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); - - res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); - - return res; -} -static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); - -static ssize_t counters_show_reset(struct device *dev, - struct device_attribute *attr, char *buf) -{ - ssize_t res = counters_show(dev, attr, buf); - struct gk20a *g = get_gk20a(dev); - - nvgpu_pmu_reset_load_counters(g); - - return res; -} -static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); - -static ssize_t gk20a_load_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - u32 busy_time; - ssize_t res; - int err; - - if (!g->power_on) { - busy_time = 0; - } else { - err = gk20a_busy(g); - if (err) - return err; - - nvgpu_pmu_load_update(g); - nvgpu_pmu_load_norm(g, &busy_time); - gk20a_idle(g); - } - - res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); - - return res; -} -static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); - -static ssize_t elpg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->power_on) { - g->elpg_enabled = val ? true : false; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - /* - * Since elpg is refcounted, we should not unnecessarily call - * enable/disable if it is already so. 
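That caveat aside, nearly every attribute in this file repeats one store()/show() shape: parse with kstrtoul, take a busy reference so the GPU is powered, apply the setting, drop the reference, and return the full count on success. A condensed template of the pattern; the attribute name is invented and g->allow_all merely stands in for whichever flag is being toggled.

    static ssize_t demo_enable_store(struct device *dev,
            struct device_attribute *attr, const char *buf, size_t count)
    {
        struct gk20a *g = get_gk20a(dev);
        unsigned long val;
        int err;

        if (kstrtoul(buf, 10, &val) < 0)
            return -EINVAL;              /* reject non-numeric input */

        err = gk20a_busy(g);             /* power up / hold off railgating */
        if (err)
            return err;

        g->allow_all = val != 0;         /* stand-in for the real knob */
        gk20a_idle(g);

        return count;
    }

    static ssize_t demo_enable_show(struct device *dev,
            struct device_attribute *attr, char *buf)
    {
        return snprintf(buf, PAGE_SIZE, "%d\n",
                        get_gk20a(dev)->allow_all ? 1 : 0);
    }

    static DEVICE_ATTR(demo_enable, ROOTRW, demo_enable_show, demo_enable_store);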
- */ - if (val && !g->elpg_enabled) { - g->elpg_enabled = true; - nvgpu_pmu_pg_global_enable(g, true); - - } else if (!val && g->elpg_enabled) { - if (g->ops.pmu.pmu_pg_engines_feature_list && - g->ops.pmu.pmu_pg_engines_feature_list(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != - NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { - nvgpu_pmu_pg_global_enable(g, false); - g->elpg_enabled = false; - } else { - g->elpg_enabled = false; - nvgpu_pmu_pg_global_enable(g, false); - } - } - gk20a_idle(g); - } - nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t elpg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); - -static ssize_t ldiv_slowdown_factor_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) { - nvgpu_err(g, "parse error for input SLOWDOWN factor\n"); - return -EINVAL; - } - - if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { - nvgpu_err(g, "Invalid SLOWDOWN factor\n"); - return -EINVAL; - } - - if (val == g->ldiv_slowdown_factor) - return count; - - if (!g->power_on) { - g->ldiv_slowdown_factor = val; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - - g->ldiv_slowdown_factor = val; - - if (g->ops.pmu.pmu_pg_init_param) - g->ops.pmu.pmu_pg_init_param(g, - PMU_PG_ELPG_ENGINE_ID_GRAPHICS); - - gk20a_idle(g); - } - - nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor); - - return count; -} - -static ssize_t ldiv_slowdown_factor_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); -} - -static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, - ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); - -static ssize_t mscg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_pmu *pmu = &g->pmu; - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->power_on) { - g->mscg_enabled = val ? true : false; - } else { - err = gk20a_busy(g); - if (err) - return -EAGAIN; - /* - * Since elpg is refcounted, we should not unnecessarily call - * enable/disable if it is already so. - */ - if (val && !g->mscg_enabled) { - g->mscg_enabled = true; - if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, - PMU_PG_LPWR_FEATURE_MSCG)) { - if (!ACCESS_ONCE(pmu->mscg_stat)) { - WRITE_ONCE(pmu->mscg_stat, - PMU_MSCG_ENABLED); - /* make status visible */ - smp_mb(); - } - } - - } else if (!val && g->mscg_enabled) { - if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, - PMU_PG_LPWR_FEATURE_MSCG)) { - nvgpu_pmu_pg_global_enable(g, false); - WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); - /* make status visible */ - smp_mb(); - g->mscg_enabled = false; - if (g->elpg_enabled) - nvgpu_pmu_pg_global_enable(g, true); - } - g->mscg_enabled = false; - } - gk20a_idle(g); - } - nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? 
"enabled" : - "disabled"); - - return count; -} - -static ssize_t mscg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); -} - -static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); - -static ssize_t aelpg_param_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - int status = 0; - union pmu_ap_cmd ap_cmd; - int *paramlist = (int *)g->pmu.aelpg_param; - u32 defaultparam[5] = { - APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, - APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, - APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, - APCTRL_POWER_BREAKEVEN_DEFAULT_US, - APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT - }; - - /* Get each parameter value from input string*/ - sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], - ¶mlist[2], ¶mlist[3], ¶mlist[4]); - - /* If parameter value is 0 then reset to SW default values*/ - if ((paramlist[0] | paramlist[1] | paramlist[2] - | paramlist[3] | paramlist[4]) == 0x00) { - memcpy(paramlist, defaultparam, sizeof(defaultparam)); - } - - /* If aelpg is enabled & pmu is ready then post values to - * PMU else store then post later - */ - if (g->aelpg_enabled && g->pmu.pmu_ready) { - /* Disable AELPG */ - ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; - ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - - /* Enable AELPG */ - nvgpu_aelpg_init(g); - nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); - } - - return count; -} - -static ssize_t aelpg_param_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, - "%d %d %d %d %d\n", g->pmu.aelpg_param[0], - g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], - g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); -} - -static DEVICE_ATTR(aelpg_param, ROOTRW, - aelpg_param_read, aelpg_param_store); - -static ssize_t aelpg_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int status = 0; - union pmu_ap_cmd ap_cmd; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - if (err) - return err; - - if (g->pmu.pmu_ready) { - if (val && !g->aelpg_enabled) { - g->aelpg_enabled = true; - /* Enable AELPG */ - ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; - ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - } else if (!val && g->aelpg_enabled) { - g->aelpg_enabled = false; - /* Disable AELPG */ - ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; - ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; - status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); - } - } else { - nvgpu_info(g, "PMU is not ready, AELPG request failed"); - } - gk20a_idle(g); - - nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : - "disabled"); - - return count; -} - -static ssize_t aelpg_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 
1 : 0); -} - -static DEVICE_ATTR(aelpg_enable, ROOTRW, - aelpg_enable_read, aelpg_enable_store); - - -static ssize_t allow_all_enable_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); -} - -static ssize_t allow_all_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = gk20a_busy(g); - g->allow_all = (val ? true : false); - gk20a_idle(g); - - return count; -} - -static DEVICE_ATTR(allow_all, ROOTRW, - allow_all_enable_read, allow_all_enable_store); - -static ssize_t emc3d_ratio_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - g->emc3d_ratio = val; - - return count; -} - -static ssize_t emc3d_ratio_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); -} - -static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); - -static ssize_t fmax_at_vmin_safe_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long gpu_fmax_at_vmin_hz = 0; - - if (g->ops.clk.get_fmax_at_vmin_safe) - gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); - - return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); -} - -static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); - -#ifdef CONFIG_PM -static ssize_t force_idle_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - int err = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val) { - if (g->forced_idle) - return count; /* do nothing */ - else { - err = __gk20a_do_idle(g, false); - if (!err) { - g->forced_idle = 1; - nvgpu_info(g, "gpu is idle : %d", - g->forced_idle); - } - } - } else { - if (!g->forced_idle) - return count; /* do nothing */ - else { - err = __gk20a_do_unidle(g); - if (!err) { - g->forced_idle = 0; - nvgpu_info(g, "gpu is idle : %d", - g->forced_idle); - } - } - } - - return count; -} - -static ssize_t force_idle_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 
1 : 0); -} - -static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); -#endif - -static ssize_t tpc_fs_mask_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (!g->gr.gpc_tpc_mask) - return -ENODEV; - - if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { - g->gr.gpc_tpc_mask[0] = val; - g->tpc_fs_mask_user = val; - - g->ops.gr.set_gpc_tpc_mask(g, 0); - - nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); - g->gr.ctx_vars.local_golden_image = NULL; - g->gr.ctx_vars.golden_image_initialized = false; - g->gr.ctx_vars.golden_image_size = 0; - /* Cause next poweron to reinit just gr */ - g->gr.sw_ready = false; - } - - return count; -} - -static ssize_t tpc_fs_mask_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - u32 gpc_index; - u32 tpc_fs_mask = 0; - int err = 0; - - err = gk20a_busy(g); - if (err) - return err; - - for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { - if (g->ops.gr.get_gpc_tpc_mask) - tpc_fs_mask |= - g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << - (gr->max_tpc_per_gpc_count * gpc_index); - } - - gk20a_idle(g); - - return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); -} - -static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); - -static ssize_t min_timeslice_us_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); -} - -static ssize_t min_timeslice_us_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val > g->max_timeslice_us) - return -EINVAL; - - g->min_timeslice_us = val; - - return count; -} - -static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, - min_timeslice_us_store); - -static ssize_t max_timeslice_us_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); -} - -static ssize_t max_timeslice_us_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val < g->min_timeslice_us) - return -EINVAL; - - g->max_timeslice_us = val; - - return count; -} - -static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, - max_timeslice_us_store); - -static ssize_t czf_bypass_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val >= 4) - return -EINVAL; - - g->gr.czf_bypass = val; - - return count; -} - -static ssize_t czf_bypass_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return sprintf(buf, "%d\n", g->gr.czf_bypass); -} - -static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); - -static ssize_t pd_max_batches_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - 
unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (val > 64) - return -EINVAL; - - g->gr.pd_max_batches = val; - - return count; -} - -static ssize_t pd_max_batches_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - - return sprintf(buf, "%d\n", g->gr.pd_max_batches); -} - -static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); - -static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - unsigned long val = 0; - int err = -1; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { - if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) - return -EINVAL; - } - - gr->gfxp_wfi_timeout_count = val; - - if (g->ops.gr.init_preemption_state && g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.init_preemption_state(g)); - - gk20a_idle(g); - - if (err) - return err; - } - return count; -} - -static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - int err = -1; - - if (count > 0 && buf[0] == 's') - /* sysclk */ - gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; - else - /* usec */ - gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; - - if (g->ops.gr.init_preemption_state && g->power_on) { - err = gk20a_busy(g); - if (err) - return err; - - err = gr_gk20a_elpg_protected_call(g, - g->ops.gr.init_preemption_state(g)); - - gk20a_idle(g); - - if (err) - return err; - } - - return count; -} - -static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - u32 val = gr->gfxp_wfi_timeout_count; - - return snprintf(buf, PAGE_SIZE, "%d\n", val); -} - -static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; - - if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) - return snprintf(buf, PAGE_SIZE, "usec\n"); - else - return snprintf(buf, PAGE_SIZE, "sysclk\n"); -} - -static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), - gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); - -static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), - gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); - -void nvgpu_remove_sysfs(struct device *dev) -{ - device_remove_file(dev, &dev_attr_elcg_enable); - device_remove_file(dev, &dev_attr_blcg_enable); - device_remove_file(dev, &dev_attr_slcg_enable); - device_remove_file(dev, &dev_attr_ptimer_scale_factor); - device_remove_file(dev, &dev_attr_ptimer_ref_freq); - device_remove_file(dev, &dev_attr_ptimer_src_freq); - device_remove_file(dev, &dev_attr_elpg_enable); - device_remove_file(dev, &dev_attr_mscg_enable); - device_remove_file(dev, &dev_attr_emc3d_ratio); - device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); - - device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); - - device_remove_file(dev, &dev_attr_counters); - device_remove_file(dev, &dev_attr_counters_reset); - device_remove_file(dev, &dev_attr_load); - device_remove_file(dev, 
&dev_attr_railgate_delay); - device_remove_file(dev, &dev_attr_is_railgated); -#ifdef CONFIG_PM - device_remove_file(dev, &dev_attr_force_idle); - device_remove_file(dev, &dev_attr_railgate_enable); -#endif - device_remove_file(dev, &dev_attr_aelpg_param); - device_remove_file(dev, &dev_attr_aelpg_enable); - device_remove_file(dev, &dev_attr_allow_all); - device_remove_file(dev, &dev_attr_tpc_fs_mask); - device_remove_file(dev, &dev_attr_min_timeslice_us); - device_remove_file(dev, &dev_attr_max_timeslice_us); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - nvgpu_nvhost_remove_symlink(get_gk20a(dev)); -#endif - - device_remove_file(dev, &dev_attr_czf_bypass); - device_remove_file(dev, &dev_attr_pd_max_batches); - device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); - device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); - - if (strcmp(dev_name(dev), "gpu.0")) { - struct kobject *kobj = &dev->kobj; - struct device *parent = container_of((kobj->parent), - struct device, kobj); - sysfs_remove_link(&parent->kobj, "gpu.0"); - } -} - -int nvgpu_create_sysfs(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - int error = 0; - - error |= device_create_file(dev, &dev_attr_elcg_enable); - error |= device_create_file(dev, &dev_attr_blcg_enable); - error |= device_create_file(dev, &dev_attr_slcg_enable); - error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); - error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); - error |= device_create_file(dev, &dev_attr_ptimer_src_freq); - error |= device_create_file(dev, &dev_attr_elpg_enable); - error |= device_create_file(dev, &dev_attr_mscg_enable); - error |= device_create_file(dev, &dev_attr_emc3d_ratio); - error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); - - error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); - - error |= device_create_file(dev, &dev_attr_counters); - error |= device_create_file(dev, &dev_attr_counters_reset); - error |= device_create_file(dev, &dev_attr_load); - error |= device_create_file(dev, &dev_attr_railgate_delay); - error |= device_create_file(dev, &dev_attr_is_railgated); -#ifdef CONFIG_PM - error |= device_create_file(dev, &dev_attr_force_idle); - error |= device_create_file(dev, &dev_attr_railgate_enable); -#endif - error |= device_create_file(dev, &dev_attr_aelpg_param); - error |= device_create_file(dev, &dev_attr_aelpg_enable); - error |= device_create_file(dev, &dev_attr_allow_all); - error |= device_create_file(dev, &dev_attr_tpc_fs_mask); - error |= device_create_file(dev, &dev_attr_min_timeslice_us); - error |= device_create_file(dev, &dev_attr_max_timeslice_us); - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - error |= nvgpu_nvhost_create_symlink(g); -#endif - - error |= device_create_file(dev, &dev_attr_czf_bypass); - error |= device_create_file(dev, &dev_attr_pd_max_batches); - error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); - error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); - - if (strcmp(dev_name(dev), "gpu.0")) { - struct kobject *kobj = &dev->kobj; - struct device *parent = container_of((kobj->parent), - struct device, kobj); - error |= sysfs_create_link(&parent->kobj, - &dev->kobj, "gpu.0"); - } - - if (error) - nvgpu_err(g, "Failed to create sysfs attributes!\n"); - - return error; -} diff --git a/drivers/gpu/nvgpu/common/linux/sysfs.h b/drivers/gpu/nvgpu/common/linux/sysfs.h deleted file mode 100644 index 80925844..00000000 --- a/drivers/gpu/nvgpu/common/linux/sysfs.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017, 
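nvgpu_create_sysfs() above ORs every device_create_file() result into a single error and logs once at the end, and nvgpu_remove_sysfs() mirrors the whole list by hand. A hedged sketch of the same attributes expressed as an attribute_group, which collapses both lists into one call each (the array is abbreviated; the names mirror the dev_attr_* objects above):

    static struct attribute *nvgpu_dev_attrs[] = {
            &dev_attr_elcg_enable.attr,
            &dev_attr_blcg_enable.attr,
            &dev_attr_slcg_enable.attr,
            &dev_attr_tpc_fs_mask.attr,
            /* ... remaining attributes ... */
            NULL,                           /* sysfs requires a sentinel */
    };

    static const struct attribute_group nvgpu_dev_attr_group = {
            .attrs = nvgpu_dev_attrs,
    };

    /* creation:  sysfs_create_group(&dev->kobj, &nvgpu_dev_attr_group);
     * removal:   sysfs_remove_group(&dev->kobj, &nvgpu_dev_attr_group); */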
NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef NVGPU_SYSFS_H -#define NVGPU_SYSFS_H - -struct device; - -int nvgpu_create_sysfs(struct device *dev); -void nvgpu_remove_sysfs(struct device *dev); - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/thread.c b/drivers/gpu/nvgpu/common/linux/thread.c deleted file mode 100644 index 92c556f2..00000000 --- a/drivers/gpu/nvgpu/common/linux/thread.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -int nvgpu_thread_proxy(void *threaddata) -{ - struct nvgpu_thread *thread = threaddata; - int ret = thread->fn(thread->data); - - thread->running = false; - return ret; -} - -int nvgpu_thread_create(struct nvgpu_thread *thread, - void *data, - int (*threadfn)(void *data), const char *name) -{ - struct task_struct *task = kthread_create(nvgpu_thread_proxy, - thread, name); - if (IS_ERR(task)) - return PTR_ERR(task); - - thread->task = task; - thread->fn = threadfn; - thread->data = data; - thread->running = true; - wake_up_process(task); - return 0; -}; - -void nvgpu_thread_stop(struct nvgpu_thread *thread) -{ - if (thread->task) { - kthread_stop(thread->task); - thread->task = NULL; - } -}; - -bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) -{ - return kthread_should_stop(); -}; - -bool nvgpu_thread_is_running(struct nvgpu_thread *thread) -{ - return ACCESS_ONCE(thread->running); -}; diff --git a/drivers/gpu/nvgpu/common/linux/timers.c b/drivers/gpu/nvgpu/common/linux/timers.c deleted file mode 100644 index d1aa641f..00000000 --- a/drivers/gpu/nvgpu/common/linux/timers.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
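The thread wrapper deleted above rides on kthreads: nvgpu_thread_proxy() traps the worker's return value and clears the running flag, while nvgpu_thread_should_stop() just forwards to kthread_should_stop(), so it is only meaningful when called from inside the worker itself. A hedged caller-side sketch (worker(), struct my_ctx, and process_one_item() are hypothetical):

    static int worker(void *data)
    {
            struct my_ctx *ctx = data;              /* hypothetical context */

            while (!nvgpu_thread_should_stop(&ctx->thread))
                    process_one_item(ctx);          /* hypothetical work */

            return 0;
    }

    /* ... */
    err = nvgpu_thread_create(&ctx->thread, ctx, worker, "nvgpu_worker");
    if (err)
            return err;
    /* ... later: kthread_stop() blocks until worker() returns */
    nvgpu_thread_stop(&ctx->thread);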
- */ - -#include -#include - -#include -#include - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" - -/* - * Returns 1 if the platform is pre-Si and should ignore the timeout checking. - * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e do the - * timeout check regardless of platform). - */ -static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) -{ - if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) - return 0; - - return !nvgpu_platform_is_silicon(timeout->g); -} - -/** - * nvgpu_timeout_init - Init timer. - * - * @g - nvgpu device. - * @timeout - The timer. - * @duration - Timeout in milliseconds or number of retries. - * @flags - Flags for timer. - * - * This configures the timeout to start the timeout duration now, i.e: when this - * function is called. Available flags to pass to @flags: - * - * %NVGPU_TIMER_CPU_TIMER - * %NVGPU_TIMER_RETRY_TIMER - * %NVGPU_TIMER_NO_PRE_SI - * %NVGPU_TIMER_SILENT_TIMEOUT - * - * If neither %NVGPU_TIMER_CPU_TIMER or %NVGPU_TIMER_RETRY_TIMER is passed then - * a CPU timer is used by default. - */ -int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, - u32 duration, unsigned long flags) -{ - if (flags & ~NVGPU_TIMER_FLAG_MASK) - return -EINVAL; - - memset(timeout, 0, sizeof(*timeout)); - - timeout->g = g; - timeout->flags = flags; - - if (flags & NVGPU_TIMER_RETRY_TIMER) - timeout->retries.max = duration; - else - timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), - (s64)NSEC_PER_MSEC * duration)); - - return 0; -} - -static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, - void *caller, - const char *fmt, va_list args) -{ - struct gk20a *g = timeout->g; - ktime_t now = ktime_get(); - - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (ktime_after(now, ns_to_ktime(timeout->time))) { - if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { - char buf[128]; - - vsnprintf(buf, sizeof(buf), fmt, args); - - nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); - } - - return -ETIMEDOUT; - } - - return 0; -} - -static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, - void *caller, - const char *fmt, va_list args) -{ - struct gk20a *g = timeout->g; - - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (timeout->retries.attempted >= timeout->retries.max) { - if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { - char buf[128]; - - vsnprintf(buf, sizeof(buf), fmt, args); - - nvgpu_err(g, "No more retries @ %pF %s", caller, buf); - } - - return -ETIMEDOUT; - } - - timeout->retries.attempted++; - - return 0; -} - -/** - * __nvgpu_timeout_expired_msg - Check if a timeout has expired. - * - * @timeout - The timeout to check. - * @caller - Address of the caller of this function. - * @fmt - The fmt string. - * - * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. - * - * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout - * then a message is printed based on %fmt. - */ -int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, - void *caller, const char *fmt, ...) -{ - int ret; - va_list args; - - va_start(args, fmt); - if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) - ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, - args); - else - ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, - args); - va_end(args); - - return ret; -} - -/** - * nvgpu_timeout_peek_expired - Check the status of a timeout. - * - * @timeout - The timeout to check. 
- * - * Returns non-zero if the timeout is expired, zero otherwise. In the case of - * retry timers this will not increment the underlying retry count. Also if the - * timer has expired no messages will be printed. - * - * This function honors the pre-Si check as well. - */ -int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) -{ - if (nvgpu_timeout_is_pre_silicon(timeout)) - return 0; - - if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) - return timeout->retries.attempted >= timeout->retries.max; - else - return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); -} - -/** - * nvgpu_udelay - Delay for some number of microseconds. - * - * @usecs - Microseconds to wait for. - * - * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly - * accurate. This is normally backed by a busy-loop so this means waits should - * be kept short, below 100us. If longer delays are necessary then - * nvgpu_msleep() should be preferred. - * - * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This - * function will attempt to not use a busy-loop. - */ -void nvgpu_udelay(unsigned int usecs) -{ - udelay(usecs); -} - -/** - * nvgpu_usleep_range - Sleep for a range of microseconds. - * - * @min_us - Minimum wait time. - * @max_us - Maximum wait time. - * - * Wait for some number of microseconds between @min_us and @max_us. This, - * unlike nvgpu_udelay(), will attempt to sleep for the passed number of - * microseconds instead of busy looping. Not all platforms support this, - * and in that case this reduces to nvgpu_udelay(min_us). - * - * Linux note: this is not safe to use in atomic context. If you are in - * atomic context you must use nvgpu_udelay(). - */ -void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) -{ - usleep_range(min_us, max_us); -} - -/** - * nvgpu_msleep - Sleep for some milliseconds. - * - * @msecs - Sleep for at least this many milliseconds. - * - * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms - * or so) the sleep will be significantly longer due to scheduling overhead and - * mechanics. - */ -void nvgpu_msleep(unsigned int msecs) -{ - msleep(msecs); -} - -/** - * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. - * - * Return a clock in millisecond units. The start time of the clock is - * unspecified; the time returned can be compared with older ones to measure - * durations. The source clock does not jump when the system clock is adjusted. - */ -s64 nvgpu_current_time_ms(void) -{ - return ktime_to_ms(ktime_get()); -} - -/** - * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. - * - * Return a clock in nanosecond units. The start time of the clock is - * unspecified; the time returned can be compared with older ones to measure - * durations. The source clock does not jump when the system clock is adjusted. - */ -s64 nvgpu_current_time_ns(void) -{ - return ktime_to_ns(ktime_get()); -} - -/** - * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. - * - * Return a "high resolution" time stamp. It does not really matter exactly what - * it is, so long as it generally returns unique values and monotonically - * increases - wrap around _is_ possible though in a system running for long - * enough. - * - * Note: what high resolution means is system dependent. 
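Taken together, the timer helpers above support the driver's standard register-poll idiom: arm a timeout, spin with short delays, and bail with -ETIMEDOUT. A hedged sketch (the readiness test and duration are hypothetical); note that on pre-silicon platforms nvgpu_timeout_peek_expired() never fires unless NVGPU_TIMER_NO_PRE_SI was set, so there the loop simply waits as long as it takes:

    struct nvgpu_timeout timeout;

    nvgpu_timeout_init(g, &timeout, 2000 /* ms, hypothetical */,
                       NVGPU_TIMER_CPU_TIMER);
    do {
            if (unit_is_ready(g))           /* hypothetical readiness test */
                    return 0;

            nvgpu_udelay(10);               /* keep busy-waits short */
    } while (!nvgpu_timeout_peek_expired(&timeout));

    return -ETIMEDOUT;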
- */ -u64 nvgpu_hr_timestamp(void) -{ - return get_cycles(); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c deleted file mode 100644 index 0858e6b1..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Virtualized GPU Clock Interface - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include "gk20a/gk20a.h" -#include "clk_vgpu.h" -#include "ctrl/ctrlclk.h" -#include "common/linux/platform_gk20a.h" - -static unsigned long -vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; - -static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) -{ - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err; - unsigned long ret = 0; - - nvgpu_log_fn(g, " "); - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) - nvgpu_err(g, "%s failed - %d", __func__, err); - else - /* return frequency in Hz */ - ret = p->rate * 1000; - break; - case CTRL_CLK_DOMAIN_PWRCLK: - nvgpu_err(g, "unsupported clock: %u", api_domain); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - break; - } - - return ret; -} - -static int vgpu_clk_set_rate(struct gk20a *g, - u32 api_domain, unsigned long rate) -{ - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err = -EINVAL; - - nvgpu_log_fn(g, " "); - - switch (api_domain) { - case CTRL_CLK_DOMAIN_GPCCLK: - msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - - /* server dvfs framework requires frequency in kHz */ - p->rate = (u32)(rate / 1000); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? 
err : msg.ret; - if (err) - nvgpu_err(g, "%s failed - %d", __func__, err); - break; - case CTRL_CLK_DOMAIN_PWRCLK: - nvgpu_err(g, "unsupported clock: %u", api_domain); - break; - default: - nvgpu_err(g, "unknown clock: %u", api_domain); - break; - } - - return err; -} - -static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) -{ - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - - return priv->constants.max_freq; -} - -void vgpu_init_clk_support(struct gk20a *g) -{ - g->ops.clk.get_rate = vgpu_clk_get_rate; - g->ops.clk.set_rate = vgpu_clk_set_rate; - g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; -} - -long vgpu_clk_round_rate(struct device *dev, unsigned long rate) -{ - /* server will handle frequency rounding */ - return rate; -} - -int vgpu_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_get_gpu_freq_table_params *p = - &msg.params.get_gpu_freq_table; - unsigned int i; - int err; - - nvgpu_log_fn(g, " "); - - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; - msg.handle = vgpu_get_handle(g); - - p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) { - nvgpu_err(g, "%s failed - %d", __func__, err); - return err; - } - - /* return frequency in Hz */ - for (i = 0; i < p->num_freqs; i++) - vgpu_freq_table[i] = p->freqs[i] * 1000; - - *freqs = vgpu_freq_table; - *num_freqs = p->num_freqs; - - return 0; -} - -int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) -{ - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct gk20a *g = platform->g; - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; - int err = 0; - - nvgpu_log_fn(g, " "); - - msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; - msg.handle = vgpu_get_handle(g); - p->rate = (u32)rate; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - if (err) { - nvgpu_err(g, "%s failed - %d", __func__, err); - return err; - } - - return 0; -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h deleted file mode 100644 index 8d477643..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/clk_vgpu.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Virtualized GPU Clock Interface - * - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
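Two conventions recur throughout the virtualized clock code above: the server-side DVFS framework speaks kHz while the Linux side reports Hz (hence the *1000 and /1000 conversions), and every RPC folds the transport error together with the server's status via "err = err ? err : msg.ret". A hedged helper expressing that folding once (vgpu_send_cmd() is hypothetical; the driver open-codes it at each call site):

    static int vgpu_send_cmd(struct tegra_vgpu_cmd_msg *msg)
    {
            int err = vgpu_comm_sendrecv(msg, sizeof(*msg), sizeof(*msg));

            /* transport failure wins; otherwise report the server status */
            return err ? err : msg->ret;
    }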
- */ - -#ifndef _CLK_VIRT_H_ -#define _CLK_VIRT_H_ - -void vgpu_init_clk_support(struct gk20a *g); -long vgpu_clk_round_rate(struct device *dev, unsigned long rate); -int vgpu_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs); -int vgpu_clk_cap_rate(struct device *dev, unsigned long rate); -#endif diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c deleted file mode 100644 index 499a8eb4..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/fecs_trace_vgpu.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "common/linux/os_linux.h" -#include "vgpu/fecs_trace_vgpu.h" - -struct vgpu_fecs_trace { - struct tegra_hv_ivm_cookie *cookie; - struct nvgpu_ctxsw_ring_header *header; - struct nvgpu_ctxsw_trace_entry *entries; - int num_entries; - bool enabled; - void *buf; -}; - -int vgpu_fecs_trace_init(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - struct device_node *np = dev->of_node; - struct of_phandle_args args; - struct vgpu_fecs_trace *vcst; - u32 mempool; - int err; - - nvgpu_log_fn(g, " "); - - vcst = nvgpu_kzalloc(g, sizeof(*vcst)); - if (!vcst) - return -ENOMEM; - - err = of_parse_phandle_with_fixed_args(np, - "mempool-fecs-trace", 1, 0, &args); - if (err) { - nvgpu_info(g, "does not support fecs trace"); - goto fail; - } - __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); - - mempool = args.args[0]; - vcst->cookie = vgpu_ivm_mempool_reserve(mempool); - if (IS_ERR(vcst->cookie)) { - nvgpu_info(g, - "mempool %u reserve failed", mempool); - vcst->cookie = NULL; - err = -EINVAL; - goto fail; - } - - vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), - vgpu_ivm_get_size(vcst->cookie)); - if (!vcst->buf) { - nvgpu_info(g, "ioremap_cache failed"); - err = -EINVAL; - goto fail; - } - vcst->header = vcst->buf; - vcst->num_entries = vcst->header->num_ents; - if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { - nvgpu_err(g, "entry size mismatch"); - goto fail; - } - vcst->entries = vcst->buf + sizeof(*vcst->header); - g->fecs_trace = (struct gk20a_fecs_trace *)vcst; - - return 0; -fail: - iounmap(vcst->buf); - if (vcst->cookie) - vgpu_ivm_mempool_unreserve(vcst->cookie); - nvgpu_kfree(g, vcst); - return err; -} - -int vgpu_fecs_trace_deinit(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - iounmap(vcst->buf); - vgpu_ivm_mempool_unreserve(vcst->cookie); - nvgpu_kfree(g, vcst); - return 0; -} - -int vgpu_fecs_trace_enable(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, - .handle = vgpu_get_handle(g), - }; - int err; - - err = 
vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - vcst->enabled = !err; - return err; -} - -int vgpu_fecs_trace_disable(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, - .handle = vgpu_get_handle(g), - }; - int err; - - vcst->enabled = false; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -bool vgpu_fecs_trace_is_enabled(struct gk20a *g) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - return (vcst && vcst->enabled); -} - -int vgpu_fecs_trace_poll(struct gk20a *g) -{ - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, - .handle = vgpu_get_handle(g), - }; - int err; - - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - *buf = vcst->buf; - *size = vgpu_ivm_get_size(vcst->cookie); - return 0; -} - -int vgpu_free_user_buffer(struct gk20a *g) -{ - return 0; -} - -int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - unsigned long size = vgpu_ivm_get_size(vcst->cookie); - unsigned long vsize = vma->vm_end - vma->vm_start; - - size = min(size, vsize); - size = round_up(size, PAGE_SIZE); - - return remap_pfn_range(vma, vma->vm_start, - vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, - size, - vma->vm_page_prot); -} - -#ifdef CONFIG_GK20A_CTXSW_TRACE -int vgpu_fecs_trace_max_entries(struct gk20a *g, - struct nvgpu_ctxsw_trace_filter *filter) -{ - struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; - - return vcst->header->num_ents; -} - -#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE -#error "FECS trace filter size mismatch!" -#endif - -int vgpu_fecs_trace_set_filter(struct gk20a *g, - struct nvgpu_ctxsw_trace_filter *filter) -{ - struct tegra_vgpu_cmd_msg msg = { - .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, - .handle = vgpu_get_handle(g), - }; - struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; - int err; - - memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? err : msg.ret; - WARN_ON(err); - return err; -} - -void vgpu_fecs_trace_data_update(struct gk20a *g) -{ - gk20a_ctxsw_trace_wake_up(g, 0); -} -#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c deleted file mode 100644 index 054b019b..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
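The FECS-trace code above treats the reserved IVM mempool as one contiguous ring: a header immediately followed by header->num_ents fixed-size entries, with vgpu_mmap_user_buffer() exposing the same physical range to userspace via remap_pfn_range(). The layout implied by the init path (illustrative comment only, derived from the code above):

    /*
     *   vcst->buf ----> +---------------------------------------+
     *                   | struct nvgpu_ctxsw_ring_header        |
     *   vcst->entries-> +---------------------------------------+
     *                   | nvgpu_ctxsw_trace_entry[0]            |
     *                   | ...                                   |
     *                   | nvgpu_ctxsw_trace_entry[num_ents - 1] |
     *                   +---------------------------------------+
     *   (backing store: IVM mempool mapped with ioremap_cache())
     */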
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "gk20a/gk20a.h" -#include "common/linux/vgpu/clk_vgpu.h" -#include "common/linux/platform_gk20a.h" -#include "common/linux/os_linux.h" - -#include - -#include - -static int gv11b_vgpu_probe(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct resource *r; - void __iomem *regs; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); - struct gk20a *g = platform->g; - int ret; - - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); - if (!r) { - nvgpu_err(g, "failed to get usermode regs"); - return -ENXIO; - } - regs = devm_ioremap_resource(dev, r); - if (IS_ERR(regs)) { - nvgpu_err(g, "failed to map usermode regs"); - return PTR_ERR(regs); - } - l->usermode_regs = regs; - -#ifdef CONFIG_TEGRA_GK20A_NVHOST - ret = nvgpu_get_nvhost_dev(g); - if (ret) { - l->usermode_regs = NULL; - return ret; - } - - ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, - &g->syncpt_unit_base, - &g->syncpt_unit_size); - if (ret) { - nvgpu_err(g, "Failed to get syncpt interface"); - return -ENOSYS; - } - g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); - nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", - g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); -#endif - vgpu_init_clk_support(platform->g); - - return 0; -} - -struct gk20a_platform gv11b_vgpu_tegra_platform = { - .has_syncpoints = true, - - /* power management configuration */ - .can_railgate_init = false, - .can_elpg_init = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_elcg = false, - .enable_elpg = false, - .enable_aelpg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gv11b_vgpu_probe, - - .clk_round_rate = vgpu_clk_round_rate, - .get_clk_freqs = vgpu_clk_get_freqs, - - /* frequency scaling configuration */ - .devfreq_governor = "userspace", - - .virtual_dev = true, -}; diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c deleted file mode 100644 index 830b04ac..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/platform_vgpu_tegra.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Tegra Virtualized GPU Platform Interface - * - * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "gk20a/gk20a.h" -#include "common/linux/platform_gk20a.h" -#include "clk_vgpu.h" - -#include - -static int gk20a_tegra_probe(struct device *dev) -{ -#ifdef CONFIG_TEGRA_GK20A_NVHOST - struct gk20a_platform *platform = dev_get_drvdata(dev); - int ret; - - ret = nvgpu_get_nvhost_dev(platform->g); - if (ret) - return ret; - - vgpu_init_clk_support(platform->g); - return 0; -#else - return 0; -#endif -} - -struct gk20a_platform vgpu_tegra_platform = { - .has_syncpoints = true, - .aggressive_sync_destroy_thresh = 64, - - /* power management configuration */ - .can_railgate_init = false, - .can_elpg_init = false, - .enable_slcg = false, - .enable_blcg = false, - .enable_elcg = false, - .enable_elpg = false, - .enable_aelpg = false, - .can_slcg = false, - .can_blcg = false, - .can_elcg = false, - - .ch_wdt_timeout_ms = 5000, - - .probe = gk20a_tegra_probe, - - .clk_round_rate = vgpu_clk_round_rate, - .get_clk_freqs = vgpu_clk_get_freqs, - - /* frequency scaling configuration */ - .devfreq_governor = "userspace", - - .virtual_dev = true, -}; diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c deleted file mode 100644 index 5a8ed9fd..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/sysfs_vgpu.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "common/linux/platform_gk20a.h" - -static ssize_t vgpu_load_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct gk20a *g = get_gk20a(dev); - struct tegra_vgpu_cmd_msg msg = {0}; - struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; - int err; - - msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; - msg.handle = vgpu_get_handle(g); - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - if (err) - return err; - - return snprintf(buf, PAGE_SIZE, "%u\n", p->load); -} -static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); - -void vgpu_create_sysfs(struct device *dev) -{ - if (device_create_file(dev, &dev_attr_load)) - dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); -} - -void vgpu_remove_sysfs(struct device *dev) -{ - device_remove_file(dev, &dev_attr_load); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c deleted file mode 100644 index b28b5013..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivc.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
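One detail worth noting in vgpu_load_show() above: it returns the transport error from vgpu_comm_sendrecv() but never examines msg.ret, so a server-side failure would arguably surface as a stale or zero load value. A hedged variant of the tail of that handler which also folds the server status, matching the idiom used elsewhere in these files:

    err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
    err = err ? err : msg.ret;      /* also fail on server-side errors */
    if (err)
            return err;

    return snprintf(buf, PAGE_SIZE, "%u\n", p->load);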
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "common/linux/os_linux.h" - -int vgpu_ivc_init(struct gk20a *g, u32 elems, - const size_t *queue_sizes, u32 queue_start, u32 num_queues) -{ - struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); - - return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, - num_queues); -} - -void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) -{ - tegra_gr_comm_deinit(queue_start, num_queues); -} - -void vgpu_ivc_release(void *handle) -{ - tegra_gr_comm_release(handle); -} - -u32 vgpu_ivc_get_server_vmid(void) -{ - return tegra_gr_comm_get_server_vmid(); -} - -int vgpu_ivc_recv(u32 index, void **handle, void **data, - size_t *size, u32 *sender) -{ - return tegra_gr_comm_recv(index, handle, data, size, sender); -} - -int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) -{ - return tegra_gr_comm_send(peer, index, data, size); -} - -int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, - void **data, size_t *size) -{ - return tegra_gr_comm_sendrecv(peer, index, handle, data, size); -} - -u32 vgpu_ivc_get_peer_self(void) -{ - return TEGRA_GR_COMM_ID_SELF; -} - -void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, - size_t *size) -{ - return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); -} - -void vgpu_ivc_oob_put_ptr(void *handle) -{ - tegra_gr_comm_oob_put_ptr(handle); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c deleted file mode 100644 index 90089de8..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_ivm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - -#include - -#include "common/linux/os_linux.h" - -struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) -{ - return tegra_hv_mempool_reserve(id); -} - -int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) -{ - return tegra_hv_mempool_unreserve(cookie); -} - -u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) -{ - return cookie->ipa; -} - -u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) -{ - return cookie->size; -} - -void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) -{ - return ioremap_cache(vgpu_ivm_get_ipa(cookie), - vgpu_ivm_get_size(cookie)); -} - -void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, - void *addr) -{ - iounmap(addr); -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c deleted file mode 100644 index 0d224eb9..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Virtualized GPU for Linux - * - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
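vgpu_ivc.c and vgpu_ivm.c above are deliberately thin: they hide the Tegra-specific tegra_gr_comm_* and tegra_hv_* APIs behind nvgpu-named entry points so OS-independent vgpu code never includes Tegra headers, which is exactly the common/OS split this commit formalizes. A hedged usage sketch of the IVM lifecycle (id is a hypothetical mempool id):

    struct tegra_hv_ivm_cookie *cookie;
    void *va;

    cookie = vgpu_ivm_mempool_reserve(id);
    if (IS_ERR(cookie))
            return PTR_ERR(cookie);

    va = vgpu_ivm_mempool_map(cookie);       /* ioremap_cache() of the IPA */
    if (!va) {
            vgpu_ivm_mempool_unreserve(cookie);
            return -ENOMEM;
    }

    /* ... access vgpu_ivm_get_size(cookie) bytes through va ... */

    vgpu_ivm_mempool_unmap(cookie, va);      /* iounmap() */
    vgpu_ivm_mempool_unreserve(cookie);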
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "vgpu_linux.h" -#include "vgpu/fecs_trace_vgpu.h" -#include "clk_vgpu.h" -#include "gk20a/tsg_gk20a.h" -#include "gk20a/channel_gk20a.h" -#include "gk20a/regops_gk20a.h" -#include "gm20b/hal_gm20b.h" - -#include "common/linux/module.h" -#include "common/linux/os_linux.h" -#include "common/linux/ioctl.h" -#include "common/linux/scale.h" -#include "common/linux/driver_common.h" -#include "common/linux/platform_gk20a.h" - -#include - -struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) -{ - struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); - - return (struct vgpu_priv_data *)plat->vgpu_priv; -} - -static void vgpu_remove_support(struct gk20a *g) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - vgpu_remove_support_common(g); - - /* free mappings to registers, etc*/ - - if (l->bar1) { - iounmap(l->bar1); - l->bar1 = NULL; - } -} - -static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) -{ - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - nvgpu_mutex_init(&g->poweron_lock); - nvgpu_mutex_init(&g->poweroff_lock); - nvgpu_mutex_init(&g->ctxsw_disable_lock); - l->regs_saved = l->regs; - l->bar1_saved = l->bar1; - - g->aggressive_sync_destroy = platform->aggressive_sync_destroy; - g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; - g->has_syncpoints = platform->has_syncpoints; - g->ptimer_src_freq = platform->ptimer_src_freq; - g->can_railgate = platform->can_railgate_init; - g->railgate_delay = platform->railgate_delay_init; - - __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, - platform->unify_address_spaces); -} - -static int vgpu_init_support(struct platform_device *pdev) -{ - struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - struct gk20a *g = get_gk20a(&pdev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - void __iomem *regs; - int err = 0; - - if (!r) { - nvgpu_err(g, "failed to get gk20a bar1"); - err = -ENXIO; - goto fail; - } - - if (r->name && !strcmp(r->name, "/vgpu")) { - regs = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(regs)) { - nvgpu_err(g, "failed to remap gk20a bar1"); - err = PTR_ERR(regs); - goto fail; - } - l->bar1 = regs; - l->bar1_mem = r; - } - - nvgpu_mutex_init(&g->dbg_sessions_lock); - nvgpu_mutex_init(&g->client_lock); - - nvgpu_init_list_node(&g->profiler_objects); - - g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); - if (!g->dbg_regops_tmp_buf) { - nvgpu_err(g, "couldn't allocate regops tmp buf"); - return -ENOMEM; - } - g->dbg_regops_tmp_buf_ops = - SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); - - g->remove_support = vgpu_remove_support; - return 0; - - fail: - vgpu_remove_support(g); - return err; -} - -int vgpu_pm_prepare_poweroff(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - 
int ret = 0; - - nvgpu_log_fn(g, " "); - - if (!g->power_on) - return 0; - - ret = gk20a_channel_suspend(g); - if (ret) - return ret; - - g->power_on = false; - - return ret; -} - -int vgpu_pm_finalize_poweron(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int err; - - nvgpu_log_fn(g, " "); - - if (g->power_on) - return 0; - - g->power_on = true; - - vgpu_detect_chip(g); - err = vgpu_init_hal(g); - if (err) - goto done; - - if (g->ops.ltc.init_fs_state) - g->ops.ltc.init_fs_state(g); - - err = nvgpu_init_ltc_support(g); - if (err) { - nvgpu_err(g, "failed to init ltc"); - goto done; - } - - err = vgpu_init_mm_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a mm"); - goto done; - } - - err = vgpu_init_fifo_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a fifo"); - goto done; - } - - err = vgpu_init_gr_support(g); - if (err) { - nvgpu_err(g, "failed to init gk20a gr"); - goto done; - } - - err = g->ops.chip_init_gpu_characteristics(g); - if (err) { - nvgpu_err(g, "failed to init gk20a gpu characteristics"); - goto done; - } - - err = nvgpu_finalize_poweron_linux(l); - if (err) - goto done; - -#ifdef CONFIG_GK20A_CTXSW_TRACE - gk20a_ctxsw_trace_init(g); -#endif - gk20a_sched_ctrl_init(g); - gk20a_channel_resume(g); - - g->sw_ready = true; - -done: - return err; -} - -static int vgpu_qos_notify(struct notifier_block *nb, - unsigned long n, void *data) -{ - struct gk20a_scale_profile *profile = - container_of(nb, struct gk20a_scale_profile, - qos_notify_block); - struct gk20a *g = get_gk20a(profile->dev); - u32 max_freq; - int err; - - nvgpu_log_fn(g, " "); - - max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); - err = vgpu_clk_cap_rate(profile->dev, max_freq); - if (err) - nvgpu_err(g, "%s failed, err=%d", __func__, err); - - return NOTIFY_OK; /* need notify call further */ -} - -static int vgpu_pm_qos_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct gk20a_scale_profile *profile = g->scale_profile; - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { - if (!profile) - return -EINVAL; - } else { - profile = nvgpu_kzalloc(g, sizeof(*profile)); - if (!profile) - return -ENOMEM; - g->scale_profile = profile; - } - - profile->dev = dev; - profile->qos_notify_block.notifier_call = vgpu_qos_notify; - pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &profile->qos_notify_block); - return 0; -} - -static void vgpu_pm_qos_remove(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - - pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, - &g->scale_profile->qos_notify_block); - nvgpu_kfree(g, g->scale_profile); - g->scale_profile = NULL; -} - -static int vgpu_pm_init(struct device *dev) -{ - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - unsigned long *freqs; - int num_freqs; - int err = 0; - - nvgpu_log_fn(g, " "); - - if (nvgpu_platform_is_simulation(g)) - return 0; - - __pm_runtime_disable(dev, false); - - if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) - gk20a_scale_init(dev); - - if (l->devfreq) { - /* set min/max frequency based on frequency table */ - err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs); - if (err) - return err; - - if (num_freqs < 1) - return -EINVAL; - - l->devfreq->min_freq = freqs[0]; - l->devfreq->max_freq = freqs[num_freqs - 1]; - } - - err = vgpu_pm_qos_init(dev); - if (err) - return err; - - return err; -} - -int vgpu_probe(struct platform_device *pdev) -{ - struct nvgpu_os_linux *l; - struct gk20a *gk20a; - 
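vgpu_pm_finalize_poweron() above brings units up in strict dependency order: chip detection and HAL first, then LTC, MM, FIFO, GR, GPU characteristics, and finally the Linux-only pieces. A hedged restatement of the core sequence as a table-driven loop (illustration only; the driver open-codes the calls so each failure can log its own message):

    static int vgpu_run_poweron_steps(struct gk20a *g)
    {
            static int (*const steps[])(struct gk20a *g) = {
                    nvgpu_init_ltc_support,   /* cache setup used by MM */
                    vgpu_init_mm_support,     /* address spaces come first */
                    vgpu_init_fifo_support,   /* channels depend on MM */
                    vgpu_init_gr_support,     /* graphics depends on FIFO */
            };
            unsigned int i;
            int err;

            for (i = 0; i < ARRAY_SIZE(steps); i++) {
                    err = steps[i](g);
                    if (err)
                            return err;       /* stop at the first failure */
            }
            return 0;
    }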
int err; - struct device *dev = &pdev->dev; - struct gk20a_platform *platform = gk20a_get_platform(dev); - struct vgpu_priv_data *priv; - - if (!platform) { - dev_err(dev, "no platform data\n"); - return -ENODATA; - } - - l = kzalloc(sizeof(*l), GFP_KERNEL); - if (!l) { - dev_err(dev, "couldn't allocate gk20a support"); - return -ENOMEM; - } - gk20a = &l->g; - - nvgpu_log_fn(gk20a, " "); - - nvgpu_init_gk20a(gk20a); - - nvgpu_kmem_init(gk20a); - - err = nvgpu_init_enabled_flags(gk20a); - if (err) { - kfree(gk20a); - return err; - } - - l->dev = dev; - if (tegra_platform_is_vdk()) - __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); - - gk20a->is_virtual = true; - - priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); - if (!priv) { - kfree(gk20a); - return -ENOMEM; - } - - platform->g = gk20a; - platform->vgpu_priv = priv; - - err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); - if (err) - return err; - - vgpu_init_support(pdev); - - vgpu_init_vars(gk20a, platform); - - init_rwsem(&l->busy_lock); - - nvgpu_spinlock_init(&gk20a->mc_enable_lock); - - gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; - - /* Initialize the platform interface. */ - err = platform->probe(dev); - if (err) { - if (err == -EPROBE_DEFER) - nvgpu_info(gk20a, "platform probe failed"); - else - nvgpu_err(gk20a, "platform probe failed"); - return err; - } - - if (platform->late_probe) { - err = platform->late_probe(dev); - if (err) { - nvgpu_err(gk20a, "late probe failed"); - return err; - } - } - - err = vgpu_comm_init(gk20a); - if (err) { - nvgpu_err(gk20a, "failed to init comm interface"); - return -ENOSYS; - } - - priv->virt_handle = vgpu_connect(); - if (!priv->virt_handle) { - nvgpu_err(gk20a, "failed to connect to server node"); - vgpu_comm_deinit(); - return -ENOSYS; - } - - err = vgpu_get_constants(gk20a); - if (err) { - vgpu_comm_deinit(); - return err; - } - - err = vgpu_pm_init(dev); - if (err) { - nvgpu_err(gk20a, "pm init failed"); - return err; - } - - err = nvgpu_thread_create(&priv->intr_handler, gk20a, - vgpu_intr_thread, "gk20a"); - if (err) - return err; - - gk20a_debug_init(gk20a, "gpu.0"); - - /* Set DMA parameters to allow larger sgt lists */ - dev->dma_parms = &l->dma_parms; - dma_set_max_seg_size(dev, UINT_MAX); - - gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; - gk20a->timeouts_disabled_by_user = false; - nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); - - vgpu_create_sysfs(dev); - gk20a_init_gr(gk20a); - - nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); - gk20a->gr.max_comptag_mem = totalram_pages - >> (10 - (PAGE_SHIFT - 10)); - - nvgpu_ref_init(&gk20a->refcount); - - return 0; -} - -int vgpu_remove(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct gk20a *g = get_gk20a(dev); - - nvgpu_log_fn(g, " "); - - vgpu_pm_qos_remove(dev); - if (g->remove_support) - g->remove_support(g); - - vgpu_comm_deinit(); - gk20a_sched_ctrl_cleanup(g); - gk20a_user_deinit(dev, &nvgpu_class); - vgpu_remove_sysfs(dev); - gk20a_get_platform(dev)->g = NULL; - gk20a_put(g); - - return 0; -} - -bool vgpu_is_reduced_bar1(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; -} diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h deleted file mode 100644 index 38379cf2..00000000 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.h +++ /dev/null @@ -1,57 +0,0 
@@ -/* - * Virtualized GPU Linux Interfaces - * - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __VGPU_LINUX_H__ -#define __VGPU_LINUX_H__ - -struct device; -struct platform_device; - -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - -#include - -int vgpu_pm_prepare_poweroff(struct device *dev); -int vgpu_pm_finalize_poweron(struct device *dev); -int vgpu_probe(struct platform_device *dev); -int vgpu_remove(struct platform_device *dev); - -void vgpu_create_sysfs(struct device *dev); -void vgpu_remove_sysfs(struct device *dev); -#else -/* define placeholders for functions used outside of vgpu */ - -static inline int vgpu_pm_prepare_poweroff(struct device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_pm_finalize_poweron(struct device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_probe(struct platform_device *dev) -{ - return -ENOSYS; -} -static inline int vgpu_remove(struct platform_device *dev) -{ - return -ENOSYS; -} -#endif - -#endif diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c deleted file mode 100644 index 136d4a10..00000000 --- a/drivers/gpu/nvgpu/common/linux/vidmem.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
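The header above uses a common kernel pattern: when CONFIG_TEGRA_GR_VIRTUALIZATION is disabled, static inline stubs returning -ENOSYS stand in for the real entry points, so callers compile and link without scattering #ifdefs. Hedged caller-side illustration (native_probe() is a hypothetical name for the non-virtual path):

    /* Callers can branch on platform data unconditionally: */
    if (platform->virtual_dev)
            err = vgpu_probe(pdev);   /* real code, or the -ENOSYS stub */
    else
            err = native_probe(pdev); /* hypothetical native path */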
- */ - -#include -#include - -#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD -#include -#endif - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" - -bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) -{ - return !!(addr & 1ULL); -} - -void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) -{ - /* set bit 0 to indicate vidmem allocation */ - sg_dma_address(sgl) = (addr | 1ULL); -} - -struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) -{ - u64 addr; - - addr = sg_dma_address(sgl); - - if (nvgpu_addr_is_vidmem_page_alloc(addr)) - addr = addr & ~1ULL; - else - WARN_ON(1); - - return (struct nvgpu_page_alloc *)(uintptr_t)addr; -} - -static struct sg_table *gk20a_vidbuf_map_dma_buf( - struct dma_buf_attachment *attach, enum dma_data_direction dir) -{ - struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; - - return buf->mem->priv.sgt; -} - -static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *sgt, - enum dma_data_direction dir) -{ -} - -static void gk20a_vidbuf_release(struct dma_buf *dmabuf) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - struct gk20a *g = buf->g; - - vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", - dmabuf, buf->mem->size >> 10); - - if (linux_buf && linux_buf->dmabuf_priv_delete) - linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); - - nvgpu_kfree(g, linux_buf); - nvgpu_vidmem_buf_free(g, buf); - - gk20a_put(g); -} - -static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) -{ - WARN_ON("Not supported"); - return NULL; -} - -static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, - unsigned long page_num) -{ - WARN_ON("Not supported"); - return NULL; -} - -static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - return -EINVAL; -} - -static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, - struct device *dev, void *priv, void (*delete)(void *priv)) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - - linux_buf->dmabuf_priv = priv; - linux_buf->dmabuf_priv_delete = delete; - - return 0; -} - -static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, - struct device *dev) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - struct nvgpu_vidmem_linux *linux_buf = buf->priv; - - return linux_buf->dmabuf_priv; -} - -static const struct dma_buf_ops gk20a_vidbuf_ops = { - .map_dma_buf = gk20a_vidbuf_map_dma_buf, - .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, - .release = gk20a_vidbuf_release, - .kmap_atomic = gk20a_vidbuf_kmap_atomic, - .kmap = gk20a_vidbuf_kmap, - .mmap = gk20a_vidbuf_mmap, - .set_drvdata = gk20a_vidbuf_set_private, - .get_drvdata = gk20a_vidbuf_get_private, -}; - -static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) -{ - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.priv = buf; - exp_info.ops = &gk20a_vidbuf_ops; - exp_info.size = buf->mem->size; - exp_info.flags = O_RDWR; - - return dma_buf_export(&exp_info); -} - -struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) -{ - struct nvgpu_vidmem_buf *buf = dmabuf->priv; - - if (dmabuf->ops != &gk20a_vidbuf_ops) - return NULL; - - return buf->g; -} - -int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) -{ - struct nvgpu_vidmem_buf *buf = NULL; - struct nvgpu_vidmem_linux *priv; - int err, fd; - - /* - * This ref is released 
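The helpers at the top of vidmem.c above steal bit 0 of sg_dma_address() to mark vidmem allocations: page-alloc handles are pointer-aligned, so that bit is never set naturally. A hedged round-trip illustration (alloc is a hypothetical struct nvgpu_page_alloc pointer, sgl an already-populated scatterlist entry):

    u64 handle = (u64)(uintptr_t)alloc;

    nvgpu_vidmem_set_page_alloc(sgl, handle);       /* stores handle | 1 */
    WARN_ON(!nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(sgl)));
    WARN_ON(nvgpu_vidmem_get_page_alloc(sgl) != alloc); /* bit 0 cleared */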
when the dma_buf is closed. - */ - if (!gk20a_get(g)) - return -ENODEV; - - vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); - - priv = nvgpu_kzalloc(g, sizeof(*priv)); - if (!priv) { - err = -ENOMEM; - goto fail; - } - - buf = nvgpu_vidmem_user_alloc(g, bytes); - if (IS_ERR(buf)) { - err = PTR_ERR(buf); - goto fail; - } - - priv->dmabuf = gk20a_vidbuf_export(buf); - if (IS_ERR(priv->dmabuf)) { - err = PTR_ERR(priv->dmabuf); - goto fail; - } - - buf->priv = priv; - -#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD - fd = tegra_alloc_fd(current->files, 1024, O_RDWR); -#else - fd = get_unused_fd_flags(O_RDWR); -#endif - if (fd < 0) { - /* ->release frees what we have done */ - dma_buf_put(priv->dmabuf); - return fd; - } - - /* fclose() on this drops one ref, freeing the dma buf */ - fd_install(fd, priv->dmabuf->file); - - vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", - priv->dmabuf, buf->mem->size >> 10); - - return fd; - -fail: - nvgpu_vidmem_buf_free(g, buf); - nvgpu_kfree(g, priv); - gk20a_put(g); - - vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); - return err; -} - -int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, - void *buffer, u64 offset, u64 size, u32 cmd) -{ - struct nvgpu_vidmem_buf *vidmem_buf; - struct nvgpu_mem *mem; - int err = 0; - - if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) - return -EINVAL; - - vidmem_buf = dmabuf->priv; - mem = vidmem_buf->mem; - - switch (cmd) { - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: - nvgpu_mem_rd_n(g, mem, offset, buffer, size); - break; - - case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: - nvgpu_mem_wr_n(g, mem, offset, buffer, size); - break; - - default: - err = -EINVAL; - } - - return err; -} - -void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) -{ - nvgpu_free(vidmem->allocator, - (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); - nvgpu_free_sgtable(g, &vidmem->priv.sgt); -} diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c deleted file mode 100644 index baa77515..00000000 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gk20a/gk20a.h" -#include "gk20a/mm_gk20a.h" - -#include "platform_gk20a.h" -#include "os_linux.h" -#include "dmabuf.h" - -static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) -{ - u32 core_flags = 0; - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) - core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) - core_flags |= NVGPU_VM_MAP_CACHEABLE; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) - core_flags |= NVGPU_VM_MAP_IO_COHERENT; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) - core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) - core_flags |= NVGPU_VM_MAP_L3_ALLOC; - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) - core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; - - if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) - nvgpu_warn(g, "Ignoring deprecated flag: " - "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); - - return core_flags; -} - -static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( - struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) -{ - struct nvgpu_rbtree_node *node = NULL; - struct nvgpu_rbtree_node *root = vm->mapped_buffers; - - nvgpu_rbtree_enum_start(0, &node, root); - - while (node) { - struct nvgpu_mapped_buf *mapped_buffer = - mapped_buffer_from_rbtree_node(node); - - if (mapped_buffer->os_priv.dmabuf == dmabuf && - mapped_buffer->kind == kind) - return mapped_buffer; - - nvgpu_rbtree_enum_next(&node, node); - } - - return NULL; -} - -int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, - struct dma_buf **dmabuf, - u64 *offset) -{ - struct nvgpu_mapped_buf *mapped_buffer; - struct gk20a *g = gk20a_from_vm(vm); - - nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); - if (!mapped_buffer) { - nvgpu_mutex_release(&vm->update_gmmu_lock); - return -EINVAL; - } - - *dmabuf = mapped_buffer->os_priv.dmabuf; - *offset = gpu_va - mapped_buffer->addr; - - nvgpu_mutex_release(&vm->update_gmmu_lock); - - return 0; -} - -u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) -{ - return os_buf->dmabuf->size; -} - -/* - * vm->update_gmmu_lock must be held. This checks to see if we already have - * mapped the passed buffer into this VM. If so, just return the existing - * mapping address. - */ -struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, - struct nvgpu_os_buffer *os_buf, - u64 map_addr, - u32 flags, - int kind) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct nvgpu_mapped_buf *mapped_buffer = NULL; - - if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { - mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); - if (!mapped_buffer) - return NULL; - - if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || - mapped_buffer->kind != (u32)kind) - return NULL; - } else { - mapped_buffer = - __nvgpu_vm_find_mapped_buf_reverse(vm, - os_buf->dmabuf, - kind); - if (!mapped_buffer) - return NULL; - } - - if (mapped_buffer->flags != flags) - return NULL; - - /* - * If we find the mapping here then that means we have mapped it already - * and the prior pin and get must be undone. 
- */ - gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, - mapped_buffer->os_priv.sgt); - dma_buf_put(os_buf->dmabuf); - - nvgpu_log(g, gpu_dbg_map, - "gv: 0x%04x_%08x + 0x%-7zu " - "[dma: 0x%010llx, pa: 0x%010llx] " - "pgsz=%-3dKb as=%-2d " - "flags=0x%x apt=%s (reused)", - u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), - os_buf->dmabuf->size, - (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), - (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), - vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, - vm_aspace_id(vm), - mapped_buffer->flags, - nvgpu_aperture_str(g, - gk20a_dmabuf_aperture(g, os_buf->dmabuf))); - - return mapped_buffer; -} - -int nvgpu_vm_map_linux(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags, - s16 compr_kind, - s16 incompr_kind, - int rw_flag, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch, - u64 *gpu_va) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct device *dev = dev_from_gk20a(g); - struct nvgpu_os_buffer os_buf; - struct sg_table *sgt; - struct nvgpu_sgt *nvgpu_sgt = NULL; - struct nvgpu_mapped_buf *mapped_buffer = NULL; - struct dma_buf_attachment *attachment; - u64 map_addr = 0ULL; - int err = 0; - - if (flags & NVGPU_VM_MAP_FIXED_OFFSET) - map_addr = offset_align; - - sgt = gk20a_mm_pin(dev, dmabuf, &attachment); - if (IS_ERR(sgt)) { - nvgpu_warn(g, "Failed to pin dma_buf!"); - return PTR_ERR(sgt); - } - os_buf.dmabuf = dmabuf; - os_buf.attachment = attachment; - os_buf.dev = dev; - - if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { - err = -EINVAL; - goto clean_up; - } - - nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); - if (!nvgpu_sgt) { - err = -ENOMEM; - goto clean_up; - } - - mapped_buffer = nvgpu_vm_map(vm, - &os_buf, - nvgpu_sgt, - map_addr, - mapping_size, - buffer_offset, - rw_flag, - flags, - compr_kind, - incompr_kind, - batch, - gk20a_dmabuf_aperture(g, dmabuf)); - - nvgpu_sgt_free(g, nvgpu_sgt); - - if (IS_ERR(mapped_buffer)) { - err = PTR_ERR(mapped_buffer); - goto clean_up; - } - - mapped_buffer->os_priv.dmabuf = dmabuf; - mapped_buffer->os_priv.attachment = attachment; - mapped_buffer->os_priv.sgt = sgt; - - *gpu_va = mapped_buffer->addr; - return 0; - -clean_up: - gk20a_mm_unpin(dev, dmabuf, attachment, sgt); - - return err; -} - -int nvgpu_vm_map_buffer(struct vm_gk20a *vm, - int dmabuf_fd, - u64 *offset_align, - u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ - s16 compr_kind, - s16 incompr_kind, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct dma_buf *dmabuf; - u64 ret_va; - int err = 0; - - /* get ref to the mem handle (released on unmap_locked) */ - dmabuf = dma_buf_get(dmabuf_fd); - if (IS_ERR(dmabuf)) { - nvgpu_warn(g, "%s: fd %d is not a dmabuf", - __func__, dmabuf_fd); - return PTR_ERR(dmabuf); - } - - /* verify that we're not overflowing the buffer, i.e. - * (buffer_offset + mapping_size)> dmabuf->size. - * - * Since buffer_offset + mapping_size could overflow, first check - * that mapping size < dmabuf_size, at which point we can subtract - * mapping_size from both sides for the final comparison. 
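 *
 * A self-contained illustration of the same overflow-safe check (the
 * helper name is hypothetical, not part of the driver):
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	// True iff [offset, offset + size) lies inside a buf_size-byte
 *	// buffer, without ever forming the sum that could wrap.
 *	static bool range_fits(uint64_t buf_size, uint64_t offset,
 *			       uint64_t size)
 *	{
 *		if (size > buf_size)
 *			return false;
 *		return offset <= buf_size - size;
 *	}
 *
 * For example, range_fits(16, 8, 8) is true, while
 * range_fits(16, UINT64_MAX, 8) is false even though the naive
 * offset + size comparison would wrap around to 7 and pass.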
- */ - if ((mapping_size > dmabuf->size) || - (buffer_offset > (dmabuf->size - mapping_size))) { - nvgpu_err(g, - "buf size %llx < (offset(%llx) + map_size(%llx))\n", - (u64)dmabuf->size, buffer_offset, mapping_size); - dma_buf_put(dmabuf); - return -EINVAL; - } - - err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); - if (err) { - dma_buf_put(dmabuf); - return err; - } - - err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align, - nvgpu_vm_translate_linux_flags(g, flags), - compr_kind, incompr_kind, - gk20a_mem_flag_none, - buffer_offset, - mapping_size, - batch, - &ret_va); - - if (!err) - *offset_align = ret_va; - else - dma_buf_put(dmabuf); - - return err; -} - -/* - * This is the function call-back for freeing OS specific components of an - * nvgpu_mapped_buf. This should most likely never be called outside of the - * core MM framework! - * - * Note: the VM lock will be held. - */ -void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) -{ - struct vm_gk20a *vm = mapped_buffer->vm; - - gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, - mapped_buffer->os_priv.attachment, - mapped_buffer->os_priv.sgt); - - dma_buf_put(mapped_buffer->os_priv.dmabuf); -} diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 117920da..7e0aee11 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -41,7 +41,7 @@ #include "fecs_trace_gk20a.h" #include "gk20a.h" #include "gr_gk20a.h" -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #include diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c index 057527a9..07e281a5 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c @@ -24,7 +24,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include diff --git a/drivers/gpu/nvgpu/gp106/mclk_gp106.c b/drivers/gpu/nvgpu/gp106/mclk_gp106.c index 074aec92..4ea9e6f6 100644 --- a/drivers/gpu/nvgpu/gp106/mclk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/mclk_gp106.c @@ -28,7 +28,7 @@ #include "gk20a/gk20a.h" #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include "gp106/mclk_gp106.h" diff --git a/drivers/gpu/nvgpu/gp106/therm_gp106.c b/drivers/gpu/nvgpu/gp106/therm_gp106.c index b3862abe..b1f6fefb 100644 --- a/drivers/gpu/nvgpu/gp106/therm_gp106.c +++ b/drivers/gpu/nvgpu/gp106/therm_gp106.c @@ -27,7 +27,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif #include diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b.h b/drivers/gpu/nvgpu/gp10b/platform_gp10b.h deleted file mode 100644 index d256d126..00000000 --- a/drivers/gpu/nvgpu/gp10b/platform_gp10b.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * GP10B Platform (SoC) Interface - * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef _GP10B_PLATFORM_H_ -#define _GP10B_PLATFORM_H_ - -struct device; - -int gp10b_tegra_get_clocks(struct device *dev); -int gp10b_tegra_reset_assert(struct device *dev); -int gp10b_tegra_reset_deassert(struct device *dev); -void gp10b_tegra_scale_init(struct device *dev); -long gp10b_round_clk_rate(struct device *dev, unsigned long rate); -int gp10b_clk_get_freqs(struct device *dev, - unsigned long **freqs, int *num_freqs); -void gp10b_tegra_prescale(struct device *dev); -void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); -#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h index ba6012ec..7d50d222 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h @@ -25,6 +25,8 @@ #ifdef CONFIG_TEGRA_GK20A_NVHOST +#include + struct nvgpu_nvhost_dev; struct gk20a; struct sync_pt; diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c new file mode 100644 index 00000000..32b333f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -0,0 +1,1786 @@ +/* + * Color decompression engine support + * + * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "gk20a/fence_gk20a.h" +#include "gk20a/gr_gk20a.h" + +#include "cde.h" +#include "os_linux.h" +#include "dmabuf.h" +#include "channel.h" +#include "cde_gm20b.h" +#include "cde_gp10b.h" + +#include +#include + +static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); +static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); + +#define CTX_DELETE_TIME 1000 + +#define MAX_CTX_USE_COUNT 42 +#define MAX_CTX_RETRY_TIME 2000 + +static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) +{ + struct nvgpu_mapped_buf *buffer; + dma_addr_t addr = 0; + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); + if (buffer) + addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return addr; +} + +static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) +{ + unsigned int i; + + for (i = 0; i < cde_ctx->num_bufs; i++) { + struct nvgpu_mem *mem = cde_ctx->mem + i; + nvgpu_dma_unmap_free(cde_ctx->vm, mem); + } + + nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd); + + cde_ctx->convert_cmd = NULL; + cde_ctx->init_convert_cmd = NULL; + cde_ctx->num_bufs = 0; + cde_ctx->num_params = 0; + cde_ctx->init_cmd_num_entries = 0; + cde_ctx->convert_cmd_num_entries = 0; + cde_ctx->init_cmd_executed = false; +} + +static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx) +__must_hold(&cde_app->mutex) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct channel_gk20a *ch = cde_ctx->ch; + struct vm_gk20a *vm = ch->vm; + + trace_gk20a_cde_remove_ctx(cde_ctx); + + /* release mapped memory */ + gk20a_deinit_cde_img(cde_ctx); + nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem, + cde_ctx->backing_store_vaddr); + + /* + * free the channel + * gk20a_channel_close() will also unbind the channel from TSG + */ + gk20a_channel_close(ch); + nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release); + + /* housekeeping on app */ + nvgpu_list_del(&cde_ctx->list); + l->cde_app.ctx_count--; + nvgpu_kfree(g, cde_ctx); +} + +static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx, + bool wait_finish) +__releases(&cde_app->mutex) +__acquires(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + + /* permanent contexts do not have deleter works */ + if (!cde_ctx->is_temporary) + return; + + if (wait_finish) { + nvgpu_mutex_release(&cde_app->mutex); + cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work); + nvgpu_mutex_acquire(&cde_app->mutex); + } else { + cancel_delayed_work(&cde_ctx->ctx_deleter_work); + } +} + +static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; + + /* safe to go off the mutex in cancel_deleter since app is + * deinitialised; no new jobs are started. 
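 *
 * As a sketch (simplified names, not the exact functions below): the
 * synchronous cancel must drop the app mutex because the deleter
 * callback takes that same mutex, so waiting for it to finish with the
 * lock held would deadlock:
 *
 *	#include <linux/mutex.h>
 *	#include <linux/types.h>
 *	#include <linux/workqueue.h>
 *
 *	struct ctx_s { struct delayed_work deleter; };
 *
 *	static void cancel_deleter(struct mutex *app_lock,
 *				   struct ctx_s *ctx, bool wait)
 *	{
 *		if (wait) {
 *			mutex_unlock(app_lock);
 *			cancel_delayed_work_sync(&ctx->deleter);
 *			mutex_lock(app_lock);
 *		} else {
 *			cancel_delayed_work(&ctx->deleter); // non-blocking
 *		}
 *	}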
deleter works may be only at + * waiting for the mutex or before, going to abort */ + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->free_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, true); + gk20a_cde_remove_ctx(cde_ctx); + } + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->used_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, true); + gk20a_cde_remove_ctx(cde_ctx); + } +} + +static void gk20a_cde_stop(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + + /* prevent further conversions and delayed works from working */ + cde_app->initialised = false; + /* free all data, empty the list */ + gk20a_cde_remove_contexts(l); +} + +void gk20a_cde_destroy(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + + if (!cde_app->initialised) + return; + + nvgpu_mutex_acquire(&cde_app->mutex); + gk20a_cde_stop(l); + nvgpu_mutex_release(&cde_app->mutex); + + nvgpu_mutex_destroy(&cde_app->mutex); +} + +void gk20a_cde_suspend(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; + + if (!cde_app->initialised) + return; + + nvgpu_mutex_acquire(&cde_app->mutex); + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->free_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, false); + } + + nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, + &cde_app->used_contexts, gk20a_cde_ctx, list) { + gk20a_cde_cancel_deleter(cde_ctx, false); + } + + nvgpu_mutex_release(&cde_app->mutex); + +} + +static int gk20a_cde_create_context(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx; + + cde_ctx = gk20a_cde_allocate_context(l); + if (IS_ERR(cde_ctx)) + return PTR_ERR(cde_ctx); + + nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); + cde_app->ctx_count++; + if (cde_app->ctx_count > cde_app->ctx_count_top) + cde_app->ctx_count_top = cde_app->ctx_count; + + return 0; +} + +static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) +__must_hold(&l->cde_app->mutex) +{ + int err; + int i; + + for (i = 0; i < NUM_CDE_CONTEXTS; i++) { + err = gk20a_cde_create_context(l); + if (err) + goto out; + } + + return 0; +out: + gk20a_cde_remove_contexts(l); + return err; +} + +static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_buf *buf) +{ + struct nvgpu_mem *mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* check that the file can hold the buf */ + if (buf->data_byte_offset != 0 && + buf->data_byte_offset + buf->num_bytes > img->size) { + nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", + cde_ctx->num_bufs); + return -EINVAL; + } + + /* check that we have enough buf elems available */ + if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { + nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", + cde_ctx->num_bufs); + return -ENOMEM; + } + + /* allocate buf */ + mem = cde_ctx->mem + cde_ctx->num_bufs; + err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); + if (err) { + nvgpu_warn(g, "cde: could not allocate device memory. 
buffer idx = %d", + cde_ctx->num_bufs); + return -ENOMEM; + } + + /* copy the content */ + if (buf->data_byte_offset != 0) + memcpy(mem->cpu_va, img->data + buf->data_byte_offset, + buf->num_bytes); + + cde_ctx->num_bufs++; + + return 0; +} + +static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, + int type, s32 shift, u64 mask, u64 value) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + u32 *target_mem_ptr = target; + u64 *target_mem_ptr_u64 = target; + u64 current_value, new_value; + + value = (shift >= 0) ? value << shift : value >> -shift; + value &= mask; + + /* read current data from the location */ + current_value = 0; + if (type == TYPE_PARAM_TYPE_U32) { + if (mask != 0xfffffffful) + current_value = *target_mem_ptr; + } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { + if (mask != ~0ul) + current_value = *target_mem_ptr_u64; + } else if (type == TYPE_PARAM_TYPE_U64_BIG) { + current_value = *target_mem_ptr_u64; + current_value = (u64)(current_value >> 32) | + (u64)(current_value << 32); + } else { + nvgpu_warn(g, "cde: unknown type. type=%d", + type); + return -EINVAL; + } + + current_value &= ~mask; + new_value = current_value | value; + + /* store the element data back */ + if (type == TYPE_PARAM_TYPE_U32) + *target_mem_ptr = (u32)new_value; + else if (type == TYPE_PARAM_TYPE_U64_LITTLE) + *target_mem_ptr_u64 = new_value; + else { + new_value = (u64)(new_value >> 32) | + (u64)(new_value << 32); + *target_mem_ptr_u64 = new_value; + } + + return 0; +} + +static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_replace *replace) +{ + struct nvgpu_mem *source_mem; + struct nvgpu_mem *target_mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + u32 *target_mem_ptr; + u64 vaddr; + int err; + + if (replace->target_buf >= cde_ctx->num_bufs || + replace->source_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", + replace->target_buf, replace->source_buf, + cde_ctx->num_bufs); + return -EINVAL; + } + + source_mem = cde_ctx->mem + replace->source_buf; + target_mem = cde_ctx->mem + replace->target_buf; + target_mem_ptr = target_mem->cpu_va; + + if (source_mem->size < (replace->source_byte_offset + 3) || + target_mem->size < (replace->target_byte_offset + 3)) { + nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", + replace->target_byte_offset, + replace->source_byte_offset, + source_mem->size, + target_mem->size); + return -EINVAL; + } + + /* calculate the target pointer */ + target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); + + /* determine patch value */ + vaddr = source_mem->gpu_va + replace->source_byte_offset; + err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, + replace->shift, replace->mask, + vaddr); + if (err) { + nvgpu_warn(g, "cde: replace failed. 
err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", + err, replace->target_buf, + replace->target_byte_offset, + replace->source_buf, + replace->source_byte_offset); + } + + return err; +} + +static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_mem *target_mem; + u32 *target_mem_ptr; + u64 new_data; + int user_id = 0, err; + unsigned int i; + + for (i = 0; i < cde_ctx->num_params; i++) { + struct gk20a_cde_hdr_param *param = cde_ctx->params + i; + target_mem = cde_ctx->mem + param->target_buf; + target_mem_ptr = target_mem->cpu_va; + target_mem_ptr += (param->target_byte_offset / sizeof(u32)); + + switch (param->id) { + case TYPE_PARAM_COMPTAGS_PER_CACHELINE: + new_data = g->gr.comptags_per_cacheline; + break; + case TYPE_PARAM_GPU_CONFIGURATION: + new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * + g->gr.cacheline_size; + break; + case TYPE_PARAM_FIRSTPAGEOFFSET: + new_data = cde_ctx->surf_param_offset; + break; + case TYPE_PARAM_NUMPAGES: + new_data = cde_ctx->surf_param_lines; + break; + case TYPE_PARAM_BACKINGSTORE: + new_data = cde_ctx->backing_store_vaddr; + break; + case TYPE_PARAM_DESTINATION: + new_data = cde_ctx->compbit_vaddr; + break; + case TYPE_PARAM_DESTINATION_SIZE: + new_data = cde_ctx->compbit_size; + break; + case TYPE_PARAM_BACKINGSTORE_SIZE: + new_data = g->gr.compbit_store.mem.size; + break; + case TYPE_PARAM_SOURCE_SMMU_ADDR: + new_data = gpuva_to_iova_base(cde_ctx->vm, + cde_ctx->surf_vaddr); + if (new_data == 0) { + nvgpu_warn(g, "cde: failed to find 0x%llx", + cde_ctx->surf_vaddr); + return -EINVAL; + } + break; + case TYPE_PARAM_BACKINGSTORE_BASE_HW: + new_data = g->gr.compbit_store.base_hw; + break; + case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: + new_data = g->gr.gobs_per_comptagline_per_slice; + break; + case TYPE_PARAM_SCATTERBUFFER: + new_data = cde_ctx->scatterbuffer_vaddr; + break; + case TYPE_PARAM_SCATTERBUFFER_SIZE: + new_data = cde_ctx->scatterbuffer_size; + break; + default: + user_id = param->id - NUM_RESERVED_PARAMS; + if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) + continue; + new_data = cde_ctx->user_param_values[user_id]; + } + + nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", + i, param->id, param->target_buf, + param->target_byte_offset, new_data, + param->data_offset, param->type, param->shift, + param->mask); + + new_data += param->data_offset; + + err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, + param->shift, param->mask, new_data); + + if (err) { + nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", + err, i, param->id, param->target_buf, + param->target_byte_offset, new_data); + return err; + } + } + + return 0; +} + +static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + struct gk20a_cde_hdr_param *param) +{ + struct nvgpu_mem *target_mem; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + + if (param->target_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: invalid buffer parameter. 
param idx = %d, target_buf=%u, num_bufs=%u", + cde_ctx->num_params, param->target_buf, + cde_ctx->num_bufs); + return -EINVAL; + } + + target_mem = cde_ctx->mem + param->target_buf; + if (target_mem->size < (param->target_byte_offset + 3)) { + nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", + cde_ctx->num_params, param->target_byte_offset, + target_mem->size); + return -EINVAL; + } + + /* does this parameter fit into our parameter structure */ + if (cde_ctx->num_params >= MAX_CDE_PARAMS) { + nvgpu_warn(g, "cde: no room for new parameters param idx = %d", + cde_ctx->num_params); + return -ENOMEM; + } + + /* is the given id valid? */ + if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { + nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", + param->id, cde_ctx->num_params, + NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); + return -EINVAL; + } + + cde_ctx->params[cde_ctx->num_params] = *param; + cde_ctx->num_params++; + + return 0; +} + +static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + u32 required_class) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* CDE enabled */ + cde_ctx->ch->cde = true; + + err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); + if (err) { + nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", + err); + return err; + } + + return 0; +} + +static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img, + u32 op, + struct gk20a_cde_cmd_elem *cmd_elem, + u32 num_elems) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; + u32 *num_entries; + unsigned int i; + + /* check command type */ + if (op == TYPE_BUF_COMMAND_INIT) { + gpfifo = &cde_ctx->init_convert_cmd; + num_entries = &cde_ctx->init_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_CONVERT) { + gpfifo = &cde_ctx->convert_cmd; + num_entries = &cde_ctx->convert_cmd_num_entries; + } else { + nvgpu_warn(g, "cde: unknown command. 
op=%u", + op); + return -EINVAL; + } + + /* allocate gpfifo entries to be pushed */ + *gpfifo = nvgpu_kzalloc(g, + sizeof(struct nvgpu_gpfifo_entry) * num_elems); + if (!*gpfifo) { + nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); + return -ENOMEM; + } + + gpfifo_elem = *gpfifo; + for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { + struct nvgpu_mem *target_mem; + + /* validate the current entry */ + if (cmd_elem->target_buf >= cde_ctx->num_bufs) { + nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", + cmd_elem->target_buf, cde_ctx->num_bufs); + return -EINVAL; + } + + target_mem = cde_ctx->mem + cmd_elem->target_buf; + if (target_mem->size< + cmd_elem->target_byte_offset + cmd_elem->num_bytes) { + nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", + target_mem->size, + cmd_elem->target_byte_offset, + cmd_elem->num_bytes); + return -EINVAL; + } + + /* store the element into gpfifo */ + gpfifo_elem->entry0 = + u64_lo32(target_mem->gpu_va + + cmd_elem->target_byte_offset); + gpfifo_elem->entry1 = + u64_hi32(target_mem->gpu_va + + cmd_elem->target_byte_offset) | + pbdma_gp_entry1_length_f(cmd_elem->num_bytes / + sizeof(u32)); + } + + *num_entries = num_elems; + return 0; +} + +static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + unsigned long init_bytes = cde_ctx->init_cmd_num_entries * + sizeof(struct nvgpu_gpfifo_entry); + unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * + sizeof(struct nvgpu_gpfifo_entry); + unsigned long total_bytes = init_bytes + conv_bytes; + struct nvgpu_gpfifo_entry *combined_cmd; + + /* allocate buffer that has space for both */ + combined_cmd = nvgpu_kzalloc(g, total_bytes); + if (!combined_cmd) { + nvgpu_warn(g, + "cde: could not allocate memory for gpfifo entries"); + return -ENOMEM; + } + + /* move the original init here and append convert */ + memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); + memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, + cde_ctx->convert_cmd, conv_bytes); + + nvgpu_kfree(g, cde_ctx->init_convert_cmd); + nvgpu_kfree(g, cde_ctx->convert_cmd); + + cde_ctx->init_convert_cmd = combined_cmd; + cde_ctx->convert_cmd = combined_cmd + + cde_ctx->init_cmd_num_entries; + + return 0; +} + +static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, + struct nvgpu_firmware *img) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + u32 *data = (u32 *)img->data; + u32 num_of_elems; + struct gk20a_cde_hdr_elem *elem; + u32 min_size = 0; + int err = 0; + unsigned int i; + + min_size += 2 * sizeof(u32); + if (img->size < min_size) { + nvgpu_warn(g, "cde: invalid image header"); + return -EINVAL; + } + + cde_app->firmware_version = data[0]; + num_of_elems = data[1]; + + min_size += num_of_elems * sizeof(*elem); + if (img->size < min_size) { + nvgpu_warn(g, "cde: bad image"); + return -EINVAL; + } + + elem = (struct gk20a_cde_hdr_elem *)&data[2]; + for (i = 0; i < num_of_elems; i++) { + int err = 0; + switch (elem->type) { + case TYPE_BUF: + err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); + break; + case TYPE_REPLACE: + err = gk20a_init_cde_replace(cde_ctx, img, + &elem->replace); + break; + case TYPE_PARAM: + err = gk20a_init_cde_param(cde_ctx, img, &elem->param); + break; + case TYPE_REQUIRED_CLASS: + err = gk20a_init_cde_required_class(cde_ctx, img, + 
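/* engine class id comes straight from the parsed firmware header element */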
elem->required_class); + break; + case TYPE_COMMAND: + { + struct gk20a_cde_cmd_elem *cmd = (void *) + &img->data[elem->command.data_byte_offset]; + err = gk20a_init_cde_command(cde_ctx, img, + elem->command.op, cmd, + elem->command.num_entries); + break; + } + case TYPE_ARRAY: + memcpy(&cde_app->arrays[elem->array.id][0], + elem->array.data, + MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); + break; + default: + nvgpu_warn(g, "cde: unknown header element"); + err = -EINVAL; + } + + if (err) + goto deinit_image; + + elem++; + } + + if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { + nvgpu_warn(g, "cde: convert command not defined"); + err = -EINVAL; + goto deinit_image; + } + + if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { + nvgpu_warn(g, "cde: convert command not defined"); + err = -EINVAL; + goto deinit_image; + } + + err = gk20a_cde_pack_cmdbufs(cde_ctx); + if (err) + goto deinit_image; + + return 0; + +deinit_image: + gk20a_deinit_cde_img(cde_ctx); + return err; +} + +static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, + u32 op, struct nvgpu_channel_fence *fence, + u32 flags, struct gk20a_fence **fence_out) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_gpfifo_entry *gpfifo = NULL; + int num_entries = 0; + + /* check command type */ + if (op == TYPE_BUF_COMMAND_INIT) { + /* both init and convert combined */ + gpfifo = cde_ctx->init_convert_cmd; + num_entries = cde_ctx->init_cmd_num_entries + + cde_ctx->convert_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_CONVERT) { + gpfifo = cde_ctx->convert_cmd; + num_entries = cde_ctx->convert_cmd_num_entries; + } else if (op == TYPE_BUF_COMMAND_NOOP) { + /* Any non-null gpfifo will suffice with 0 num_entries */ + gpfifo = cde_ctx->init_convert_cmd; + num_entries = 0; + } else { + nvgpu_warn(g, "cde: unknown buffer"); + return -EINVAL; + } + + if (gpfifo == NULL) { + nvgpu_warn(g, "cde: buffer not available"); + return -ENOSYS; + } + + return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, + num_entries, flags, fence, fence_out, + NULL); +} + +static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + struct gk20a *g = &cde_ctx->l->g; + + nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); + trace_gk20a_cde_release(cde_ctx); + + nvgpu_mutex_acquire(&cde_app->mutex); + + if (cde_ctx->in_use) { + cde_ctx->in_use = false; + nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); + cde_app->ctx_usecount--; + } else { + nvgpu_log_info(g, "double release cde context %p", cde_ctx); + } + + nvgpu_mutex_release(&cde_app->mutex); +} + +static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct delayed_work *delay_work = to_delayed_work(work); + struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, + struct gk20a_cde_ctx, ctx_deleter_work); + struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + int err; + + /* someone has just taken it? engine deletion started? 
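 *
 * This is the classic double-checked pattern, sketched here with
 * simplified names: a cheap unlocked test first, repeated under the app
 * mutex below before anything is actually freed:
 *
 *	if (ctx->in_use || !app->initialised)    // unlocked fast path
 *		return;
 *	mutex_lock(&app->lock);
 *	if (ctx->in_use || !app->initialised) {  // re-check under the lock
 *		mutex_unlock(&app->lock);
 *		return;
 *	}
 *	destroy_ctx(ctx);                        // safe only here
 *	mutex_unlock(&app->lock);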
*/ + if (cde_ctx->in_use || !cde_app->initialised) + return; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: attempting to delete temporary %p", cde_ctx); + + err = gk20a_busy(g); + if (err) { + /* this context would find new use anyway later, so not freeing + * here does not leak anything */ + nvgpu_warn(g, "cde: cannot set gk20a on, postponing" + " temp ctx deletion"); + return; + } + + nvgpu_mutex_acquire(&cde_app->mutex); + if (cde_ctx->in_use || !cde_app->initialised) { + nvgpu_log(g, gpu_dbg_cde_ctx, + "cde: context use raced, not deleting %p", + cde_ctx); + goto out; + } + + WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work), + "double pending %p", cde_ctx); + + gk20a_cde_remove_ctx(cde_ctx); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: destroyed %p count=%d use=%d max=%d", + cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount, + cde_app->ctx_count_top); + +out: + nvgpu_mutex_release(&cde_app->mutex); + gk20a_idle(g); +} + +static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l) +__must_hold(&cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx; + + /* exhausted? */ + + if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT) + return ERR_PTR(-EAGAIN); + + /* idle context available? */ + + if (!nvgpu_list_empty(&cde_app->free_contexts)) { + cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts, + gk20a_cde_ctx, list); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: got free %p count=%d use=%d max=%d", + cde_ctx, cde_app->ctx_count, + cde_app->ctx_usecount, + cde_app->ctx_count_top); + trace_gk20a_cde_get_context(cde_ctx); + + /* deleter work may be scheduled, but in_use prevents it */ + cde_ctx->in_use = true; + nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts); + cde_app->ctx_usecount++; + + /* cancel any deletions now that ctx is in use */ + gk20a_cde_cancel_deleter(cde_ctx, true); + return cde_ctx; + } + + /* no free contexts, get a temporary one */ + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, + "cde: no free contexts, count=%d", + cde_app->ctx_count); + + cde_ctx = gk20a_cde_allocate_context(l); + if (IS_ERR(cde_ctx)) { + nvgpu_warn(g, "cde: cannot allocate context: %ld", + PTR_ERR(cde_ctx)); + return cde_ctx; + } + + trace_gk20a_cde_get_context(cde_ctx); + cde_ctx->in_use = true; + cde_ctx->is_temporary = true; + cde_app->ctx_usecount++; + cde_app->ctx_count++; + if (cde_app->ctx_count > cde_app->ctx_count_top) + cde_app->ctx_count_top = cde_app->ctx_count; + nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts); + + return cde_ctx; +} + +static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) +__releases(&cde_app->mutex) +__acquires(&cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a_cde_ctx *cde_ctx = NULL; + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, + NVGPU_TIMER_CPU_TIMER); + + do { + cde_ctx = gk20a_cde_do_get_context(l); + if (PTR_ERR(cde_ctx) != -EAGAIN) + break; + + /* exhausted, retry */ + nvgpu_mutex_release(&cde_app->mutex); + cond_resched(); + nvgpu_mutex_acquire(&cde_app->mutex); + } while (!nvgpu_timeout_expired(&timeout)); + + return cde_ctx; +} + +static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_ctx *cde_ctx; + int ret; + + cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); + if (!cde_ctx) + return ERR_PTR(-ENOMEM); + + cde_ctx->l = l; + 
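	/* back-pointer first: gk20a_cde_load() below derives the gk20a
	 * instance from cde_ctx->l when opening the channel */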
cde_ctx->dev = dev_from_gk20a(g); + + ret = gk20a_cde_load(cde_ctx); + if (ret) { + nvgpu_kfree(g, cde_ctx); + return ERR_PTR(ret); + } + + nvgpu_init_list_node(&cde_ctx->list); + cde_ctx->is_temporary = false; + cde_ctx->in_use = false; + INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, + gk20a_cde_ctx_deleter_fn); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); + trace_gk20a_cde_allocate_context(cde_ctx); + return cde_ctx; +} + +int gk20a_cde_convert(struct nvgpu_os_linux *l, + struct dma_buf *compbits_scatter_buf, + u64 compbits_byte_offset, + u64 scatterbuffer_byte_offset, + struct nvgpu_channel_fence *fence, + u32 __flags, struct gk20a_cde_param *params, + int num_params, struct gk20a_fence **fence_out) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_ctx *cde_ctx = NULL; + struct gk20a_comptags comptags; + struct nvgpu_os_buffer os_buf = { + compbits_scatter_buf, + NULL, + dev_from_gk20a(g) + }; + u64 mapped_compbits_offset = 0; + u64 compbits_size = 0; + u64 mapped_scatterbuffer_offset = 0; + u64 scatterbuffer_size = 0; + u64 map_vaddr = 0; + u64 map_offset = 0; + u64 map_size = 0; + u8 *surface = NULL; + u64 big_page_mask = 0; + u32 flags; + int err, i; + const s16 compbits_kind = 0; + u32 submit_op; + struct dma_buf_attachment *attachment; + + nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", + compbits_byte_offset, scatterbuffer_byte_offset); + + /* scatter buffer must be after compbits buffer */ + if (scatterbuffer_byte_offset && + scatterbuffer_byte_offset < compbits_byte_offset) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&l->cde_app.mutex); + cde_ctx = gk20a_cde_get_context(l); + nvgpu_mutex_release(&l->cde_app.mutex); + if (IS_ERR(cde_ctx)) { + err = PTR_ERR(cde_ctx); + goto exit_idle; + } + + /* First, map the buffer to local va */ + + /* ensure that the compbits buffer has drvdata */ + err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, + dev_from_gk20a(g)); + if (err) + goto exit_idle; + + /* compbits don't start at page aligned offset, so we need to align + the region to be mapped */ + big_page_mask = cde_ctx->vm->big_page_size - 1; + map_offset = compbits_byte_offset & ~big_page_mask; + map_size = compbits_scatter_buf->size - map_offset; + + + /* compute compbit start offset from the beginning of the mapped + area */ + mapped_compbits_offset = compbits_byte_offset - map_offset; + if (scatterbuffer_byte_offset) { + compbits_size = scatterbuffer_byte_offset - + compbits_byte_offset; + mapped_scatterbuffer_offset = scatterbuffer_byte_offset - + map_offset; + scatterbuffer_size = compbits_scatter_buf->size - + scatterbuffer_byte_offset; + } else { + compbits_size = compbits_scatter_buf->size - + compbits_byte_offset; + } + + nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", + map_offset, map_size); + nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", + mapped_compbits_offset, compbits_size); + nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", + mapped_scatterbuffer_offset, scatterbuffer_size); + + + /* map the destination buffer */ + get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ + err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, + NVGPU_VM_MAP_CACHEABLE | + NVGPU_VM_MAP_DIRECT_KIND_CTRL, + NVGPU_KIND_INVALID, + compbits_kind, /* incompressible kind */ + gk20a_mem_flag_none, + map_offset, 
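/* window runs from the aligned base to the end of the dmabuf */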
map_size, + NULL, + &map_vaddr); + if (err) { + dma_buf_put(compbits_scatter_buf); + err = -EINVAL; + goto exit_idle; + } + + if (scatterbuffer_byte_offset && + l->ops.cde.need_scatter_buffer && + l->ops.cde.need_scatter_buffer(g)) { + struct sg_table *sgt; + void *scatter_buffer; + + surface = dma_buf_vmap(compbits_scatter_buf); + if (IS_ERR(surface)) { + nvgpu_warn(g, + "dma_buf_vmap failed"); + err = -EINVAL; + goto exit_unmap_vaddr; + } + + scatter_buffer = surface + scatterbuffer_byte_offset; + + nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", + surface, scatter_buffer); + sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf, + &attachment); + if (IS_ERR(sgt)) { + nvgpu_warn(g, + "mm_pin failed"); + err = -EINVAL; + goto exit_unmap_surface; + } else { + err = l->ops.cde.populate_scatter_buffer(g, sgt, + compbits_byte_offset, scatter_buffer, + scatterbuffer_size); + WARN_ON(err); + + gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, + attachment, sgt); + if (err) + goto exit_unmap_surface; + } + + __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); + dma_buf_vunmap(compbits_scatter_buf, surface); + surface = NULL; + } + + /* store source buffer compression tags */ + gk20a_get_comptags(&os_buf, &comptags); + cde_ctx->surf_param_offset = comptags.offset; + cde_ctx->surf_param_lines = comptags.lines; + + /* store surface vaddr. This is actually compbit vaddr, but since + compbits live in the same surface, and we can get the alloc base + address by using gpuva_to_iova_base, this will do */ + cde_ctx->surf_vaddr = map_vaddr; + + /* store information about destination */ + cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; + cde_ctx->compbit_size = compbits_size; + + cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; + cde_ctx->scatterbuffer_size = scatterbuffer_size; + + /* remove existing argument data */ + memset(cde_ctx->user_param_values, 0, + sizeof(cde_ctx->user_param_values)); + + /* read user space arguments for the conversion */ + for (i = 0; i < num_params; i++) { + struct gk20a_cde_param *param = params + i; + int id = param->id - NUM_RESERVED_PARAMS; + + if (id < 0 || id >= MAX_CDE_USER_PARAMS) { + nvgpu_warn(g, "cde: unknown user parameter"); + err = -EINVAL; + goto exit_unmap_surface; + } + cde_ctx->user_param_values[id] = param->value; + } + + /* patch data */ + err = gk20a_cde_patch_params(cde_ctx); + if (err) { + nvgpu_warn(g, "cde: failed to patch parameters"); + goto exit_unmap_surface; + } + + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", + g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", + cde_ctx->compbit_size, cde_ctx->compbit_vaddr); + nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", + cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); + + /* take always the postfence as it is needed for protecting the + * cde context */ + flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET; + + /* gk20a_cde_execute_buffer() will grab a power reference of it's own */ + gk20a_idle(g); + + if (comptags.lines == 0) { + /* + * Nothing to do on the buffer, but do a null kickoff for + * managing the pre and post fences. + */ + submit_op = TYPE_BUF_COMMAND_NOOP; + } else if (!cde_ctx->init_cmd_executed) { + /* + * First time, so include the init pushbuf too in addition to + * the conversion code. 
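 *
 * In outline, the three-way choice made here (a sketch using the
 * TYPE_BUF_COMMAND_* values from cde.h; the helper itself is
 * hypothetical):
 *
 *	static u32 pick_submit_op(bool has_comptags, bool init_done)
 *	{
 *		if (!has_comptags)
 *			return TYPE_BUF_COMMAND_NOOP;     // fences only
 *		return init_done ? TYPE_BUF_COMMAND_CONVERT
 *				 : TYPE_BUF_COMMAND_INIT; // init + convert
 *	}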
+ */ + submit_op = TYPE_BUF_COMMAND_INIT; + } else { + /* + * The usual condition: execute just the conversion. + */ + submit_op = TYPE_BUF_COMMAND_CONVERT; + } + err = gk20a_cde_execute_buffer(cde_ctx, submit_op, + fence, flags, fence_out); + + if (comptags.lines != 0 && !err) + cde_ctx->init_cmd_executed = true; + + /* unmap the buffers - channel holds references to them now */ + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); + + return err; + +exit_unmap_surface: + if (surface) + dma_buf_vunmap(compbits_scatter_buf, surface); +exit_unmap_vaddr: + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); +exit_idle: + gk20a_idle(g); + return err; +} + +static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_ctx *cde_ctx = data; + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + bool channel_idle; + + channel_gk20a_joblist_lock(ch); + channel_idle = channel_gk20a_joblist_is_empty(ch); + channel_gk20a_joblist_unlock(ch); + + if (!channel_idle) + return; + + trace_gk20a_cde_finished_ctx_cb(cde_ctx); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx); + if (!cde_ctx->in_use) + nvgpu_log_info(g, "double finish cde context %p on channel %p", + cde_ctx, ch); + + if (ch->has_timedout) { + if (cde_ctx->is_temporary) { + nvgpu_warn(g, + "cde: channel had timed out" + " (temporary channel)"); + /* going to be deleted anyway */ + } else { + nvgpu_warn(g, + "cde: channel had timed out" + ", reloading"); + /* mark it to be deleted, replace with a new one */ + nvgpu_mutex_acquire(&cde_app->mutex); + cde_ctx->is_temporary = true; + if (gk20a_cde_create_context(l)) { + nvgpu_err(g, "cde: can't replace context"); + } + nvgpu_mutex_release(&cde_app->mutex); + } + } + + /* delete temporary contexts later (watch for doubles) */ + if (cde_ctx->is_temporary && cde_ctx->in_use) { + WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work)); + schedule_delayed_work(&cde_ctx->ctx_deleter_work, + msecs_to_jiffies(CTX_DELETE_TIME)); + } + + if (!ch->has_timedout) + gk20a_cde_ctx_release(cde_ctx); +} + +static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) +{ + struct nvgpu_os_linux *l = cde_ctx->l; + struct gk20a *g = &l->g; + struct nvgpu_firmware *img; + struct channel_gk20a *ch; + struct tsg_gk20a *tsg; + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gpfifo_args gpfifo_args; + int err = 0; + u64 vaddr; + + img = nvgpu_request_firmware(g, "gpu2cde.bin", 0); + if (!img) { + nvgpu_err(g, "cde: could not fetch the firmware"); + return -ENOSYS; + } + + tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); + if (!tsg) { + nvgpu_err(g, "cde: could not create TSG"); + err = -ENOMEM; + goto err_get_gk20a_channel; + } + + ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, + cde_ctx, + -1, + false); + if (!ch) { + nvgpu_warn(g, "cde: gk20a channel not available"); + err = -ENOMEM; + goto err_get_gk20a_channel; + } + + ch->timeout.enabled = false; + + /* bind the channel to the vm */ + err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch); + if (err) { + nvgpu_warn(g, "cde: could not bind vm"); + goto err_commit_va; + } + + err = gk20a_tsg_bind_channel(tsg, ch); + if (err) { + nvgpu_err(g, "cde: unable to bind to tsg"); + goto err_alloc_gpfifo; + } + + gpfifo_args.num_entries = 1024; + gpfifo_args.num_inflight_jobs = 0; + gpfifo_args.flags = 0; + /* allocate gpfifo (1024 should be more than enough) */ + err = gk20a_channel_alloc_gpfifo(ch, 
&gpfifo_args); + if (err) { + nvgpu_warn(g, "cde: unable to allocate gpfifo"); + goto err_alloc_gpfifo; + } + + /* map backing store to gpu virtual space */ + vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, + g->gr.compbit_store.mem.size, + NVGPU_VM_MAP_CACHEABLE, + gk20a_mem_flag_read_only, + false, + gr->compbit_store.mem.aperture); + + if (!vaddr) { + nvgpu_warn(g, "cde: cannot map compression bit backing store"); + err = -ENOMEM; + goto err_map_backingstore; + } + + /* store initialisation data */ + cde_ctx->ch = ch; + cde_ctx->tsg = tsg; + cde_ctx->vm = ch->vm; + cde_ctx->backing_store_vaddr = vaddr; + + /* initialise the firmware */ + err = gk20a_init_cde_img(cde_ctx, img); + if (err) { + nvgpu_warn(g, "cde: image initialisation failed"); + goto err_init_cde_img; + } + + /* initialisation done */ + nvgpu_release_firmware(g, img); + + return 0; + +err_init_cde_img: + nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); +err_map_backingstore: +err_alloc_gpfifo: + nvgpu_vm_put(ch->vm); +err_commit_va: +err_get_gk20a_channel: + nvgpu_release_firmware(g, img); + nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err); + return err; +} + +int gk20a_cde_reload(struct nvgpu_os_linux *l) +__acquires(&l->cde_app->mutex) +__releases(&l->cde_app->mutex) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_app *cde_app = &l->cde_app; + int err; + + if (!cde_app->initialised) + return -ENOSYS; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&cde_app->mutex); + + gk20a_cde_stop(l); + + err = gk20a_cde_create_contexts(l); + if (!err) + cde_app->initialised = true; + + nvgpu_mutex_release(&cde_app->mutex); + + gk20a_idle(g); + return err; +} + +int gk20a_init_cde_support(struct nvgpu_os_linux *l) +__acquires(&cde_app->mutex) +__releases(&cde_app->mutex) +{ + struct gk20a_cde_app *cde_app = &l->cde_app; + struct gk20a *g = &l->g; + int err; + + if (cde_app->initialised) + return 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); + + err = nvgpu_mutex_init(&cde_app->mutex); + if (err) + return err; + + nvgpu_mutex_acquire(&cde_app->mutex); + + nvgpu_init_list_node(&cde_app->free_contexts); + nvgpu_init_list_node(&cde_app->used_contexts); + cde_app->ctx_count = 0; + cde_app->ctx_count_top = 0; + cde_app->ctx_usecount = 0; + + err = gk20a_cde_create_contexts(l); + if (!err) + cde_app->initialised = true; + + nvgpu_mutex_release(&cde_app->mutex); + nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err); + + if (err) + nvgpu_mutex_destroy(&cde_app->mutex); + + return err; +} + +enum cde_launch_patch_id { + PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, + PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, + PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ + PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, + PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, + PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ + PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ + PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ + PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, + PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ + PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ + PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, + PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, + PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, + PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, + PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, + PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, + PATCH_USER_CONST_XBLOCKS_ID = 1041, + 
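	/* the ids below come in pairs: H_* constants patch the horizontal
	 * conversion pass, V_* the vertical pass of the same QMD/launch
	 * fields */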
PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, + PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, + PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, + PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, + PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, + PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, + PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, + PATCH_H_LAUNCH_WORD1_ID = 1049, + PATCH_H_LAUNCH_WORD2_ID = 1050, + PATCH_V_LAUNCH_WORD1_ID = 1051, + PATCH_V_LAUNCH_WORD2_ID = 1052, + PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, + PATCH_H_QMD_REGISTER_COUNT_ID = 1054, + PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, + PATCH_V_QMD_REGISTER_COUNT_ID = 1056, +}; + +/* maximum number of WRITE_PATCHes in the below function */ +#define MAX_CDE_LAUNCH_PATCHES 32 + +static int gk20a_buffer_convert_gpu_to_cde_v1( + struct nvgpu_os_linux *l, + struct dma_buf *dmabuf, u32 consumer, + u64 offset, u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence_in, + struct gk20a_buffer_state *state) +{ + struct gk20a *g = &l->g; + struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; + int param = 0; + int err = 0; + struct gk20a_fence *new_fence = NULL; + const int wgx = 8; + const int wgy = 8; + const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ + const int xalign = compbits_per_byte * wgx; + const int yalign = wgy; + + /* Compute per launch parameters */ + const int xtiles = (width + 7) >> 3; + const int ytiles = (height + 7) >> 3; + const int gridw_h = roundup(xtiles, xalign) / xalign; + const int gridh_h = roundup(ytiles, yalign) / yalign; + const int gridw_v = roundup(ytiles, xalign) / xalign; + const int gridh_v = roundup(xtiles, yalign) / yalign; + const int xblocks = (xtiles + 1) >> 1; + const int voffset = compbits_voffset - compbits_hoffset; + + int hprog = -1; + int vprog = -1; + + if (l->ops.cde.get_program_numbers) + l->ops.cde.get_program_numbers(g, block_height_log2, + l->cde_app.shader_parameter, + &hprog, &vprog); + else { + nvgpu_warn(g, "cde: chip not supported"); + return -ENOSYS; + } + + if (hprog < 0 || vprog < 0) { + nvgpu_warn(g, "cde: could not determine programs"); + return -ENOSYS; + } + + if (xtiles > 8192 / 8 || ytiles > 8192 / 8) + nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", + xtiles, ytiles); + + nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", + width, height, block_height_log2, + compbits_hoffset, compbits_voffset, scatterbuffer_offset); + nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", + width, height, xtiles, ytiles); + nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", + wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); + nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", + hprog, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], + l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], + vprog, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], + l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); + + /* Write parameters */ +#define WRITE_PATCH(NAME, VALUE) \ + params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} + WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); + WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, + block_height_log2); + WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); + WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); + WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); + 
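	/* each WRITE_PATCH() appends one gk20a_cde_param to params[],
	 * bounded by MAX_CDE_LAUNCH_PATCHES */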
WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); + WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); + + WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); + WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); + WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); + WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); + + WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); + WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); + WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); + WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); + + WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); + WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, + l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); + WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, + l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); + WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, + l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); + + if (consumer & NVGPU_GPU_COMPBITS_CDEH) { + WRITE_PATCH(PATCH_H_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); + WRITE_PATCH(PATCH_H_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); + } else { + WRITE_PATCH(PATCH_H_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); + WRITE_PATCH(PATCH_H_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); + } + + if (consumer & NVGPU_GPU_COMPBITS_CDEV) { + WRITE_PATCH(PATCH_V_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); + WRITE_PATCH(PATCH_V_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); + } else { + WRITE_PATCH(PATCH_V_LAUNCH_WORD1, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); + WRITE_PATCH(PATCH_V_LAUNCH_WORD2, + l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); + } +#undef WRITE_PATCH + + err = gk20a_cde_convert(l, dmabuf, + compbits_hoffset, + scatterbuffer_offset, + fence_in, submit_flags, + params, param, &new_fence); + if (err) + goto out; + + /* compbits generated, update state & fence */ + gk20a_fence_put(state->fence); + state->fence = new_fence; + state->valid_compbits |= consumer & + (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); +out: + return err; +} + +static int gk20a_buffer_convert_gpu_to_cde( + struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, + u64 offset, u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence_in, + struct gk20a_buffer_state *state) +{ + struct gk20a *g = &l->g; + int err = 0; + + if (!l->cde_app.initialised) + return -ENOSYS; + + nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", + l->cde_app.firmware_version); + + if (l->cde_app.firmware_version == 1) { + err = gk20a_buffer_convert_gpu_to_cde_v1( + l, dmabuf, consumer, offset, compbits_hoffset, + compbits_voffset, scatterbuffer_offset, + width, height, block_height_log2, + submit_flags, fence_in, state); + } else { + nvgpu_err(g, "unsupported CDE firmware version %d", + l->cde_app.firmware_version); + err = -EINVAL; + } + + return err; +} + +int gk20a_prepare_compressible_read( + struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, + u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence, + u32 *valid_compbits, u32 
*zbc_color, + struct gk20a_fence **fence_out) +{ + struct gk20a *g = &l->g; + int err = 0; + struct gk20a_buffer_state *state; + struct dma_buf *dmabuf; + u32 missing_bits; + + dmabuf = dma_buf_get(buffer_fd); + if (IS_ERR(dmabuf)) + return -EINVAL; + + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); + if (err) { + dma_buf_put(dmabuf); + return err; + } + + missing_bits = (state->valid_compbits ^ request) & request; + + nvgpu_mutex_acquire(&state->lock); + + if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { + + gk20a_fence_put(state->fence); + state->fence = NULL; + /* state->fence = decompress(); + state->valid_compbits = 0; */ + err = -EINVAL; + goto out; + } else if (missing_bits) { + u32 missing_cde_bits = missing_bits & + (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); + if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && + missing_cde_bits) { + err = gk20a_buffer_convert_gpu_to_cde( + l, dmabuf, + missing_cde_bits, + offset, compbits_hoffset, + compbits_voffset, scatterbuffer_offset, + width, height, block_height_log2, + submit_flags, fence, + state); + if (err) + goto out; + } + } + + if (state->fence && fence_out) + *fence_out = gk20a_fence_get(state->fence); + + if (valid_compbits) + *valid_compbits = state->valid_compbits; + + if (zbc_color) + *zbc_color = state->zbc_color; + +out: + nvgpu_mutex_release(&state->lock); + dma_buf_put(dmabuf); + return err; +} + +int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, + u32 valid_compbits, u64 offset, u32 zbc_color) +{ + int err; + struct gk20a_buffer_state *state; + struct dma_buf *dmabuf; + + dmabuf = dma_buf_get(buffer_fd); + if (IS_ERR(dmabuf)) { + nvgpu_err(g, "invalid dmabuf"); + return -EINVAL; + } + + err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); + if (err) { + nvgpu_err(g, "could not get state from dmabuf"); + dma_buf_put(dmabuf); + return err; + } + + nvgpu_mutex_acquire(&state->lock); + + /* Update the compbits state. */ + state->valid_compbits = valid_compbits; + state->zbc_color = zbc_color; + + /* Discard previous compbit job fence. */ + gk20a_fence_put(state->fence); + state->fence = NULL; + + nvgpu_mutex_release(&state->lock); + dma_buf_put(dmabuf); + return 0; +} + +int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + u32 ver = g->params.gpu_arch + g->params.gpu_impl; + + switch (ver) { + case GK20A_GPUID_GM20B: + case GK20A_GPUID_GM20B_B: + l->ops.cde = gm20b_cde_ops.cde; + break; + case NVGPU_GPUID_GP10B: + l->ops.cde = gp10b_cde_ops.cde; + break; + default: + /* CDE is optional, so today ignoring unknown chip is fine */ + break; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h new file mode 100644 index 00000000..5928b624 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.h @@ -0,0 +1,326 @@ +/* + * GK20A color decompression engine support + * + * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#ifndef _CDE_GK20A_H_
+#define _CDE_GK20A_H_
+
+#include
+#include
+#include
+
+#include
+#include
+
+#define MAX_CDE_BUFS		10
+#define MAX_CDE_PARAMS		64
+#define MAX_CDE_USER_PARAMS	40
+#define MAX_CDE_ARRAY_ENTRIES	9
+
+/*
+ * The size of the context ring buffer that is dedicated for handling cde
+ * jobs. Re-using a context (=channel) for a different cde job forces a cpu
+ * wait on the previous job to that channel, so increasing this value
+ * reduces the likelihood of stalls.
+ */
+#define NUM_CDE_CONTEXTS	4
+
+struct dma_buf;
+struct device;
+struct nvgpu_os_linux;
+struct gk20a;
+struct gk20a_fence;
+struct nvgpu_channel_fence;
+struct channel_gk20a;
+struct vm_gk20a;
+struct nvgpu_gpfifo_entry;
+
+/*
+ * this element defines a buffer that is allocated and mapped into gpu address
+ * space. data_byte_offset defines the beginning of the buffer inside the
+ * firmware. num_bytes defines how many bytes the firmware contains.
+ *
+ * If data_byte_offset is zero, we allocate an empty buffer.
+ */
+
+struct gk20a_cde_hdr_buf {
+	u64 data_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * this element defines a constant patching in buffers. It basically
+ * computes the physical address of source_buf + source_byte_offset. The
+ * address is then modified into the patch value as per:
+ *    value = (current_value & ~mask) | (address << shift) & mask .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_replace {
+	u32 target_buf;
+	u32 source_buf;
+	s32 shift;
+	u32 type;
+	u64 target_byte_offset;
+	u64 source_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_TYPE_U32 = 0,
+	TYPE_PARAM_TYPE_U64_LITTLE,
+	TYPE_PARAM_TYPE_U64_BIG
+};
+
+/*
+ * this element defines a runtime patching in buffers. Parameters with id from
+ * 0 to 1024 are reserved for special usage as follows:
+ *   0 = comptags_per_cacheline,
+ *   1 = slices_per_fbp,
+ *   2 = num_fbps
+ *   3 = source buffer first page offset
+ *   4 = source buffer block height log2
+ *   5 = backing store memory address
+ *   6 = destination memory address
+ *   7 = destination size (bytes)
+ *   8 = backing store size (bytes)
+ *   9 = cache line size
+ *
+ * Parameters above id 1024 are user-specified. I.e. they determine where
+ * parameters from user space should be placed in buffers, what their type
+ * is, etc.
+ *
+ * Once the value is available, we add data_offset to the value.
+ *
+ * The value address is then modified into the patch value as per:
+ *    value = (current_value & ~mask) | (address << shift) & mask .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_param {
+	u32 id;
+	u32 target_buf;
+	s32 shift;
+	u32 type;
+	s64 data_offset;
+	u64 target_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
+	TYPE_PARAM_GPU_CONFIGURATION,
+	TYPE_PARAM_FIRSTPAGEOFFSET,
+	TYPE_PARAM_NUMPAGES,
+	TYPE_PARAM_BACKINGSTORE,
+	TYPE_PARAM_DESTINATION,
+	TYPE_PARAM_DESTINATION_SIZE,
+	TYPE_PARAM_BACKINGSTORE_SIZE,
+	TYPE_PARAM_SOURCE_SMMU_ADDR,
+	TYPE_PARAM_BACKINGSTORE_BASE_HW,
+	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
+	TYPE_PARAM_SCATTERBUFFER,
+	TYPE_PARAM_SCATTERBUFFER_SIZE,
+	NUM_RESERVED_PARAMS = 1024,
+};
+
+/*
+ * This header element defines a command. The op field determines whether the
+ * element is defining an init (0) or convert (1) command. data_byte_offset
+ * denotes the beginning address of command elements in the file.
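+ *
+ * As a purely illustrative example (the offsets and counts below are made
+ * up; real values come from the CDE firmware image), an init command
+ * covering two pushbuffer slices could be encoded as:
+ *
+ *   { .op = TYPE_BUF_COMMAND_INIT, .num_entries = 2,
+ *     .data_byte_offset = 0x200 }
+ *
+ * where 0x200 would point at two struct gk20a_cde_cmd_elem records
+ * inside the firmware file.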
+ */
+
+struct gk20a_cde_hdr_command {
+	u32 op;
+	u32 num_entries;
+	u64 data_byte_offset;
+};
+
+enum {
+	TYPE_BUF_COMMAND_INIT = 0,
+	TYPE_BUF_COMMAND_CONVERT,
+	TYPE_BUF_COMMAND_NOOP
+};
+
+/*
+ * This command element defines one entry inside the push buffer. target_buf
+ * defines the buffer holding the pushbuffer entries, target_byte_offset the
+ * offset inside the buffer and num_bytes the number of bytes in the buffer.
+ */
+
+struct gk20a_cde_cmd_elem {
+	u32 target_buf;
+	u32 padding;
+	u64 target_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * This element is used for storing a small array of data.
+ */
+
+enum {
+	ARRAY_PROGRAM_OFFSET = 0,
+	ARRAY_REGISTER_COUNT,
+	ARRAY_LAUNCH_COMMAND,
+	NUM_CDE_ARRAYS
+};
+
+struct gk20a_cde_hdr_array {
+	u32 id;
+	u32 data[MAX_CDE_ARRAY_ENTRIES];
+};
+
+/*
+ * The following defines a single header element. Each element has a type
+ * and carries one of the data structures in the union.
+ */
+
+struct gk20a_cde_hdr_elem {
+	u32 type;
+	u32 padding;
+	union {
+		struct gk20a_cde_hdr_buf buf;
+		struct gk20a_cde_hdr_replace replace;
+		struct gk20a_cde_hdr_param param;
+		u32 required_class;
+		struct gk20a_cde_hdr_command command;
+		struct gk20a_cde_hdr_array array;
+	};
+};
+
+enum {
+	TYPE_BUF = 0,
+	TYPE_REPLACE,
+	TYPE_PARAM,
+	TYPE_REQUIRED_CLASS,
+	TYPE_COMMAND,
+	TYPE_ARRAY
+};
+
+struct gk20a_cde_param {
+	u32 id;
+	u32 padding;
+	u64 value;
+};
+
+struct gk20a_cde_ctx {
+	struct nvgpu_os_linux *l;
+	struct device *dev;
+
+	/* channel related data */
+	struct channel_gk20a *ch;
+	struct tsg_gk20a *tsg;
+	struct vm_gk20a *vm;
+
+	/* buf converter configuration */
+	struct nvgpu_mem mem[MAX_CDE_BUFS];
+	unsigned int num_bufs;
+
+	/* buffer patching params (where patching should be done) */
+	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
+	unsigned int num_params;
+
+	/* storage for user space parameter values */
+	u32 user_param_values[MAX_CDE_USER_PARAMS];
+
+	u32 surf_param_offset;
+	u32 surf_param_lines;
+	u64 surf_vaddr;
+
+	u64 compbit_vaddr;
+	u64 compbit_size;
+
+	u64 scatterbuffer_vaddr;
+	u64 scatterbuffer_size;
+
+	u64 backing_store_vaddr;
+
+	struct nvgpu_gpfifo_entry *init_convert_cmd;
+	int init_cmd_num_entries;
+
+	struct nvgpu_gpfifo_entry *convert_cmd;
+	int convert_cmd_num_entries;
+
+	struct kobj_attribute attr;
+
+	bool init_cmd_executed;
+
+	struct nvgpu_list_node list;
+	bool is_temporary;
+	bool in_use;
+	struct delayed_work ctx_deleter_work;
+};
+
+static inline struct gk20a_cde_ctx *
+gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
+{
+	return (struct gk20a_cde_ctx *)
+		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
+};
+
+struct gk20a_cde_app {
+	bool initialised;
+	struct nvgpu_mutex mutex;
+
+	struct nvgpu_list_node free_contexts;
+	struct nvgpu_list_node used_contexts;
+	unsigned int ctx_count;
+	unsigned int ctx_usecount;
+	unsigned int ctx_count_top;
+
+	u32 firmware_version;
+
+	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
+
+	u32 shader_parameter;
+};
+
+void gk20a_cde_destroy(struct nvgpu_os_linux *l);
+void gk20a_cde_suspend(struct nvgpu_os_linux *l);
+int gk20a_init_cde_support(struct nvgpu_os_linux *l);
+int gk20a_cde_reload(struct nvgpu_os_linux *l);
+int gk20a_cde_convert(struct nvgpu_os_linux *l,
+		struct dma_buf *compbits_buf,
+		u64 compbits_byte_offset,
+		u64 scatterbuffer_byte_offset,
+		struct nvgpu_channel_fence *fence,
+		u32 __flags, struct gk20a_cde_param *params,
+		int num_params, struct gk20a_fence **fence_out);
+
+int gk20a_prepare_compressible_read(
+		struct nvgpu_os_linux *l, u32 buffer_fd,
u32 request, u64 offset, + u64 compbits_hoffset, u64 compbits_voffset, + u64 scatterbuffer_offset, + u32 width, u32 height, u32 block_height_log2, + u32 submit_flags, struct nvgpu_channel_fence *fence, + u32 *valid_compbits, u32 *zbc_color, + struct gk20a_fence **fence_out); +int gk20a_mark_compressible_write( + struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, + u32 zbc_color); +int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.c b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c new file mode 100644 index 00000000..1cd15c54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c @@ -0,0 +1,64 @@ +/* + * GM20B CDE + * + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gk20a/gk20a.h" +#include "cde_gm20b.h" + +enum programs { + PROG_HPASS = 0, + PROG_VPASS_LARGE = 1, + PROG_VPASS_SMALL = 2, + PROG_HPASS_DEBUG = 3, + PROG_VPASS_LARGE_DEBUG = 4, + PROG_VPASS_SMALL_DEBUG = 5, + PROG_PASSTHROUGH = 6, +}; + +static void gm20b_cde_get_program_numbers(struct gk20a *g, + u32 block_height_log2, + u32 shader_parameter, + int *hprog_out, int *vprog_out) +{ + int hprog = PROG_HPASS; + int vprog = (block_height_log2 >= 2) ? + PROG_VPASS_LARGE : PROG_VPASS_SMALL; + if (shader_parameter == 1) { + hprog = PROG_PASSTHROUGH; + vprog = PROG_PASSTHROUGH; + } else if (shader_parameter == 2) { + hprog = PROG_HPASS_DEBUG; + vprog = (block_height_log2 >= 2) ? + PROG_VPASS_LARGE_DEBUG : + PROG_VPASS_SMALL_DEBUG; + } + + *hprog_out = hprog; + *vprog_out = vprog; +} + +struct nvgpu_os_linux_ops gm20b_cde_ops = { + .cde = { + .get_program_numbers = gm20b_cde_get_program_numbers, + }, +}; diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.h b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h new file mode 100644 index 00000000..640d6ab6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h @@ -0,0 +1,32 @@ +/* + * GM20B CDE + * + * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NVHOST_GM20B_CDE +#define _NVHOST_GM20B_CDE + +#include "os_linux.h" + +extern struct nvgpu_os_linux_ops gm20b_cde_ops; + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.c b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c new file mode 100644 index 00000000..5c0e79a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c @@ -0,0 +1,161 @@ +/* + * GP10B CDE + * + * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "gk20a/gk20a.h"
+#include "cde_gp10b.h"
+
+#include
+#include
+
+enum gp10b_programs {
+	GP10B_PROG_HPASS = 0,
+	GP10B_PROG_HPASS_4K = 1,
+	GP10B_PROG_VPASS = 2,
+	GP10B_PROG_VPASS_4K = 3,
+	GP10B_PROG_HPASS_DEBUG = 4,
+	GP10B_PROG_HPASS_4K_DEBUG = 5,
+	GP10B_PROG_VPASS_DEBUG = 6,
+	GP10B_PROG_VPASS_4K_DEBUG = 7,
+	GP10B_PROG_PASSTHROUGH = 8,
+};
+
+void gp10b_cde_get_program_numbers(struct gk20a *g,
+				   u32 block_height_log2,
+				   u32 shader_parameter,
+				   int *hprog_out, int *vprog_out)
+{
+	int hprog, vprog;
+
+	if (shader_parameter == 1) {
+		hprog = GP10B_PROG_PASSTHROUGH;
+		vprog = GP10B_PROG_PASSTHROUGH;
+	} else {
+		hprog = GP10B_PROG_HPASS;
+		vprog = GP10B_PROG_VPASS;
+		if (shader_parameter == 2) {
+			hprog = GP10B_PROG_HPASS_DEBUG;
+			vprog = GP10B_PROG_VPASS_DEBUG;
+		}
+		if (!nvgpu_iommuable(g)) {
+			if (!g->mm.disable_bigpage) {
+				nvgpu_warn(g,
+					"big pages cannot be used without an IOMMU");
+			}
+			/* the 4K program variants are selected via bit 0 */
+			hprog |= 1;
+			vprog |= 1;
+		}
+	}
+
+	*hprog_out = hprog;
+	*vprog_out = vprog;
+}
+
+bool gp10b_need_scatter_buffer(struct gk20a *g)
+{
+	return !nvgpu_iommuable(g);
+}
+
+static u8 parity(u32 a)
+{
+	a ^= a >> 16u;
+	a ^= a >> 8u;
+	a ^= a >> 4u;
+	a &= 0xfu;
+	return (0x6996u >> a) & 1u;
+}
+
+int gp10b_populate_scatter_buffer(struct gk20a *g,
+				  struct sg_table *sgt,
+				  size_t surface_size,
+				  void *scatter_buffer_ptr,
+				  size_t scatter_buffer_size)
+{
+	/* map scatter buffer to CPU VA and fill it */
+	const u32 page_size_log2 = 12;
+	const u32 page_size = 1 << page_size_log2;
+	const u32 page_size_shift = page_size_log2 - 7u;
+
+	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
+	const u32 getSliceMaskGP10B = 0x3ffffe48;
+	u8 *scatter_buffer = scatter_buffer_ptr;
+
+	size_t i;
+	struct scatterlist *sg = NULL;
+	u8 d = 0;
+	size_t page = 0;
+	size_t pages_left;
+
+	surface_size = round_up(surface_size, page_size);
+
+	pages_left = surface_size >> page_size_log2;
+	if ((pages_left >> 3) > scatter_buffer_size)
+		return -ENOMEM;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		unsigned int j;
+		u64 surf_pa = sg_phys(sg);
+		unsigned int n = (int)(sg->length >> page_size_log2);
+
+		nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
+
+		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
+			u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B)
+					>> page_size_shift;
+			u8 scatter_bit = parity(addr);
+			u8 bit = page & 7;
+
+			d |= scatter_bit << bit;
+			if (bit == 7) {
+				scatter_buffer[page >> 3] = d;
+				d = 0;
+			}
+
+			++page;
+			--pages_left;
+		}
+
+		if (pages_left == 0)
+			break;
+	}
+
+	/* write the last byte in case the number of pages is not divisible by 8 */
+	if ((page & 7) != 0)
+		scatter_buffer[page >> 3] = d;
+
+	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
+		nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
+		for (i = 0; i < page >> 3; i++) {
+			nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
+		}
+	}
+
+	return 0;
+}
+
+struct nvgpu_os_linux_ops gp10b_cde_ops = {
+	.cde = {
+		.get_program_numbers = gp10b_cde_get_program_numbers,
+		.need_scatter_buffer = gp10b_need_scatter_buffer,
+		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
+	},
+};
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.h b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h
new file mode 100644
index 00000000..52e9f292
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h
@@ -0,0 +1,32 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NVHOST_GP10B_CDE +#define _NVHOST_GP10B_CDE + +#include "os_linux.h" + +extern struct nvgpu_os_linux_ops gp10b_cde_ops; + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c new file mode 100644 index 00000000..165f33db --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ce2.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include + +#include "gk20a/ce2_gk20a.h" +#include "gk20a/gk20a.h" +#include "channel.h" + +static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) +{ + /* there is no local memory available, + don't allow local memory related CE flags */ + if (!g->mm.vidmem.size) { + launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | + NVGPU_CE_DST_LOCATION_LOCAL_FB); + } + return launch_flags; +} + +int gk20a_ce_execute_ops(struct gk20a *g, + u32 ce_ctx_id, + u64 src_buf, + u64 dst_buf, + u64 size, + unsigned int payload, + int launch_flags, + int request_operation, + u32 submit_flags, + struct gk20a_fence **gk20a_fence_out) +{ + int ret = -EPERM; + struct gk20a_ce_app *ce_app = &g->ce_app; + struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; + bool found = false; + u32 *cmd_buf_cpu_va; + u64 cmd_buf_gpu_va = 0; + u32 methodSize; + u32 cmd_buf_read_offset; + u32 dma_copy_class; + struct nvgpu_gpfifo_entry gpfifo; + struct nvgpu_channel_fence fence = {0, 0}; + struct gk20a_fence *ce_cmd_buf_fence_out = NULL; + + if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) + goto end; + + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { + if (ce_ctx->ctx_id == ce_ctx_id) { + found = true; + break; + } + } + + nvgpu_mutex_release(&ce_app->app_mutex); + + if (!found) { + ret = -EINVAL; + goto end; + } + + if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { + ret = -ENODEV; + goto end; + } + + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); + + ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; + + cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * + (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); + + cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; + + if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { + struct gk20a_fence **prev_post_fence = + &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; + + ret = gk20a_fence_wait(g, *prev_post_fence, + gk20a_get_gr_idle_timeout(g)); + + gk20a_fence_put(*prev_post_fence); + *prev_post_fence = NULL; + if (ret) + goto noop; + } + + cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); + + dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + methodSize = gk20a_ce_prepare_submit(src_buf, + dst_buf, + size, + &cmd_buf_cpu_va[cmd_buf_read_offset], + NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, + payload, + gk20a_get_valid_launch_flags(g, launch_flags), + request_operation, + dma_copy_class); + + if (methodSize) { + /* store the element into gpfifo */ + gpfifo.entry0 = + u64_lo32(cmd_buf_gpu_va); + gpfifo.entry1 = + (u64_hi32(cmd_buf_gpu_va) | + pbdma_gp_entry1_length_f(methodSize)); + + /* take always the postfence as it is needed for protecting the ce context */ + submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + nvgpu_smp_wmb(); + + ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, + 1, submit_flags, &fence, + &ce_cmd_buf_fence_out, NULL); + + if (!ret) { + ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = + ce_cmd_buf_fence_out; + if (gk20a_fence_out) { + gk20a_fence_get(ce_cmd_buf_fence_out); + *gk20a_fence_out = ce_cmd_buf_fence_out; + } + + /* Next available command buffer queue Index */ + ++ce_ctx->cmd_buf_read_queue_offset; + } + } else { + ret = -ENOMEM; + } +noop: + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); +end: + return ret; +} diff --git a/drivers/gpu/nvgpu/os/linux/channel.c 
b/drivers/gpu/nvgpu/os/linux/channel.c new file mode 100644 index 00000000..7810bc21 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/channel.c @@ -0,0 +1,1021 @@ +/* + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +/* + * This is required for nvgpu_vm_find_buf() which is used in the tracing + * code. Once we can get and access userspace buffers without requiring + * direct dma_buf usage this can be removed. + */ +#include + +#include "gk20a/gk20a.h" + +#include "channel.h" +#include "ioctl_channel.h" +#include "os_linux.h" + +#include + +#include +#include +#include +#include + +#include "sync_sema_android.h" + +u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) + flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) + flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) + flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; + + if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) + flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; + + return flags; +} + +/* + * API to convert error_notifiers in common code and of the form + * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user + * space and of the form NVGPU_CHANNEL_* + */ +static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) +{ + switch (error_notifier) { + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: + return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: + return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; + case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: + return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; + case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: + return NVGPU_CHANNEL_GR_EXCEPTION; + case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: + return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; + case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: + return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; + case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: + return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; + case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: + return NVGPU_CHANNEL_PBDMA_ERROR; + case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: + return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; + case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: + return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; + case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: + return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; + } + + pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); + + return error_notifier; +} + +/** + * nvgpu_set_error_notifier_locked() + * Should be called with 
ch->error_notifier_mutex held + * + * error should be of the form NVGPU_ERR_NOTIFIER_* + */ +void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + error = nvgpu_error_notifier_to_channel_notifier(error); + + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + struct timespec time_data; + u64 nsec; + + getnstimeofday(&time_data); + nsec = ((u64)time_data.tv_sec) * 1000000000u + + (u64)time_data.tv_nsec; + notification->time_stamp.nanoseconds[0] = + (u32)nsec; + notification->time_stamp.nanoseconds[1] = + (u32)(nsec >> 32); + notification->info32 = error; + notification->status = 0xffff; + + nvgpu_err(ch->g, + "error notifier set to %d for ch %d", error, ch->chid); + } +} + +/* error should be of the form NVGPU_ERR_NOTIFIER_* */ +void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + nvgpu_set_error_notifier_locked(ch, error); + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + + /* Don't overwrite error flag if it is already set */ + if (notification->status != 0xffff) + nvgpu_set_error_notifier_locked(ch, error); + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ +bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + bool notifier_set = false; + + error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + struct nvgpu_notification *notification = + priv->error_notifier.notification; + u32 err = notification->info32; + + if (err == error_notifier) + notifier_set = true; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return notifier_set; +} + +static void gk20a_channel_update_runcb_fn(struct work_struct *work) +{ + struct nvgpu_channel_completion_cb *completion_cb = + container_of(work, struct nvgpu_channel_completion_cb, work); + struct nvgpu_channel_linux *priv = + container_of(completion_cb, + struct nvgpu_channel_linux, completion_cb); + struct channel_gk20a *ch = priv->ch; + void (*fn)(struct channel_gk20a *, void *); + void *user_data; + + nvgpu_spinlock_acquire(&completion_cb->lock); + fn = completion_cb->fn; + user_data = completion_cb->user_data; + nvgpu_spinlock_release(&completion_cb->lock); + + if (fn) + fn(ch, user_data); +} + +static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + nvgpu_spinlock_init(&priv->completion_cb.lock); + INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); +} + +static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = NULL; + priv->completion_cb.user_data = NULL; + 
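/* Drop the lock before cancel_work_sync(): the completion worker
+	 * (gk20a_channel_update_runcb_fn) acquires this same lock, and
+	 * cancel_work_sync() may sleep while waiting for it to finish. */
+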
nvgpu_spinlock_release(&priv->completion_cb.lock); + cancel_work_sync(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + schedule_work(&priv->completion_cb.work); +} + +static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (priv->completion_cb.fn) + cancel_work_sync(&priv->completion_cb.work); +} + +struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, + void (*update_fn)(struct channel_gk20a *, void *), + void *update_fn_data, + int runlist_id, + bool is_privileged_channel) +{ + struct channel_gk20a *ch; + struct nvgpu_channel_linux *priv; + + ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + + if (ch) { + priv = ch->os_priv; + nvgpu_spinlock_acquire(&priv->completion_cb.lock); + priv->completion_cb.fn = update_fn; + priv->completion_cb.user_data = update_fn_data; + nvgpu_spinlock_release(&priv->completion_cb.lock); + } + + return ch; +} + +static void nvgpu_channel_open_linux(struct channel_gk20a *ch) +{ +} + +static void nvgpu_channel_close_linux(struct channel_gk20a *ch) +{ + nvgpu_channel_work_completion_clear(ch); + +#if defined(CONFIG_GK20A_CYCLE_STATS) + gk20a_channel_free_cycle_stats_buffer(ch); + gk20a_channel_free_cycle_stats_snapshot(ch); +#endif +} + +static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv; + int err; + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) + return -ENOMEM; + + ch->os_priv = priv; + priv->ch = ch; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = true; +#endif + + err = nvgpu_mutex_init(&priv->error_notifier.mutex); + if (err) { + nvgpu_kfree(g, priv); + return err; + } + + nvgpu_channel_work_completion_init(ch); + + return 0; +} + +static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_destroy(&priv->error_notifier.mutex); + nvgpu_kfree(g, priv); + + ch->os_priv = NULL; + +#ifdef CONFIG_SYNC + ch->has_os_fence_framework_support = false; +#endif +} + +static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, + const char *fmt, ...) 
+{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + char name[30]; + va_list args; + + fence_framework = &priv->fence_framework; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence_framework->timeline = gk20a_sync_timeline_create(name); + + if (!fence_framework->timeline) + return -EINVAL; + + return 0; +} +static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_signal(fence_framework->timeline); +} + +static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + gk20a_sync_timeline_destroy(fence_framework->timeline); + fence_framework->timeline = NULL; +} + +static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + struct nvgpu_os_fence_framework *fence_framework; + + fence_framework = &priv->fence_framework; + + return (fence_framework->timeline != NULL); +} + +int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + int chid; + int err; + + for (chid = 0; chid < (int)f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + err = nvgpu_channel_alloc_linux(g, ch); + if (err) + goto err_clean; + } + + g->os_channel.open = nvgpu_channel_open_linux; + g->os_channel.close = nvgpu_channel_close_linux; + g->os_channel.work_completion_signal = + nvgpu_channel_work_completion_signal; + g->os_channel.work_completion_cancel_sync = + nvgpu_channel_work_completion_cancel_sync; + + g->os_channel.os_fence_framework_inst_exists = + nvgpu_channel_fence_framework_exists; + g->os_channel.init_os_fence_framework = + nvgpu_channel_init_os_fence_framework; + g->os_channel.signal_os_fence_framework = + nvgpu_channel_signal_os_fence_framework; + g->os_channel.destroy_os_fence_framework = + nvgpu_channel_destroy_os_fence_framework; + + return 0; + +err_clean: + for (; chid >= 0; chid--) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + return err; +} + +void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) +{ + struct gk20a *g = &l->g; + struct fifo_gk20a *f = &g->fifo; + unsigned int chid; + + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + nvgpu_channel_free_linux(g, ch); + } + + g->os_channel.os_fence_framework_inst_exists = NULL; + g->os_channel.init_os_fence_framework = NULL; + g->os_channel.signal_os_fence_framework = NULL; + g->os_channel.destroy_os_fence_framework = NULL; +} + +u32 nvgpu_get_gpfifo_entry_size(void) +{ + return sizeof(struct nvgpu_gpfifo_entry); +} + +#ifdef CONFIG_DEBUG_FS +static void trace_write_pushbuffer(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *g) +{ + void *mem = NULL; + unsigned int words; + u64 offset; + struct dma_buf *dmabuf = NULL; + + if (gk20a_debug_trace_cmdbuf) { + u64 gpu_va = (u64)g->entry0 | + (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32); + int err; + + words = pbdma_gp_entry1_length_v(g->entry1); + err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset); + if (!err) + mem = dma_buf_vmap(dmabuf); + } + + if (mem) { + u32 
i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. + */ + for (i = 0; i < words; i += 128U) { + trace_gk20a_push_cmdbuf( + c->g->name, + 0, + min(words - i, 128U), + offset + i * sizeof(u32), + mem); + } + dma_buf_vunmap(dmabuf, mem); + } +} +#endif + +static void trace_write_pushbuffer_range(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *g, + struct nvgpu_gpfifo_entry __user *user_gpfifo, + int offset, + int count) +{ +#ifdef CONFIG_DEBUG_FS + u32 size; + int i; + struct nvgpu_gpfifo_entry *gp; + bool gpfifo_allocated = false; + + if (!gk20a_debug_trace_cmdbuf) + return; + + if (!g && !user_gpfifo) + return; + + if (!g) { + size = count * sizeof(struct nvgpu_gpfifo_entry); + if (size) { + g = nvgpu_big_malloc(c->g, size); + if (!g) + return; + + if (copy_from_user(g, user_gpfifo, size)) { + nvgpu_big_free(c->g, g); + return; + } + } + gpfifo_allocated = true; + } + + gp = g + offset; + for (i = 0; i < count; i++, gp++) + trace_write_pushbuffer(c, gp); + + if (gpfifo_allocated) + nvgpu_big_free(c->g, g); +#endif +} + +/* + * Handle the submit synchronization - pre-fences and post-fences. + */ +static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, + struct nvgpu_channel_fence *fence, + struct channel_gk20a_job *job, + struct priv_cmd_entry **wait_cmd, + struct priv_cmd_entry **incr_cmd, + struct gk20a_fence **post_fence, + bool register_irq, + u32 flags) +{ + struct gk20a *g = c->g; + bool need_sync_fence = false; + bool new_sync_created = false; + int wait_fence_fd = -1; + int err = 0; + bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); + bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_mutex_acquire(&c->sync_lock); + if (!c->sync) { + c->sync = gk20a_channel_sync_create(c, false); + if (!c->sync) { + err = -ENOMEM; + nvgpu_mutex_release(&c->sync_lock); + goto fail; + } + new_sync_created = true; + } + nvgpu_atomic_inc(&c->sync->refcount); + nvgpu_mutex_release(&c->sync_lock); + } + + if (g->ops.fifo.resetup_ramfc && new_sync_created) { + err = g->ops.fifo.resetup_ramfc(c); + if (err) + goto fail; + } + + /* + * Optionally insert syncpt/semaphore wait in the beginning of gpfifo + * submission when user requested and the wait hasn't expired. + */ + if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { + int max_wait_cmds = c->deterministic ? 1 : 0; + + if (!pre_alloc_enabled) + job->wait_cmd = nvgpu_kzalloc(g, + sizeof(struct priv_cmd_entry)); + + if (!job->wait_cmd) { + err = -ENOMEM; + goto fail; + } + + if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { + wait_fence_fd = fence->id; + err = c->sync->wait_fd(c->sync, wait_fence_fd, + job->wait_cmd, max_wait_cmds); + } else { + err = c->sync->wait_syncpt(c->sync, fence->id, + fence->value, + job->wait_cmd); + } + + if (err) + goto clean_up_wait_cmd; + + if (job->wait_cmd->valid) + *wait_cmd = job->wait_cmd; + } + + if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && + (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) + need_sync_fence = true; + + /* + * Always generate an increment at the end of a GPFIFO submission. This + * is used to keep track of method completion for idle railgating. The + * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 
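+	 *
+	 * The resulting ring layout for a fully tracked submit is then,
+	 * conceptually:
+	 *
+	 *   [wait_cmd][user gpfifo entries 0..N-1][incr_cmd]
+	 *
+	 * (Illustrative only: wait_cmd is present just when FENCE_WAIT was
+	 * requested and the wait has not already expired.)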
+ */ + job->post_fence = gk20a_alloc_fence(c); + if (!job->post_fence) { + err = -ENOMEM; + goto clean_up_wait_cmd; + } + if (!pre_alloc_enabled) + job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); + + if (!job->incr_cmd) { + err = -ENOMEM; + goto clean_up_post_fence; + } + + if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) + err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, + job->post_fence, need_wfi, need_sync_fence, + register_irq); + else + err = c->sync->incr(c->sync, job->incr_cmd, + job->post_fence, need_sync_fence, + register_irq); + if (!err) { + *incr_cmd = job->incr_cmd; + *post_fence = job->post_fence; + } else + goto clean_up_incr_cmd; + + return 0; + +clean_up_incr_cmd: + free_priv_cmdbuf(c, job->incr_cmd); + if (!pre_alloc_enabled) + job->incr_cmd = NULL; +clean_up_post_fence: + gk20a_fence_put(job->post_fence); + job->post_fence = NULL; +clean_up_wait_cmd: + free_priv_cmdbuf(c, job->wait_cmd); + if (!pre_alloc_enabled) + job->wait_cmd = NULL; +fail: + *wait_cmd = NULL; + return err; +} + +static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, + struct priv_cmd_entry *cmd) +{ + struct gk20a *g = c->g; + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + struct nvgpu_gpfifo_entry x = { + .entry0 = u64_lo32(cmd->gva), + .entry1 = u64_hi32(cmd->gva) | + pbdma_gp_entry1_length_f(cmd->size) + }; + + nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), + &x, sizeof(x)); + + if (cmd->mem->aperture == APERTURE_SYSMEM) + trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, + cmd->mem->cpu_va + cmd->off * sizeof(u32)); + + c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); +} + +/* + * Copy source gpfifo entries into the gpfifo ring buffer, potentially + * splitting into two memcpys to handle wrap-around. + */ +static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *kern_gpfifo, + struct nvgpu_gpfifo_entry __user *user_gpfifo, + u32 num_entries) +{ + /* byte offsets */ + u32 gpfifo_size = + c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); + u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); + u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); + u32 end = start + len; /* exclusive */ + struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; + struct nvgpu_gpfifo_entry *cpu_src; + int err; + + if (user_gpfifo && !c->gpfifo.pipe) { + /* + * This path (from userspace to sysmem) is special in order to + * avoid two copies unnecessarily (from user to pipe, then from + * pipe to gpu sysmem buffer). 
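+		 *
+		 * A rough sketch of the two copy strategies (not literal
+		 * code):
+		 *
+		 *   sysmem: user buf --copy_from_user--> gpfifo ring
+		 *   vidmem: user buf --copy_from_user--> c->gpfifo.pipe
+		 *                    --nvgpu_mem_wr_n--> gpfifo ring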
+		 */
+		if (end > gpfifo_size) {
+			/* wrap-around */
+			int length0 = gpfifo_size - start;
+			int length1 = len - length0;
+			void __user *user2 = (u8 __user *)user_gpfifo + length0;
+
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, length0);
+			if (err)
+				return err;
+
+			err = copy_from_user(gpfifo_mem->cpu_va,
+					user2, length1);
+			if (err)
+				return err;
+		} else {
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, len);
+			if (err)
+				return err;
+		}
+
+		trace_write_pushbuffer_range(c, NULL, user_gpfifo,
+				0, num_entries);
+		goto out;
+	} else if (user_gpfifo) {
+		/* from userspace to vidmem, use the common copy path below */
+		err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
+		if (err)
+			return err;
+
+		cpu_src = c->gpfifo.pipe;
+	} else {
+		/* from kernel to either sysmem or vidmem, don't need
+		 * copy_from_user so use the common path below */
+		cpu_src = kern_gpfifo;
+	}
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+		void *src2 = (u8 *)cpu_src + length0;
+
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
+	} else {
+		nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
+	}
+
+	trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+
+out:
+	c->gpfifo.put = (c->gpfifo.put + num_entries) &
+		(c->gpfifo.entry_num - 1);
+
+	return 0;
+}
+
+int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
+				struct nvgpu_gpfifo_entry *gpfifo,
+				struct nvgpu_submit_gpfifo_args *args,
+				u32 num_entries,
+				u32 flags,
+				struct nvgpu_channel_fence *fence,
+				struct gk20a_fence **fence_out,
+				struct fifo_profile_gk20a *profile)
+{
+	struct gk20a *g = c->g;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct priv_cmd_entry *incr_cmd = NULL;
+	struct gk20a_fence *post_fence = NULL;
+	struct channel_gk20a_job *job = NULL;
+	/* we might need two extra gpfifo entries - one for the pre fence
+	 * and one for the post fence. */
+	const int extra_entries = 2;
+	bool skip_buffer_refcounting = (flags &
+			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
+	int err = 0;
+	bool need_job_tracking;
+	bool need_deferred_cleanup = false;
+	struct nvgpu_gpfifo_entry __user *user_gpfifo = args ?
+		(struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL;
+
+	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
+		return -ENODEV;
+
+	if (c->has_timedout)
+		return -ETIMEDOUT;
+
+	if (!nvgpu_mem_is_valid(&c->gpfifo.mem))
+		return -ENOMEM;
+
+	/* fifo not large enough for request. Return error immediately.
+	 * Kernel can insert gpfifo entries before and after user gpfifos.
+	 * So, add extra_entries to the user request. Also, HW with fifo size N
+	 * can accept only N-1 entries, hence the check below. */
+	if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
+		nvgpu_err(g, "not enough gpfifo space allocated");
+		return -ENOMEM;
+	}
+
+	if (!gpfifo && !args)
+		return -EINVAL;
+
+	if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
+		      NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
+	    !fence)
+		return -EINVAL;
+
+	/* an address space needs to have been bound at this point.
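+	 * gpfifo entries carry GPU virtual addresses, which are only
+	 * meaningful within the VM bound to the channel.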
*/ + if (!gk20a_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of gpfifo" + " submission."); + return -EINVAL; + } + + gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); + + /* update debug settings */ + nvgpu_ltc_sync_enabled(g); + + nvgpu_log_info(g, "channel %d", c->chid); + + /* + * Job tracking is necessary for any of the following conditions: + * - pre- or post-fence functionality + * - channel wdt + * - GPU rail-gating with non-deterministic channels + * - buffer refcounting + * + * If none of the conditions are met, then job tracking is not + * required and a fast submit can be done (ie. only need to write + * out userspace GPFIFO entries and update GP_PUT). + */ + need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || + (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || + c->timeout.enabled || + (g->can_railgate && !c->deterministic) || + !skip_buffer_refcounting; + + if (need_job_tracking) { + bool need_sync_framework = false; + + /* + * If the channel is to have deterministic latency and + * job tracking is required, the channel must have + * pre-allocated resources. Otherwise, we fail the submit here + */ + if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) + return -EINVAL; + + need_sync_framework = + gk20a_channel_sync_needs_sync_framework(g) || + (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && + flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); + + /* + * Deferred clean-up is necessary for any of the following + * conditions: + * - channel's deterministic flag is not set + * - dependency on sync framework, which could make the + * behavior of the clean-up operation non-deterministic + * (should not be performed in the submit path) + * - channel wdt + * - GPU rail-gating with non-deterministic channels + * - buffer refcounting + * + * If none of the conditions are met, then deferred clean-up + * is not required, and we clean-up one job-tracking + * resource in the submit path. + */ + need_deferred_cleanup = !c->deterministic || + need_sync_framework || + c->timeout.enabled || + (g->can_railgate && + !c->deterministic) || + !skip_buffer_refcounting; + + /* + * For deterministic channels, we don't allow deferred clean_up + * processing to occur. In cases we hit this, we fail the submit + */ + if (c->deterministic && need_deferred_cleanup) + return -EINVAL; + + if (!c->deterministic) { + /* + * Get a power ref unless this is a deterministic + * channel that holds them during the channel lifetime. + * This one is released by gk20a_channel_clean_up_jobs, + * via syncpt or sema interrupt, whichever is used. + */ + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a to submit gpfifo, process %s", + current->comm); + return err; + } + } + + if (!need_deferred_cleanup) { + /* clean up a single job */ + gk20a_channel_clean_up_jobs(c, false); + } + } + + + /* Grab access to HW to deal with do_idle */ + if (c->deterministic) + nvgpu_rwsem_down_read(&g->deterministic_busy); + + if (c->deterministic && c->deterministic_railgate_allowed) { + /* + * Nope - this channel has dropped its own power ref. As + * deterministic submits don't hold power on per each submitted + * job like normal ones do, the GPU might railgate any time now + * and thus submit is disallowed. + */ + err = -EINVAL; + goto clean_up; + } + + trace_gk20a_channel_submit_gpfifo(g->name, + c->chid, + num_entries, + flags, + fence ? fence->id : 0, + fence ? 
fence->value : 0);
+
+	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	/*
+	 * Make sure we have enough space for gpfifo entries. Check cached
+	 * values first and then read from HW. If no space, return EAGAIN
+	 * and let userspace decide whether to retry the request or not.
+	 */
+	if (nvgpu_gp_free_count(c) < num_entries + extra_entries) {
+		if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) {
+			err = -EAGAIN;
+			goto clean_up;
+		}
+	}
+
+	if (c->has_timedout) {
+		err = -ETIMEDOUT;
+		goto clean_up;
+	}
+
+	if (need_job_tracking) {
+		err = channel_gk20a_alloc_job(c, &job);
+		if (err)
+			goto clean_up;
+
+		err = gk20a_submit_prepare_syncs(c, fence, job,
+						 &wait_cmd, &incr_cmd,
+						 &post_fence,
+						 need_deferred_cleanup,
+						 flags);
+		if (err)
+			goto clean_up_job;
+	}
+
+	gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING);
+
+	if (wait_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
+
+	if (gpfifo || user_gpfifo)
+		err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
+				num_entries);
+	if (err)
+		goto clean_up_job;
+
+	/*
+	 * And here's where we add the incr_cmd we generated earlier. It should
+	 * always run!
+	 */
+	if (incr_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
+
+	if (fence_out)
+		*fence_out = gk20a_fence_get(post_fence);
+
+	if (need_job_tracking)
+		/* TODO! Check for errors... */
+		gk20a_channel_add_job(c, job, skip_buffer_refcounting);
+	gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND);
+
+	g->ops.fifo.userd_gp_put(g, c);
+
+	/* No hw access beyond this point */
+	if (c->deterministic)
+		nvgpu_rwsem_up_read(&g->deterministic_busy);
+
+	trace_gk20a_channel_submitted_gpfifo(g->name,
+				c->chid,
+				num_entries,
+				flags,
+				post_fence ? post_fence->syncpt_id : 0,
+				post_fence ? post_fence->syncpt_value : 0);
+
+	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	gk20a_fifo_profile_snapshot(profile, PROFILE_END);
+
+	nvgpu_log_fn(g, "done");
+	return err;
+
+clean_up_job:
+	channel_gk20a_free_job(c, job);
+clean_up:
+	nvgpu_log_fn(g, "fail");
+	gk20a_fence_put(post_fence);
+	if (c->deterministic)
+		nvgpu_rwsem_up_read(&g->deterministic_busy);
+	else if (need_deferred_cleanup)
+		gk20a_idle(g);
+
+	return err;
+}
+
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
new file mode 100644
index 00000000..4a58b10c
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */ +#ifndef __NVGPU_CHANNEL_H__ +#define __NVGPU_CHANNEL_H__ + +#include +#include + +#include + +struct channel_gk20a; +struct nvgpu_gpfifo; +struct nvgpu_submit_gpfifo_args; +struct nvgpu_channel_fence; +struct gk20a_fence; +struct fifo_profile_gk20a; +struct nvgpu_os_linux; + +struct sync_fence; +struct sync_timeline; + +struct nvgpu_channel_completion_cb { + /* + * Signal channel owner via a callback, if set, in job cleanup with + * schedule_work. Means that something finished on the channel (perhaps + * more than one job). + */ + void (*fn)(struct channel_gk20a *, void *); + void *user_data; + /* Make access to the two above atomic */ + struct nvgpu_spinlock lock; + /* Per-channel async work task, cannot reschedule itself */ + struct work_struct work; +}; + +struct nvgpu_error_notifier { + struct dma_buf *dmabuf; + void *vaddr; + + struct nvgpu_notification *notification; + + struct nvgpu_mutex mutex; +}; + +/* + * This struct contains fence_related data. + * e.g. sync_timeline for sync_fences. + */ +struct nvgpu_os_fence_framework { + struct sync_timeline *timeline; +}; + +struct nvgpu_channel_linux { + struct channel_gk20a *ch; + + struct nvgpu_os_fence_framework fence_framework; + + struct nvgpu_channel_completion_cb completion_cb; + struct nvgpu_error_notifier error_notifier; + + struct dma_buf *cyclestate_buffer_handler; +}; + +u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); +int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); +void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); + +struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, + void (*update_fn)(struct channel_gk20a *, void *), + void *update_fn_data, + int runlist_id, + bool is_privileged_channel); + +int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, + struct nvgpu_gpfifo_entry *gpfifo, + struct nvgpu_submit_gpfifo_args *args, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct gk20a_fence **fence_out, + struct fifo_profile_gk20a *profile); + +#endif /* __NVGPU_CHANNEL_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/clk.c b/drivers/gpu/nvgpu/os/linux/clk.c new file mode 100644 index 00000000..414b17c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.c @@ -0,0 +1,165 @@ +/* + * Linux clock support + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +#include "clk.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +#include "gk20a/gk20a.h" + +static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + unsigned long ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + if (g->clk.tegra_clk) + ret = g->clk.cached_rate ? + g->clk.cached_rate : + clk_get_rate(g->clk.tegra_clk); + else + ret = platform->cached_rate ? 
+ platform->cached_rate : + clk_get_rate(platform->clk[0]); + break; + case CTRL_CLK_DOMAIN_PWRCLK: + ret = clk_get_rate(platform->clk[1]); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = 0; + break; + } + + return ret; +} + +static int nvgpu_linux_clk_set_rate(struct gk20a *g, + u32 api_domain, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + int ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + if (g->clk.tegra_clk) { + ret = clk_set_rate(g->clk.tegra_clk, rate); + if (!ret) + g->clk.cached_rate = rate; + } else { + ret = clk_set_rate(platform->clk[0], rate); + if (!ret) + platform->cached_rate = rate; + } + break; + case CTRL_CLK_DOMAIN_PWRCLK: + ret = clk_set_rate(platform->clk[1], rate); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = -EINVAL; + break; + } + + return ret; +} + +static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + + /* + * On Tegra platforms with GPCPLL bus (gbus) GPU tegra_clk clock exposed + * to frequency governor is a shared user on the gbus. The latter can be + * accessed as GPU clock parent, and incorporate DVFS related data. + */ + if (g->clk.tegra_clk) + return tegra_dvfs_get_fmax_at_vmin_safe_t( + clk_get_parent(g->clk.tegra_clk)); + + if (platform->maxmin_clk_id) + return tegra_bpmp_dvfs_get_fmax_at_vmin( + platform->maxmin_clk_id); + + return 0; +} + +static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) +{ + struct clk *c; + + c = clk_get_sys("gpu_ref", "gpu_ref"); + if (IS_ERR(c)) { + nvgpu_err(g, "failed to get GPCPLL reference clock"); + return 0; + } + + return clk_get_rate(c); +} + +static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, + unsigned long rate) +{ + return tegra_dvfs_predict_mv_at_hz_cur_tfloor( + clk_get_parent(clk->tegra_clk), rate); +} + +static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) +{ + int ret; + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk)); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + ret = 0; + break; + } + + return ret; +} + +static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) +{ + return clk_prepare_enable(clk->tegra_clk); +} + +static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) +{ + clk_disable_unprepare(clk->tegra_clk); +} + +void nvgpu_linux_init_clk_support(struct gk20a *g) +{ + g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; + g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; + g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; + g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; + g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; + g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; + g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; + g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; +} diff --git a/drivers/gpu/nvgpu/os/linux/clk.h b/drivers/gpu/nvgpu/os/linux/clk.h new file mode 100644 index 00000000..614a7fd7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NVGPU_COMMON_LINUX_CLK_H
+#define NVGPU_COMMON_LINUX_CLK_H
+
+struct gk20a;
+void nvgpu_linux_init_clk_support(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
new file mode 100644
index 00000000..353f6363
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+
+#include
+
+#include
+
+#include "gk20a/gk20a.h"
+#include "dmabuf.h"
+
+void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
+			struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
+							     buf->dev);
+
+	if (!comptags)
+		return;
+
+	if (!priv) {
+		memset(comptags, 0, sizeof(*comptags));
+		return;
+	}
+
+	nvgpu_mutex_acquire(&priv->lock);
+	*comptags = priv->comptags;
+	nvgpu_mutex_release(&priv->lock);
+}
+
+int gk20a_alloc_or_get_comptags(struct gk20a *g,
+				struct nvgpu_os_buffer *buf,
+				struct gk20a_comptag_allocator *allocator,
+				struct gk20a_comptags *comptags)
+{
+	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
+							     buf->dev);
+	u32 offset;
+	int err;
+	unsigned int ctag_granularity;
+	u32 lines;
+
+	if (!priv)
+		return -ENOSYS;
+
+	nvgpu_mutex_acquire(&priv->lock);
+
+	if (priv->comptags.allocated) {
+		/*
+		 * already allocated
+		 */
+		*comptags = priv->comptags;
+
+		err = 0;
+		goto exit_locked;
+	}
+
+	ctag_granularity = g->ops.fb.compression_page_size(g);
+	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
+
+	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
+	if (lines < 1) {
+		err = -EINVAL;
+		goto exit_locked;
+	}
+
+	/* store the allocator so we can use it when we free the ctags */
+	priv->comptag_allocator = allocator;
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (!err) {
+		priv->comptags.offset = offset;
+		priv->comptags.lines = lines;
+		priv->comptags.needs_clear = true;
+	} else {
+		priv->comptags.offset = 0;
+		priv->comptags.lines = 0;
+		priv->comptags.needs_clear = false;
+	}
+
+	/*
+	 * We don't report an error here if comptag alloc failed. The
+	 * caller will simply fall back to incompressible kinds. It
+	 * would not be safe to re-allocate comptags anyway on
+	 * successive calls, as that would break map aliasing.
+ */ + err = 0; + priv->comptags.allocated = true; + + *comptags = priv->comptags; + +exit_locked: + nvgpu_mutex_release(&priv->lock); + + return err; +} + +bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, + buf->dev); + bool clear_started = false; + + if (priv) { + nvgpu_mutex_acquire(&priv->lock); + + clear_started = priv->comptags.needs_clear; + + if (!clear_started) + nvgpu_mutex_release(&priv->lock); + } + + return clear_started; +} + +void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf, + bool clear_successful) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, + buf->dev); + if (priv) { + if (clear_successful) + priv->comptags.needs_clear = false; + + nvgpu_mutex_release(&priv->lock); + } +} diff --git a/drivers/gpu/nvgpu/os/linux/cond.c b/drivers/gpu/nvgpu/os/linux/cond.c new file mode 100644 index 00000000..633c34fd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cond.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +int nvgpu_cond_init(struct nvgpu_cond *cond) +{ + init_waitqueue_head(&cond->wq); + cond->initialized = true; + + return 0; +} + +void nvgpu_cond_destroy(struct nvgpu_cond *cond) +{ + cond->initialized = false; +} + +int nvgpu_cond_signal(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up(&cond->wq); + + return 0; +} + +int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_interruptible(&cond->wq); + + return 0; +} + +int nvgpu_cond_broadcast(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_all(&cond->wq); + + return 0; +} + +int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) +{ + if (!cond->initialized) + return -EINVAL; + + wake_up_interruptible_all(&cond->wq); + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c new file mode 100644 index 00000000..a335988a --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" + +#include +#include +#include +#include + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "ctxsw_trace.h" + +#include +#include + +#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) + +/* Userland-facing FIFO (one global + eventually one per VM) */ +struct gk20a_ctxsw_dev { + struct gk20a *g; + + struct nvgpu_ctxsw_ring_header *hdr; + struct nvgpu_ctxsw_trace_entry *ents; + struct nvgpu_ctxsw_trace_filter filter; + bool write_enabled; + struct nvgpu_cond readout_wq; + size_t size; + u32 num_ents; + + nvgpu_atomic_t vma_ref; + + struct nvgpu_mutex write_lock; +}; + + +struct gk20a_ctxsw_trace { + struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS]; +}; + +static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx == hdr->read_idx); +} + +static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) +{ + return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; +} + +static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) +{ + return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; +} + +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, + loff_t *off) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + struct nvgpu_ctxsw_trace_entry __user *entry = + (struct nvgpu_ctxsw_trace_entry *) buf; + size_t copied = 0; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, + "filp=%p buf=%p size=%zu", filp, buf, size); + + nvgpu_mutex_acquire(&dev->write_lock); + while (ring_is_empty(hdr)) { + nvgpu_mutex_release(&dev->write_lock); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, + !ring_is_empty(hdr), 0); + if (err) + return err; + nvgpu_mutex_acquire(&dev->write_lock); + } + + while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) { + if (ring_is_empty(hdr)) + break; + + if (copy_to_user(entry, &dev->ents[hdr->read_idx], + sizeof(*entry))) { + nvgpu_mutex_release(&dev->write_lock); + return -EFAULT; + } + + hdr->read_idx++; + if (hdr->read_idx >= hdr->num_ents) + hdr->read_idx = 0; + + entry++; + copied += sizeof(*entry); + size -= sizeof(*entry); + } + + nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied, + hdr->read_idx); + + *off = hdr->read_idx; + nvgpu_mutex_release(&dev->write_lock); + + return copied; +} + +static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = true; + nvgpu_mutex_release(&dev->write_lock); + dev->g->ops.fecs_trace.enable(dev->g); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); + dev->g->ops.fecs_trace.disable(dev->g); + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = false; + nvgpu_mutex_release(&dev->write_lock); + return 0; +} + +static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, + size_t size) +{ + struct gk20a *g = dev->g; + void *buf; + int err; + + if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) + return -EBUSY; + + err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); + if (err) + return err; + + + dev->hdr = buf; + 
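+	/*
+	 * The ring is a single allocation: the header sits at the front
+	 * and the entry array starts immediately behind it, so dev->ents
+	 * below is simply hdr + 1. Sketch of the layout:
+	 *
+	 *	dev->hdr  -> struct nvgpu_ctxsw_ring_header
+	 *	dev->ents -> struct nvgpu_ctxsw_trace_entry[hdr->num_ents]
+	 */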
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1); + dev->size = size; + dev->num_ents = dev->hdr->num_ents; + + nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d", + dev->size, dev->hdr, dev->ents, dev->hdr->num_ents); + return 0; +} + +int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, + void **buf, size_t *size) +{ + struct nvgpu_ctxsw_ring_header *hdr; + + *size = roundup(*size, PAGE_SIZE); + hdr = vmalloc_user(*size); + if (!hdr) + return -ENOMEM; + + hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC; + hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION; + hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header)) + / sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry); + hdr->drop_count = 0; + hdr->read_idx = 0; + hdr->write_idx = 0; + hdr->write_seqno = 0; + + *buf = hdr; + return 0; +} + +int gk20a_ctxsw_dev_ring_free(struct gk20a *g) +{ + struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; + + nvgpu_vfree(g, dev->hdr); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_ring_setup_args *args) +{ + struct gk20a *g = dev->g; + size_t size = args->size; + int ret; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); + + if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) + return -EINVAL; + + nvgpu_mutex_acquire(&dev->write_lock); + ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); + nvgpu_mutex_release(&dev->write_lock); + + return ret; +} + +static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + struct gk20a *g = dev->g; + + nvgpu_mutex_acquire(&dev->write_lock); + dev->filter = args->filter; + nvgpu_mutex_release(&dev->write_lock); + + if (g->ops.fecs_trace.set_filter) + g->ops.fecs_trace.set_filter(g, &dev->filter); + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, + struct nvgpu_ctxsw_trace_filter_args *args) +{ + nvgpu_mutex_acquire(&dev->write_lock); + args->filter = dev->filter; + nvgpu_mutex_release(&dev->write_lock); + + return 0; +} + +static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) +{ + struct gk20a *g = dev->g; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.fecs_trace.flush) + err = g->ops.fecs_trace.flush(g); + + if (likely(!err)) + err = g->ops.fecs_trace.poll(g); + + gk20a_idle(g); + return err; +} + +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + struct gk20a_ctxsw_trace *trace; + struct gk20a_ctxsw_dev *dev; + int err; + size_t size; + u32 n; + + /* only one VM for now */ + const int vmid = 0; + + l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g); + + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto free_ref; + } + + err = gk20a_busy(g); + if (err) + goto free_ref; + + trace = g->ctxsw_trace; + if (!trace) { + err = -ENODEV; + goto idle; + } + + /* Allow only one user for this device */ + dev = &trace->devs[vmid]; + nvgpu_mutex_acquire(&dev->write_lock); + if (dev->hdr) { + err = -EBUSY; + goto done; + } + + /* By default, allocate ring buffer big enough to accommodate + * FECS records with default event filter */ + + /* enable all traces by default */ + NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter); + + /* compute max number of entries generated 
with this filter */ + n = g->ops.fecs_trace.max_entries(g, &dev->filter); + + size = sizeof(struct nvgpu_ctxsw_ring_header) + + n * sizeof(struct nvgpu_ctxsw_trace_entry); + nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu", + size, n, sizeof(struct nvgpu_ctxsw_trace_entry)); + + err = gk20a_ctxsw_dev_alloc_buffer(dev, size); + if (!err) { + filp->private_data = dev; + nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu", + filp, dev, size); + } + +done: + nvgpu_mutex_release(&dev->write_lock); + +idle: + gk20a_idle(g); +free_ref: + if (err) + gk20a_put(g); + return err; +} + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); + + g->ops.fecs_trace.disable(g); + + nvgpu_mutex_acquire(&dev->write_lock); + dev->write_enabled = false; + nvgpu_mutex_release(&dev->write_lock); + + if (dev->hdr) { + dev->g->ops.fecs_trace.free_user_buffer(dev->g); + dev->hdr = NULL; + } + gk20a_put(g); + return 0; +} + +long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: + err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); + break; + case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: + err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); + break; + case NVGPU_CTXSW_IOCTL_RING_SETUP: + err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, + (struct nvgpu_ctxsw_ring_setup_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_SET_FILTER: + err = gk20a_ctxsw_dev_ioctl_set_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_GET_FILTER: + err = gk20a_ctxsw_dev_ioctl_get_filter(dev, + (struct nvgpu_ctxsw_trace_filter_args *) buf); + break; + case NVGPU_CTXSW_IOCTL_POLL: + err = gk20a_ctxsw_dev_ioctl_poll(dev); + break; + default: + dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); + + return err; +} + +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; + unsigned int mask = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + nvgpu_mutex_acquire(&dev->write_lock); + poll_wait(filp, &dev->readout_wq.wq, wait); + if (!ring_is_empty(hdr)) + mask |= POLLIN | POLLRDNORM; + nvgpu_mutex_release(&dev->write_lock); + + return mask; +} + +static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = vma->vm_private_data; + struct gk20a *g = dev->g; + + nvgpu_atomic_inc(&dev->vma_ref); + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + nvgpu_atomic_read(&dev->vma_ref)); +} + +static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev 
*dev = vma->vm_private_data; + struct gk20a *g = dev->g; + + nvgpu_atomic_dec(&dev->vma_ref); + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", + nvgpu_atomic_read(&dev->vma_ref)); +} + +static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { + .open = gk20a_ctxsw_dev_vma_open, + .close = gk20a_ctxsw_dev_vma_close, +}; + +int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, + struct vm_area_struct *vma) +{ + return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); +} + +int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct gk20a_ctxsw_dev *dev = filp->private_data; + struct gk20a *g = dev->g; + int ret; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + + ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); + if (likely(!ret)) { + vma->vm_private_data = dev; + vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; + vma->vm_ops->open(vma); + } + + return ret; +} + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static int gk20a_ctxsw_init_devs(struct gk20a *g) +{ + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + struct gk20a_ctxsw_dev *dev = trace->devs; + int err; + int i; + + for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { + dev->g = g; + dev->hdr = NULL; + dev->write_enabled = false; + nvgpu_cond_init(&dev->readout_wq); + err = nvgpu_mutex_init(&dev->write_lock); + if (err) + return err; + nvgpu_atomic_set(&dev->vma_ref, 0); + dev++; + } + return 0; +} +#endif + +int gk20a_ctxsw_trace_init(struct gk20a *g) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct gk20a_ctxsw_trace *trace = g->ctxsw_trace; + int err; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace); + + /* if tracing is not supported, skip this */ + if (!g->ops.fecs_trace.init) + return 0; + + if (likely(trace)) + return 0; + + trace = nvgpu_kzalloc(g, sizeof(*trace)); + if (unlikely(!trace)) + return -ENOMEM; + g->ctxsw_trace = trace; + + err = gk20a_ctxsw_init_devs(g); + if (err) + goto fail; + + err = g->ops.fecs_trace.init(g); + if (unlikely(err)) + goto fail; + + return 0; + +fail: + memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace)); + nvgpu_kfree(g, trace); + g->ctxsw_trace = NULL; + return err; +#else + return 0; +#endif +} + +void gk20a_ctxsw_trace_cleanup(struct gk20a *g) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct gk20a_ctxsw_trace *trace; + struct gk20a_ctxsw_dev *dev; + int i; + + if (!g->ctxsw_trace) + return; + + trace = g->ctxsw_trace; + dev = trace->devs; + + for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) { + nvgpu_mutex_destroy(&dev->write_lock); + dev++; + } + + nvgpu_kfree(g, g->ctxsw_trace); + g->ctxsw_trace = NULL; + + g->ops.fecs_trace.deinit(g); +#endif +} + +int gk20a_ctxsw_trace_write(struct gk20a *g, + struct nvgpu_ctxsw_trace_entry *entry) +{ + struct nvgpu_ctxsw_ring_header *hdr; + struct gk20a_ctxsw_dev *dev; + int ret = 0; + const char *reason; + u32 write_idx; + + if (!g->ctxsw_trace) + return 0; + + if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS)) + return -ENODEV; + + dev = &g->ctxsw_trace->devs[entry->vmid]; + hdr = dev->hdr; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, + "dev=%p hdr=%p", dev, hdr); + + nvgpu_mutex_acquire(&dev->write_lock); + + if (unlikely(!hdr)) { + /* device has been released */ + ret = -ENODEV; + goto done; + } + + write_idx = hdr->write_idx; + if (write_idx >= dev->num_ents) { + nvgpu_err(dev->g, + "write_idx=%u out of range [0..%u]", + write_idx, dev->num_ents); + ret = -ENOSPC; + reason = "write_idx out of range"; + goto disable; + } + + entry->seqno = 
hdr->write_seqno++; + + if (!dev->write_enabled) { + ret = -EBUSY; + reason = "write disabled"; + goto drop; + } + + if (unlikely(ring_is_full(hdr))) { + ret = -ENOSPC; + reason = "user fifo full"; + goto drop; + } + + if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) { + reason = "filtered out"; + goto filter; + } + + nvgpu_log(g, gpu_dbg_ctxsw, + "seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp); + + dev->ents[write_idx] = *entry; + + /* ensure record is written before updating write index */ + nvgpu_smp_wmb(); + + write_idx++; + if (unlikely(write_idx >= hdr->num_ents)) + write_idx = 0; + hdr->write_idx = write_idx; + nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d", + hdr->read_idx, hdr->write_idx, ring_len(hdr)); + + nvgpu_mutex_release(&dev->write_lock); + return ret; + +disable: + g->ops.fecs_trace.disable(g); + +drop: + hdr->drop_count++; + +filter: + nvgpu_log(g, gpu_dbg_ctxsw, + "dropping seqno=%d context_id=%08x pid=%lld " + "tag=%x time=%llx (%s)", + entry->seqno, entry->context_id, entry->pid, + entry->tag, entry->timestamp, reason); + +done: + nvgpu_mutex_release(&dev->write_lock); + return ret; +} + +void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) +{ + struct gk20a_ctxsw_dev *dev; + + if (!g->ctxsw_trace) + return; + + dev = &g->ctxsw_trace->devs[vmid]; + nvgpu_cond_signal_interruptible(&dev->readout_wq); +} + +void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct nvgpu_ctxsw_trace_entry entry = { + .vmid = 0, + .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, + .context_id = 0, + .pid = ch->tgid, + }; + + if (!g->ctxsw_trace) + return; + + g->ops.ptimer.read_ptimer(g, &entry.timestamp); + gk20a_ctxsw_trace_write(g, &entry); + gk20a_ctxsw_trace_wake_up(g, 0); +#endif + trace_gk20a_channel_reset(ch->chid, ch->tsgid); +} + +void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg) +{ +#ifdef CONFIG_GK20A_CTXSW_TRACE + struct nvgpu_ctxsw_trace_entry entry = { + .vmid = 0, + .tag = NVGPU_CTXSW_TAG_ENGINE_RESET, + .context_id = 0, + .pid = tsg->tgid, + }; + + if (!g->ctxsw_trace) + return; + + g->ops.ptimer.read_ptimer(g, &entry.timestamp); + gk20a_ctxsw_trace_write(g, &entry); + gk20a_ctxsw_trace_wake_up(g, 0); +#endif + trace_gk20a_channel_reset(~0, tsg->tsgid); +} diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h new file mode 100644 index 00000000..88ca7f25 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __CTXSW_TRACE_H__ +#define __CTXSW_TRACE_H__ + +#include + +#define GK20A_CTXSW_TRACE_NUM_DEVS 1 + +struct file; +struct inode; +struct poll_table_struct; + +struct gk20a; + +int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); +int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); +long gk20a_ctxsw_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, + size_t size, loff_t *offs); +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, + struct poll_table_struct *pts); + +#endif /* __CTXSW_TRACE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c new file mode 100644 index 00000000..8738f3e7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug.c @@ -0,0 +1,452 @@ +/* + * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_cde.h" +#include "debug_ce.h" +#include "debug_fifo.h" +#include "debug_gr.h" +#include "debug_allocator.h" +#include "debug_kmem.h" +#include "debug_pmu.h" +#include "debug_sched.h" +#include "debug_hal.h" +#include "debug_xve.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +#include "gk20a/gk20a.h" + +#include +#include +#include + +#include + +unsigned int gk20a_debug_trace_cmdbuf; + +static inline void gk20a_debug_write_printk(void *ctx, const char *str, + size_t len) +{ + pr_info("%s", str); +} + +static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, + size_t len) +{ + seq_write((struct seq_file *)ctx, str, len); +} + +void gk20a_debug_output(struct gk20a_debug_output *o, + const char *fmt, ...) 
+{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + o->fn(o->ctx, o->buf, len); +} + +static int gk20a_gr_dump_regs(struct gk20a *g, + struct gk20a_debug_output *o) +{ + if (g->ops.gr.dump_gr_regs) + gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); + + return 0; +} + +int gk20a_gr_debug_dump(struct gk20a *g) +{ + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + gk20a_gr_dump_regs(g, &o); + + return 0; +} + +static int gk20a_gr_debug_show(struct seq_file *s, void *unused) +{ + struct device *dev = s->private; + struct gk20a *g = gk20a_get_platform(dev)->g; + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_to_seqfile, + .ctx = s, + }; + int err; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu: %d", err); + return -EINVAL; + } + + gk20a_gr_dump_regs(g, &o); + + gk20a_idle(g); + + return 0; +} + +void gk20a_debug_dump(struct gk20a *g) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_printk + }; + + if (platform->dump_platform_dependencies) + platform->dump_platform_dependencies(dev_from_gk20a(g)); + + /* HAL only initialized after 1st power-on */ + if (g->ops.debug.show_dump) + g->ops.debug.show_dump(g, &o); +} + +static int gk20a_debug_show(struct seq_file *s, void *unused) +{ + struct device *dev = s->private; + struct gk20a_debug_output o = { + .fn = gk20a_debug_write_to_seqfile, + .ctx = s, + }; + struct gk20a *g; + int err; + + g = gk20a_get_platform(dev)->g; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu: %d", err); + return -EFAULT; + } + + /* HAL only initialized after 1st power-on */ + if (g->ops.debug.show_dump) + g->ops.debug.show_dump(g, &o); + + gk20a_idle(g); + return 0; +} + +static int gk20a_gr_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_gr_debug_show, inode->i_private); +} + +static int gk20a_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_debug_show, inode->i_private); +} + +static const struct file_operations gk20a_gr_debug_fops = { + .open = gk20a_gr_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations gk20a_debug_fops = { + .open = gk20a_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) +{ + g->ops.fifo.dump_pbdma_status(g, o); + g->ops.fifo.dump_eng_status(g, o); + + gk20a_debug_dump_all_channel_status_ramfc(g, o); +} + +static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + struct gk20a *g = file->private_data; + + if (g->mm.disable_bigpage) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + buf[2] = 0x00; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size; + bool bv; + struct gk20a *g = file->private_data; + + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + if (strtobool(buf, &bv) == 0) { + g->mm.disable_bigpage = bv; + gk20a_init_gpu_characteristics(g); + } + + return count; +} + +static struct file_operations disable_bigpage_fops = { + 
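+	/*
+	 * simple_open() stores inode->i_private (the struct gk20a pointer
+	 * handed to debugfs_create_file() later in gk20a_debug_init()) in
+	 * file->private_data, which is what the read/write handlers above
+	 * dereference.
+	 */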
.open = simple_open, + .read = disable_bigpage_read, + .write = disable_bigpage_write, +}; + +static int railgate_residency_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + unsigned long time_since_last_state_transition_ms; + unsigned long total_rail_gate_time_ms; + unsigned long total_rail_ungate_time_ms; + + if (platform->is_railgated(dev_from_gk20a(g))) { + time_since_last_state_transition_ms = + jiffies_to_msecs(jiffies - + g->pstats.last_rail_gate_complete); + total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms; + total_rail_gate_time_ms = + g->pstats.total_rail_gate_time_ms + + time_since_last_state_transition_ms; + } else { + time_since_last_state_transition_ms = + jiffies_to_msecs(jiffies - + g->pstats.last_rail_ungate_complete); + total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms; + total_rail_ungate_time_ms = + g->pstats.total_rail_ungate_time_ms + + time_since_last_state_transition_ms; + } + + seq_printf(s, "Time with Rails Gated: %lu ms\n" + "Time with Rails UnGated: %lu ms\n" + "Total railgating cycles: %lu\n", + total_rail_gate_time_ms, + total_rail_ungate_time_ms, + g->pstats.railgating_cycle_count - 1); + return 0; + +} + +static int railgate_residency_open(struct inode *inode, struct file *file) +{ + return single_open(file, railgate_residency_show, inode->i_private); +} + +static const struct file_operations railgate_residency_fops = { + .open = railgate_residency_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int gk20a_railgating_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *d; + + d = debugfs_create_file( + "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &railgate_residency_fops); + if (!d) + return -ENOMEM; + + return 0; +} +static ssize_t timeouts_enabled_read(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + struct gk20a *g = file->private_data; + + if (nvgpu_is_timeouts_enabled(g)) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + buf[2] = 0x00; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t timeouts_enabled_write(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + int buf_size; + bool timeouts_enabled; + struct gk20a *g = file->private_data; + + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + if (strtobool(buf, &timeouts_enabled) == 0) { + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (timeouts_enabled == false) { + /* requesting to disable timeouts */ + if (g->timeouts_disabled_by_user == false) { + nvgpu_atomic_inc(&g->timeouts_disabled_refcount); + g->timeouts_disabled_by_user = true; + } + } else { + /* requesting to enable timeouts */ + if (g->timeouts_disabled_by_user == true) { + nvgpu_atomic_dec(&g->timeouts_disabled_refcount); + g->timeouts_disabled_by_user = false; + } + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + } + + return count; +} + +static const struct file_operations timeouts_enabled_fops = { + .open = simple_open, + .read = timeouts_enabled_read, + .write = timeouts_enabled_write, +}; + +void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + + l->debugfs = debugfs_create_dir(dev_name(dev), NULL); + 
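+	/*
+	 * Every node below hangs off this per-device directory; with the
+	 * usual mount point the "status" file created next shows up as,
+	 * e.g., /sys/kernel/debug/<dev_name>/status (illustrative path,
+	 * the debugfs mount point may differ).
+	 */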
if (!l->debugfs) + return; + + if (debugfs_symlink) + l->debugfs_alias = + debugfs_create_symlink(debugfs_symlink, + NULL, dev_name(dev)); + + debugfs_create_file("status", S_IRUGO, l->debugfs, + dev, &gk20a_debug_fops); + debugfs_create_file("gr_status", S_IRUGO, l->debugfs, + dev, &gk20a_gr_debug_fops); + debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, + l->debugfs, &gk20a_debug_trace_cmdbuf); + + debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, + l->debugfs, &g->ch_wdt_timeout_ms); + + debugfs_create_u32("disable_syncpoints", S_IRUGO, + l->debugfs, &g->disable_syncpoints); + + /* New debug logging API. */ + debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR, + l->debugfs, &g->log_mask); + debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR, + l->debugfs, &g->log_trace); + + l->debugfs_ltc_enabled = + debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, + l->debugfs, + &g->mm.ltc_enabled_target); + + l->debugfs_gr_idle_timeout_default = + debugfs_create_u32("gr_idle_timeout_default_us", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr_idle_timeout_default); + l->debugfs_timeouts_enabled = + debugfs_create_file("timeouts_enabled", + S_IRUGO|S_IWUSR, + l->debugfs, + g, + &timeouts_enabled_fops); + + l->debugfs_disable_bigpage = + debugfs_create_file("disable_bigpage", + S_IRUGO|S_IWUSR, + l->debugfs, + g, + &disable_bigpage_fops); + + l->debugfs_timeslice_low_priority_us = + debugfs_create_u32("timeslice_low_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_low_priority_us); + l->debugfs_timeslice_medium_priority_us = + debugfs_create_u32("timeslice_medium_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_medium_priority_us); + l->debugfs_timeslice_high_priority_us = + debugfs_create_u32("timeslice_high_priority_us", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->timeslice_high_priority_us); + l->debugfs_runlist_interleave = + debugfs_create_bool("runlist_interleave", + S_IRUGO|S_IWUSR, + l->debugfs, + &g->runlist_interleave); + l->debugfs_force_preemption_gfxp = + debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, + l->debugfs, + &g->gr.ctx_vars.force_preemption_gfxp); + + l->debugfs_force_preemption_cilp = + debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, + l->debugfs, + &g->gr.ctx_vars.force_preemption_cilp); + + l->debugfs_dump_ctxsw_stats = + debugfs_create_bool("dump_ctxsw_stats_on_channel_close", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); + + gr_gk20a_debugfs_init(g); + gk20a_pmu_debugfs_init(g); + gk20a_railgating_debugfs_init(g); +#ifdef CONFIG_NVGPU_SUPPORT_CDE + gk20a_cde_debugfs_init(g); +#endif + gk20a_ce_debugfs_init(g); + nvgpu_alloc_debugfs_init(g); + nvgpu_hal_debugfs_init(g); + gk20a_fifo_debugfs_init(g); + gk20a_sched_debugfs_init(g); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + nvgpu_kmem_debugfs_init(g); +#endif + if (g->pci_vendor_id) + nvgpu_xve_debugfs_init(g); +} + +void gk20a_debug_deinit(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs) + return; + + gk20a_fifo_debugfs_deinit(g); + + debugfs_remove_recursive(l->debugfs); + debugfs_remove(l->debugfs_alias); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.c b/drivers/gpu/nvgpu/os/linux/debug_allocator.c new file mode 100644 index 00000000..d63a9030 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_allocator.h" +#include "os_linux.h" + +#include +#include + +#include + +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct nvgpu_allocator *a = s->private; + + nvgpu_alloc_print_stats(a, s, 1); + + return 0; +} + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs_allocators) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + l->debugfs_allocators, + a, &__alloc_fops); +} + +void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) +{ +} + +void nvgpu_alloc_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); + if (IS_ERR_OR_NULL(l->debugfs_allocators)) { + l->debugfs_allocators = NULL; + return; + } +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.h b/drivers/gpu/nvgpu/os/linux/debug_allocator.h new file mode 100644 index 00000000..1b21cfc5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_ALLOCATOR_H__ +#define __NVGPU_DEBUG_ALLOCATOR_H__ + +struct gk20a; +void nvgpu_alloc_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.c b/drivers/gpu/nvgpu/os/linux/debug_cde.c new file mode 100644 index 00000000..f0afa6ee --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.c @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_cde.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +#include + + +static ssize_t gk20a_cde_reload_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct nvgpu_os_linux *l = file->private_data; + gk20a_cde_reload(l); + return count; +} + +static const struct file_operations gk20a_cde_reload_fops = { + .open = simple_open, + .write = gk20a_cde_reload_write, +}; + +void gk20a_cde_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + if (!platform->has_cde) + return; + + debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.shader_parameter); + debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_count); + debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_usecount); + debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, + l->debugfs, &l->cde_app.ctx_count_top); + debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, + l, &gk20a_cde_reload_fops); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.h b/drivers/gpu/nvgpu/os/linux/debug_cde.h new file mode 100644 index 00000000..4895edd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_CDE_H__ +#define __NVGPU_DEBUG_CDE_H__ + +struct gk20a; +void gk20a_cde_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_CDE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.c b/drivers/gpu/nvgpu/os/linux/debug_ce.c new file mode 100644 index 00000000..cea0bb47 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_ce.h" +#include "os_linux.h" + +#include + +void gk20a_ce_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.ctx_count); + debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.app_state); + debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, + l->debugfs, &g->ce_app.next_ctx_id); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.h b/drivers/gpu/nvgpu/os/linux/debug_ce.h new file mode 100644 index 00000000..2a8750c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_CE_H__ +#define __NVGPU_DEBUG_CE_H__ + +struct gk20a; +void gk20a_ce_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_CE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_clk.c b/drivers/gpu/nvgpu/os/linux/debug_clk.c new file mode 100644 index 00000000..2484d44b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_clk.c @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +#include "gm20b/clk_gm20b.h" +#include "os_linux.h" +#include "platform_gk20a.h" + +static int rate_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + struct clk_gk20a *clk = &g->clk; + + *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); + return 0; +} +static int rate_set(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *)data; + return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); +} +DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); + +static int pll_reg_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_pll_debug_data d; + u32 reg, m, n, pl, f; + int err = 0; + + if (g->ops.clk.get_pll_debug_data) { + err = g->ops.clk.get_pll_debug_data(g, &d); + if (err) + return err; + } else { + return -EINVAL; + } + + seq_printf(s, "bypassctrl = %s, ", + d.trim_sys_bypassctrl_val ? "bypass" : "vco"); + seq_printf(s, "sel_vco = %s, ", + d.trim_sys_sel_vco_val ? "vco" : "bypass"); + + seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, + d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", + d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", + d.trim_sys_gpcpll_cfg_sync_on ? 
"sync_on" : "sync_off"); + + reg = d.trim_sys_gpcpll_coeff_val; + m = d.trim_sys_gpcpll_coeff_mdiv; + n = d.trim_sys_gpcpll_coeff_ndiv; + pl = d.trim_sys_gpcpll_coeff_pldiv; + f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); + seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); + seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); + + seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", + d.trim_sys_gpcpll_dvfs0_val, + d.trim_sys_gpcpll_dvfs0_dfs_coeff, + d.trim_sys_gpcpll_dvfs0_dfs_det_max, + d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); + + return 0; +} + +static int pll_reg_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_reg_show, inode->i_private); +} + +static const struct file_operations pll_reg_fops = { + .open = pll_reg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pll_reg_raw_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_pll_debug_data d; + u32 reg; + int err = 0; + + if (g->ops.clk.get_pll_debug_data) { + err = g->ops.clk.get_pll_debug_data(g, &d); + if (err) + return err; + } else { + return -EINVAL; + } + + seq_puts(s, "GPCPLL REGISTERS:\n"); + for (reg = d.trim_sys_gpcpll_cfg_reg; + reg <= d.trim_sys_gpcpll_dvfs2_reg; + reg += sizeof(u32)) + seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); + + seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); + + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, + d.trim_sys_sel_vco_val); + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, + d.trim_sys_gpc2clk_out_val); + seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, + d.trim_sys_bypassctrl_val); + + return 0; +} + +static int pll_reg_raw_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_reg_raw_show, inode->i_private); +} + +static ssize_t pll_reg_raw_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct gk20a *g = file->f_path.dentry->d_inode->i_private; + char buf[80]; + u32 reg, val; + int err = 0; + + if (sizeof(buf) <= count) + return -EINVAL; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + /* terminate buffer and trim - white spaces may be appended + * at the end when invoked from shell command line */ + buf[count] = '\0'; + strim(buf); + + if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) + return -EINVAL; + + if (g->ops.clk.pll_reg_write(g, reg, val)) + err = g->ops.clk.pll_reg_write(g, reg, val); + else + err = -EINVAL; + + return err; +} + +static const struct file_operations pll_reg_raw_fops = { + .open = pll_reg_raw_open, + .read = seq_read, + .write = pll_reg_raw_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int monitor_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + int err = 0; + + if (g->ops.clk.get_gpcclk_clock_counter) + err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); + else + err = -EINVAL; + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); + +static int voltage_get(void *data, u64 *val) +{ + struct gk20a *g = (struct gk20a *)data; + int err = 0; + + if (g->ops.clk.get_voltage) + err = g->ops.clk.get_voltage(&g->clk, val); + else + err = -EINVAL; + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); + +static int pll_param_show(struct seq_file *s, void *data) +{ + struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); + + seq_printf(s, 
"ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", + gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, + gpc_pll_params->vco_ctrl); + return 0; +} + +static int pll_param_open(struct inode *inode, struct file *file) +{ + return single_open(file, pll_param_show, inode->i_private); +} + +static const struct file_operations pll_param_fops = { + .open = pll_param_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gm20b_clk_init_debugfs(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *d; + + if (!l->debugfs) + return -EINVAL; + + d = debugfs_create_file( + "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); + if (!d) + goto err_out; + + d = debugfs_create_file("pll_reg_raw", + S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); + if (!d) + goto err_out; + + d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, + (u32 *)&g->clk.gpc_pll.mode); + if (!d) + goto err_out; + + d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, + l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); + if (!d) + goto err_out; + + return 0; + +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + return -ENOMEM; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.c b/drivers/gpu/nvgpu/os/linux/debug_fifo.c new file mode 100644 index 00000000..2b5674c0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.c @@ -0,0 +1,378 @@ +/* + * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "debug_fifo.h" +#include "os_linux.h" + +#include +#include + +#include +#include + +void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); + +static void *gk20a_fifo_sched_debugfs_seq_start( + struct seq_file *s, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void *gk20a_fifo_sched_debugfs_seq_next( + struct seq_file *s, void *v, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + ++(*pos); + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void gk20a_fifo_sched_debugfs_seq_stop( + struct seq_file *s, void *v) +{ +} + +static int gk20a_fifo_sched_debugfs_seq_show( + struct seq_file *s, void *v) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = v; + struct tsg_gk20a *tsg = NULL; + + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + u32 runlist_id; + int ret = SEQ_SKIP; + u32 engine_id; + + engine_id = gk20a_fifo_get_gr_engine_id(g); + engine_info = (f->engine_info + engine_id); + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + if (ch == f->channel) { + seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n"); + seq_puts(s, " (usecs) (msecs)\n"); + ret = 0; + } + + if (!test_bit(ch->chid, runlist->active_channels)) + return ret; + + if (gk20a_channel_get(ch)) { + tsg = tsg_gk20a_from_ch(ch); + + if (tsg) + seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n", + ch->chid, + ch->tsgid, + ch->tgid, + tsg->timeslice_us, + ch->timeout_ms_max, + tsg->interleave_level, + tsg->gr_ctx.graphics_preempt_mode, + tsg->gr_ctx.compute_preempt_mode); + gk20a_channel_put(ch); + } + return 0; +} + +static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { + .start = gk20a_fifo_sched_debugfs_seq_start, + .next = gk20a_fifo_sched_debugfs_seq_next, + .stop = gk20a_fifo_sched_debugfs_seq_stop, + .show = gk20a_fifo_sched_debugfs_seq_show +}; + +static int gk20a_fifo_sched_debugfs_open(struct inode *inode, + struct file *file) +{ + struct gk20a *g = inode->i_private; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); + if (err) + return err; + + nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); + + ((struct seq_file *)file->private_data)->private = inode->i_private; + return 0; +}; + +/* + * The file operations structure contains our open function along with + * set of the canned seq_ ops. 
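+ * Reading the "sched" node therefore streams one row per active channel:
+ * seq_read() drives the start/next callbacks over f->channel[] and the
+ * show callback prints each entry, so e.g. "cat .../fifo/sched" (path
+ * abbreviated) dumps the whole table in one go.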
+ */ +static const struct file_operations gk20a_fifo_sched_debugfs_fops = { + .owner = THIS_MODULE, + .open = gk20a_fifo_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static int gk20a_fifo_profile_enable(void *data, u64 val) +{ + struct gk20a *g = (struct gk20a *) data; + struct fifo_gk20a *f = &g->fifo; + + + nvgpu_mutex_acquire(&f->profile.lock); + if (val == 0) { + if (f->profile.enabled) { + f->profile.enabled = false; + nvgpu_ref_put(&f->profile.ref, + __gk20a_fifo_profile_free); + } + } else { + if (!f->profile.enabled) { + /* not kref init as it can have a running condition if + * we enable/disable/enable while kickoff is happening + */ + if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) { + f->profile.data = nvgpu_vzalloc(g, + FIFO_PROFILING_ENTRIES * + sizeof(struct fifo_profile_gk20a)); + f->profile.sorted = nvgpu_vzalloc(g, + FIFO_PROFILING_ENTRIES * + sizeof(u64)); + if (!(f->profile.data && f->profile.sorted)) { + nvgpu_vfree(g, f->profile.data); + nvgpu_vfree(g, f->profile.sorted); + nvgpu_mutex_release(&f->profile.lock); + return -ENOMEM; + } + nvgpu_ref_init(&f->profile.ref); + } + atomic_set(&f->profile.get.atomic_var, 0); + f->profile.enabled = true; + } + } + nvgpu_mutex_release(&f->profile.lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE( + gk20a_fifo_profile_enable_debugfs_fops, + NULL, + gk20a_fifo_profile_enable, + "%llu\n" +); + +static int __profile_cmp(const void *a, const void *b) +{ + return *((unsigned long long *) a) - *((unsigned long long *) b); +} + +/* + * This uses about 800b in the stack, but the function using it is not part + * of a callstack where much memory is being used, so it is fine + */ +#define PERCENTILE_WIDTH 5 +#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) + +static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, + u64 *percentiles, u32 index_end, u32 index_start) +{ + unsigned int nelem = 0; + unsigned int index; + struct fifo_profile_gk20a *profile; + + for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { + profile = &g->fifo.profile.data[index]; + + if (profile->timestamp[index_end] > + profile->timestamp[index_start]) { + /* This is a valid element */ + g->fifo.profile.sorted[nelem] = + profile->timestamp[index_end] - + profile->timestamp[index_start]; + nelem++; + } + } + + /* sort it */ + sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), + __profile_cmp, NULL); + + /* build ranges */ + for (index = 0; index < PERCENTILE_RANGES; index++) { + percentiles[index] = nelem < PERCENTILE_RANGES ? 
0 : + g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) * + nelem)/100 - 1]; + } + return nelem; +} + +static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + unsigned int get, nelem, index; + /* + * 800B in the stack, but function is declared statically and only + * called from debugfs handler + */ + u64 percentiles_ioctl[PERCENTILE_RANGES]; + u64 percentiles_kickoff[PERCENTILE_RANGES]; + u64 percentiles_jobtracking[PERCENTILE_RANGES]; + u64 percentiles_append[PERCENTILE_RANGES]; + u64 percentiles_userd[PERCENTILE_RANGES]; + + if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { + seq_printf(s, "Profiling disabled\n"); + return 0; + } + + get = atomic_read(&g->fifo.profile.get.atomic_var); + + __gk20a_fifo_create_stats(g, percentiles_ioctl, + PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_kickoff, + PROFILE_END, PROFILE_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_jobtracking, + PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); + __gk20a_fifo_create_stats(g, percentiles_append, + PROFILE_APPEND, PROFILE_JOB_TRACKING); + nelem = __gk20a_fifo_create_stats(g, percentiles_userd, + PROFILE_END, PROFILE_APPEND); + + seq_printf(s, "Number of kickoffs: %d\n", nelem); + seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); + + for (index = 0; index < PERCENTILE_RANGES; index++) + seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", + PERCENTILE_WIDTH * (index+1), + percentiles_ioctl[index], + percentiles_kickoff[index], + percentiles_append[index], + percentiles_jobtracking[index], + percentiles_userd[index]); + + nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); + + return 0; +} + +static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_fifo_profile_stats, inode->i_private); +} + +static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { + .open = gk20a_fifo_profile_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void gk20a_fifo_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + struct dentry *fifo_root; + struct dentry *profile_root; + + fifo_root = debugfs_create_dir("fifo", gpu_root); + if (IS_ERR_OR_NULL(fifo_root)) + return; + + nvgpu_log(g, gpu_dbg_info, "g=%p", g); + + debugfs_create_file("sched", 0600, fifo_root, g, + &gk20a_fifo_sched_debugfs_fops); + + profile_root = debugfs_create_dir("profile", fifo_root); + if (IS_ERR_OR_NULL(profile_root)) + return; + + nvgpu_mutex_init(&g->fifo.profile.lock); + g->fifo.profile.enabled = false; + atomic_set(&g->fifo.profile.get.atomic_var, 0); + atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); + + debugfs_create_file("enable", 0600, profile_root, g, + &gk20a_fifo_profile_enable_debugfs_fops); + + debugfs_create_file("stats", 0600, profile_root, g, + &gk20a_fifo_profile_stats_debugfs_fops); + +} + +void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) +{ + if (profile) + profile->timestamp[idx] = nvgpu_current_time_ns(); +} + +void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) +{ + struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, + profile.ref); + nvgpu_vfree(f->g, f->profile.data); + nvgpu_vfree(f->g, f->profile.sorted); +} + +/* Get the next element in the ring buffer of profile entries + * and grab a reference to the structure + */ 
+struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct fifo_profile_gk20a *profile; + unsigned int index; + + /* If kref is zero, profiling is not enabled */ + if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) + return NULL; + index = atomic_inc_return(&f->profile.get.atomic_var); + profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; + + return profile; +} + +/* Free the reference to the structure. This allows deferred cleanups */ +void gk20a_fifo_profile_release(struct gk20a *g, + struct fifo_profile_gk20a *profile) +{ + nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); +} + +void gk20a_fifo_debugfs_deinit(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + + nvgpu_mutex_acquire(&f->profile.lock); + if (f->profile.enabled) { + f->profile.enabled = false; + nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); + } + nvgpu_mutex_release(&f->profile.lock); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.h b/drivers/gpu/nvgpu/os/linux/debug_fifo.h new file mode 100644 index 00000000..46ac853e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_FIFO_H__ +#define __NVGPU_DEBUG_FIFO_H__ + +struct gk20a; +void gk20a_fifo_debugfs_init(struct gk20a *g); +void gk20a_fifo_debugfs_deinit(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_FIFO_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.c b/drivers/gpu/nvgpu/os/linux/debug_gr.c new file mode 100644 index 00000000..d54c6d63 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_gr.h" +#include "os_linux.h" + +#include + +int gr_gk20a_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->debugfs_gr_default_attrib_cb_size = + debugfs_create_u32("gr_default_attrib_cb_size", + S_IRUGO|S_IWUSR, l->debugfs, + &g->gr.attrib_cb_default_size); + + return 0; +} + diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.h b/drivers/gpu/nvgpu/os/linux/debug_gr.h new file mode 100644 index 00000000..4b46acbb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_GR_H__ +#define __NVGPU_DEBUG_GR_H__ + +struct gk20a; +int gr_gk20a_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_GR_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.c b/drivers/gpu/nvgpu/os/linux/debug_hal.c new file mode 100644 index 00000000..031e335e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_hal.h" +#include "os_linux.h" + +#include +#include + +/* Format and print a single function pointer to the specified seq_file. */ +static void __hal_print_op(struct seq_file *s, void *op_ptr) +{ + seq_printf(s, "%pF\n", op_ptr); +} + +/* + * Prints an array of function pointer addresses in op_ptrs to the + * specified seq_file + */ +static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops) +{ + int i; + + for (i = 0; i < num_ops; i++) + __hal_print_op(s, op_ptrs[i]); +} + +/* + * Show file operation, which generates content of the file once. Prints a list + * of gpu operations as defined by gops and the corresponding function pointer + * destination addresses. Relies on no compiler reordering of struct fields and + * assumption that all members are function pointers. + */ +static int __hal_show(struct seq_file *s, void *unused) +{ + struct gpu_ops *gops = s->private; + + __hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *)); + + return 0; +} + +static int __hal_open(struct inode *inode, struct file *file) +{ + return single_open(file, __hal_show, inode->i_private); +} + +static const struct file_operations __hal_fops = { + .open = __hal_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nvgpu_hal_debugfs_fini(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!(l->debugfs_hal == NULL)) + debugfs_remove_recursive(l->debugfs_hal); +} + +void nvgpu_hal_debugfs_init(struct gk20a *g) +{ + struct dentry *d; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!l->debugfs) + return; + l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); + if (IS_ERR_OR_NULL(l->debugfs_hal)) { + l->debugfs_hal = NULL; + return; + } + + /* Pass along reference to the gpu_ops struct as private data */ + d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, + &g->ops, &__hal_fops); + if (!d) { + nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); + debugfs_remove_recursive(l->debugfs_hal); + return; + } +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.h b/drivers/gpu/nvgpu/os/linux/debug_hal.h new file mode 100644 index 00000000..eee6f234 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_HAL_H__
+#define __NVGPU_DEBUG_HAL_H__
+
+struct gk20a;
+void nvgpu_hal_debugfs_fini(struct gk20a *g);
+void nvgpu_hal_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_HAL_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.c b/drivers/gpu/nvgpu/os/linux/debug_kmem.c
new file mode 100644
index 00000000..a0c7d47d
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "os_linux.h"
+#include "debug_kmem.h"
+#include "kmem_priv.h"
+
+/**
+ * __to_human_readable_bytes - Determine suffix for passed size.
+ *
+ * @bytes - Number of bytes to generate a suffix for.
+ * @hr_bytes [out] - The human readable number of bytes.
+ * @hr_suffix [out] - The suffix for the HR number of bytes.
+ *
+ * Computes a human readable decomposition of the passed number of bytes. The
+ * suffix for the bytes is passed back through the @hr_suffix pointer. The
+ * right number of bytes is then passed back in @hr_bytes. This returns the
+ * following ranges:
+ *
+ *   0 - 1023 B
+ *   1 - 1023 KB
+ *   1 - 1023 MB
+ *   1 - 1023 GB
+ *   1 - 1023 TB
+ *   1 - ...  PB
+ */
+static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
+				      const char **hr_suffix)
+{
+	static const char *suffixes[] =
+		{ "B", "KB", "MB", "GB", "TB", "PB" };
+
+	u64 suffix_ind = 0;
+
+	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
+		bytes >>= 10;
+		suffix_ind++;
+	}
+
+	/*
+	 * Handle case where bytes > 1023PB.
+	 */
+	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
+		suffix_ind : ARRAY_SIZE(suffixes) - 1;
+
+	*hr_bytes = bytes;
+	*hr_suffix = suffixes[suffix_ind];
+}
+
+/**
+ * print_hr_bytes - Print human readable bytes
+ *
+ * @s - A seq_file to print to. May be NULL.
+ * @msg - A message to print before the bytes.
+ * @bytes - Number of bytes.
+ *
+ * Print @msg followed by the human readable decomposition of the passed
+ * number of bytes.
+ *
+ * If @s is NULL then the prints are made to the kernel log.
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+	u64 hr_bytes;
+	const char *hr_suffix;
+
+	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
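+ *
+ * Each bucket spans one power of two, so the histogram is logarithmic.
+ * Output goes through __pstat(), i.e. to @s when non-NULL and to the
+ * kernel log otherwise.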
+ */
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	int i;
+	u64 pot_min, pot_max;
+	u64 nr_buckets;
+	unsigned int *buckets;
+	unsigned int total_allocs;
+	struct nvgpu_rbtree_node *node;
+	static const char histogram_line[] =
+		"++++++++++++++++++++++++++++++++++++++++";
+
+	/*
+	 * pot_min is essentially a round down to the nearest power of 2. This
+	 * is the start of the histogram. pot_max is just a round up to the
+	 * nearest power of two. Each histogram bucket is one power of two so
+	 * the histogram buckets are exponential.
+	 */
+	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+
+	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
+
+	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+	if (!buckets) {
+		__pstat(s, "OOM: could not allocate bucket storage!?\n");
+		return;
+	}
+
+	/*
+	 * Iterate across all of the allocs and determine what bucket they
+	 * should go in. Round the size down to the nearest power of two to
+	 * find the right bucket.
+	 */
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		int b;
+		u64 bucket_min;
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
+		if (bucket_min < tracker->min_alloc)
+			bucket_min = tracker->min_alloc;
+
+		b = __ffs(bucket_min) - __ffs(pot_min);
+
+		/*
+		 * Handle the one case where there's an alloc exactly as big
+		 * as the maximum bucket size of the largest bucket. Most of
+		 * the buckets have an inclusive minimum and exclusive
+		 * maximum. But the largest bucket needs to have an
+		 * _inclusive_ maximum as well.
+		 */
+		if (b == (int)nr_buckets)
+			b--;
+
+		buckets[b]++;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	total_allocs = 0;
+	for (i = 0; i < (int)nr_buckets; i++)
+		total_allocs += buckets[i];
+
+	__pstat(s, "Alloc histogram:\n");
+
+	/*
+	 * Actually compute the histogram lines.
+	 */
+	for (i = 0; i < (int)nr_buckets; i++) {
+		char this_line[sizeof(histogram_line) + 1];
+		u64 line_length;
+		u64 hr_bytes;
+		const char *hr_suffix;
+
+		memset(this_line, 0, sizeof(this_line));
+
+		/*
+		 * Compute the normalized line length. Can't use floating
+		 * point so we will just multiply everything by 1000 and use
+		 * fixed point.
+		 */
+		line_length = (1000 * buckets[i]) / total_allocs;
+		line_length *= sizeof(histogram_line);
+		line_length /= 1000;
+
+		memset(this_line, '+', line_length);
+
+		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
+					  &hr_bytes, &hr_suffix);
+		__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
+			hr_bytes, hr_bytes << 1,
+			hr_suffix, buckets[i], this_line);
+	}
+
+	/* The bucket storage is only needed for the dump itself. */
+	kfree(buckets);
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Print stats from a tracker. If @s is non-null then seq_printf() will be
+ * used with @s. Otherwise the stats are pr_info()ed.
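+ *
+ * The tracker lock is held across the whole dump so the counters and
+ * the histogram reflect a single consistent snapshot.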
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	nvgpu_lock_tracker(tracker);
+
+	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
+
+	__pstat(s, "Basic Stats:\n");
+	__pstat(s, "  Number of allocs        %lld\n",
+		tracker->nr_allocs);
+	__pstat(s, "  Number of frees         %lld\n",
+		tracker->nr_frees);
+	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
+	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
+	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
+	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
+	print_hr_bytes(s, "  Bytes allocated (real)  ",
+		       tracker->bytes_alloced_real);
+	print_hr_bytes(s, "  Bytes freed (real)      ",
+		       tracker->bytes_freed_real);
+	__pstat(s, "\n");
+
+	print_histogram(tracker, s);
+
+	nvgpu_unlock_tracker(tracker);
+}
+
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_mem_alloc_tracker *tracker = s->private;
+
+	nvgpu_kmem_print_stats(tracker, s);
+
+	return 0;
+}
+
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_tracking_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_tracking_fops = {
+	.open = __kmem_tracking_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+				      struct nvgpu_mem_alloc_tracker *tracker,
+				      struct seq_file *s)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		kmem_print_mem_alloc(g, alloc, s);
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return 0;
+}
+
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+
+	nvgpu_lock_tracker(g->vmallocs);
+	seq_puts(s, "Outstanding vmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->vmallocs, s);
+	seq_puts(s, "\n");
+	nvgpu_unlock_tracker(g->vmallocs);
+
+	nvgpu_lock_tracker(g->kmallocs);
+	seq_puts(s, "Outstanding kmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->kmallocs, s);
+	nvgpu_unlock_tracker(g->kmallocs);
+
+	return 0;
+}
+
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_traces_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_traces_fops = {
+	.open = __kmem_traces_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void nvgpu_kmem_debugfs_init(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct dentry *node;
+
+	l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
+	if (IS_ERR_OR_NULL(l->debugfs_kmem))
+		return;
+
+	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
+				   l->debugfs_kmem,
+				   g->vmallocs, &__kmem_tracking_fops);
+	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
+				   l->debugfs_kmem,
+				   g->kmallocs, &__kmem_tracking_fops);
+	node = debugfs_create_file("traces", S_IRUGO,
+				   l->debugfs_kmem,
+				   g, &__kmem_traces_fops);
}
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.h b/drivers/gpu/nvgpu/os/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_KMEM_H__ +#define __NVGPU_DEBUG_KMEM_H__ + +struct gk20a; +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE +void nvgpu_kmem_debugfs_init(struct gk20a *g); +#endif + +#endif /* __NVGPU_DEBUG_KMEM_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.c b/drivers/gpu/nvgpu/os/linux/debug_pmu.c new file mode 100644 index 00000000..f4ed992d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.c @@ -0,0 +1,481 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include "debug_pmu.h" +#include "os_linux.h" + +#include +#include +#include + +static int lpwr_debug_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + if (g->ops.pmu.pmu_pg_engines_feature_list && + g->ops.pmu.pmu_pg_engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + seq_printf(s, "PSTATE: %u\n" + "RPPG Enabled: %u\n" + "RPPG ref count: %u\n" + "RPPG state: %u\n" + "MSCG Enabled: %u\n" + "MSCG pstate state: %u\n" + "MSCG transition state: %u\n", + g->ops.clk_arb.get_current_pstate(g), + g->elpg_enabled, g->pmu.elpg_refcnt, + g->pmu.elpg_stat, g->mscg_enabled, + g->pmu.mscg_stat, g->pmu.mscg_transition_state); + + } else + seq_printf(s, "ELPG Enabled: %u\n" + "ELPG ref count: %u\n" + "ELPG state: %u\n", + g->elpg_enabled, g->pmu.elpg_refcnt, + g->pmu.elpg_stat); + + return 0; + +} + +static int lpwr_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, lpwr_debug_show, inode->i_private); +} + +static const struct file_operations lpwr_debug_fops = { + .open = lpwr_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mscg_stat_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + u64 total_ingating, total_ungating, residency, divisor, dividend; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + int err; + + /* Don't unnecessarily power on the device */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); + gk20a_idle(g); + } + total_ingating = g->pg_ingating_time_us + + (u64)pg_stat_data.ingating_time; + total_ungating = g->pg_ungating_time_us + + (u64)pg_stat_data.ungating_time; + + divisor = total_ingating + total_ungating; + + /* We compute the residency on a scale of 1000 */ + dividend = total_ingating * 1000; + + if (divisor) + residency = div64_u64(dividend, divisor); + else + residency = 0; + + seq_printf(s, + "Time in MSCG: %llu us\n" + "Time out of MSCG: %llu us\n" + "MSCG residency 
ratio: %llu\n" + "MSCG Entry Count: %u\n" + "MSCG Avg Entry latency %u\n" + "MSCG Avg Exit latency %u\n", + total_ingating, total_ungating, + residency, pg_stat_data.gating_cnt, + pg_stat_data.avg_entry_latency_us, + pg_stat_data.avg_exit_latency_us); + return 0; + +} + +static int mscg_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, mscg_stat_show, inode->i_private); +} + +static const struct file_operations mscg_stat_fops = { + .open = mscg_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mscg_transitions_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u32 total_gating_cnt; + int err; + + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data); + gk20a_idle(g); + } + total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; + + seq_printf(s, "%u\n", total_gating_cnt); + return 0; + +} + +static int mscg_transitions_open(struct inode *inode, struct file *file) +{ + return single_open(file, mscg_transitions_show, inode->i_private); +} + +static const struct file_operations mscg_transitions_fops = { + .open = mscg_transitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int elpg_stat_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u64 total_ingating, total_ungating, residency, divisor, dividend; + int err; + + /* Don't unnecessarily power on the device */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); + gk20a_idle(g); + } + total_ingating = g->pg_ingating_time_us + + (u64)pg_stat_data.ingating_time; + total_ungating = g->pg_ungating_time_us + + (u64)pg_stat_data.ungating_time; + divisor = total_ingating + total_ungating; + + /* We compute the residency on a scale of 1000 */ + dividend = total_ingating * 1000; + + if (divisor) + residency = div64_u64(dividend, divisor); + else + residency = 0; + + seq_printf(s, + "Time in ELPG: %llu us\n" + "Time out of ELPG: %llu us\n" + "ELPG residency ratio: %llu\n" + "ELPG Entry Count: %u\n" + "ELPG Avg Entry latency %u us\n" + "ELPG Avg Exit latency %u us\n", + total_ingating, total_ungating, + residency, pg_stat_data.gating_cnt, + pg_stat_data.avg_entry_latency_us, + pg_stat_data.avg_exit_latency_us); + return 0; + +} + +static int elpg_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_stat_show, inode->i_private); +} + +static const struct file_operations elpg_stat_fops = { + .open = elpg_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int elpg_transitions_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct pmu_pg_stats_data pg_stat_data = { 0 }; + u32 total_gating_cnt; + int err; + + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_get_pg_stats(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); + gk20a_idle(g); + } + total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt; + + seq_printf(s, "%u\n", total_gating_cnt); + return 0; + +} + +static int elpg_transitions_open(struct inode *inode, struct file *file) +{ + return single_open(file, elpg_transitions_show, inode->i_private); +} + +static const struct file_operations 
elpg_transitions_fops = { + .open = elpg_transitions_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int falc_trace_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + struct nvgpu_pmu *pmu = &g->pmu; + u32 i = 0, j = 0, k, l, m; + char part_str[40]; + void *tracebuffer; + char *trace; + u32 *trace1; + + /* allocate system memory to copy pmu trace buffer */ + tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE); + if (tracebuffer == NULL) + return -ENOMEM; + + /* read pmu traces into system memory buffer */ + nvgpu_mem_rd_n(g, &pmu->trace_buf, + 0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE); + + trace = (char *)tracebuffer; + trace1 = (u32 *)tracebuffer; + + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { + for (j = 0; j < 0x40; j++) + if (trace1[(i / 4) + j]) + break; + if (j == 0x40) + break; + seq_printf(s, "Index %x: ", trace1[(i / 4)]); + l = 0; + m = 0; + while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) { + if (k >= 40) + break; + strncpy(part_str, (trace+i+20+m), k); + part_str[k] = 0; + seq_printf(s, "%s0x%x", part_str, + trace1[(i / 4) + 1 + l]); + l++; + m += k + 2; + } + seq_printf(s, "%s", (trace+i+20+m)); + } + + nvgpu_kfree(g, tracebuffer); + return 0; +} + +static int falc_trace_open(struct inode *inode, struct file *file) +{ + return single_open(file, falc_trace_show, inode->i_private); +} + +static const struct file_operations falc_trace_fops = { + .open = falc_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int perfmon_events_enable_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); + return 0; + +} + +static int perfmon_events_enable_open(struct inode *inode, struct file *file) +{ + return single_open(file, perfmon_events_enable_show, inode->i_private); +} + +static ssize_t perfmon_events_enable_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct gk20a *g = s->private; + unsigned long val = 0; + char buf[40]; + int buf_size; + int err; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, userbuf, buf_size)) + return -EFAULT; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + /* Don't turn on gk20a unnecessarily */ + if (g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + if (val && !g->pmu.perfmon_sampling_enabled && + nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + g->pmu.perfmon_sampling_enabled = true; + g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); + } else if (!val && g->pmu.perfmon_sampling_enabled && + nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + g->pmu.perfmon_sampling_enabled = false; + g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); + } + gk20a_idle(g); + } else { + g->pmu.perfmon_sampling_enabled = val ? 
true : false; + } + + return count; +} + +static const struct file_operations perfmon_events_enable_fops = { + .open = perfmon_events_enable_open, + .read = seq_read, + .write = perfmon_events_enable_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int perfmon_events_count_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); + return 0; + +} + +static int perfmon_events_count_open(struct inode *inode, struct file *file) +{ + return single_open(file, perfmon_events_count_show, inode->i_private); +} + +static const struct file_operations perfmon_events_count_fops = { + .open = perfmon_events_count_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int security_show(struct seq_file *s, void *data) +{ + struct gk20a *g = s->private; + + seq_printf(s, "%d\n", g->pmu.pmu_mode); + return 0; + +} + +static int security_open(struct inode *inode, struct file *file) +{ + return single_open(file, security_show, inode->i_private); +} + +static const struct file_operations security_fops = { + .open = security_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int gk20a_pmu_debugfs_init(struct gk20a *g) +{ + struct dentry *d; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + d = debugfs_create_file( + "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, + &lpwr_debug_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &mscg_stat_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "mscg_transitions", S_IRUGO, l->debugfs, g, + &mscg_transitions_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, + &elpg_stat_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "elpg_transitions", S_IRUGO, l->debugfs, g, + &elpg_transitions_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "falc_trace", S_IRUGO, l->debugfs, g, + &falc_trace_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "perfmon_events_enable", S_IRUGO, l->debugfs, g, + &perfmon_events_enable_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "perfmon_events_count", S_IRUGO, l->debugfs, g, + &perfmon_events_count_fops); + if (!d) + goto err_out; + + d = debugfs_create_file( + "pmu_security", S_IRUGO, l->debugfs, g, + &security_fops); + if (!d) + goto err_out; + return 0; +err_out: + pr_err("%s: Failed to make debugfs node\n", __func__); + return -ENOMEM; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.h b/drivers/gpu/nvgpu/os/linux/debug_pmu.h new file mode 100644 index 00000000..c4e3243d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __NVGPU_DEBUG_PMU_H__ +#define __NVGPU_DEBUG_PMU_H__ + +struct gk20a; +int gk20a_pmu_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_PMU_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.c b/drivers/gpu/nvgpu/os/linux/debug_sched.c new file mode 100644 index 00000000..5b7cbddf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "debug_sched.h" +#include "os_linux.h" + +#include +#include + +static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + bool sched_busy = true; + + int n = sched->bitmap_size / sizeof(u64); + int i; + int err; + + err = gk20a_busy(g); + if (err) + return err; + + if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { + sched_busy = false; + nvgpu_mutex_release(&sched->busy_lock); + } + + seq_printf(s, "control_locked=%d\n", sched->control_locked); + seq_printf(s, "busy=%d\n", sched_busy); + seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); + + nvgpu_mutex_acquire(&sched->status_lock); + + seq_puts(s, "active_tsg_bitmap\n"); + for (i = 0; i < n; i++) + seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); + + seq_puts(s, "recent_tsg_bitmap\n"); + for (i = 0; i < n; i++) + seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); + + nvgpu_mutex_release(&sched->status_lock); + + gk20a_idle(g); + + return 0; +} + +static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, gk20a_sched_debugfs_show, inode->i_private); +} + +static const struct file_operations gk20a_sched_debugfs_fops = { + .open = gk20a_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void gk20a_sched_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, + g, &gk20a_sched_debugfs_fops); +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.h b/drivers/gpu/nvgpu/os/linux/debug_sched.h new file mode 100644 index 00000000..34a8f55f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __NVGPU_DEBUG_SCHED_H__ +#define __NVGPU_DEBUG_SCHED_H__ + +struct gk20a; +void gk20a_sched_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_SCHED_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.c b/drivers/gpu/nvgpu/os/linux/debug_xve.c new file mode 100644 index 00000000..743702a2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.c @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include "debug_xve.h" +#include "os_linux.h" + +#include +#include + +static ssize_t xve_link_speed_write(struct file *filp, + const char __user *buff, + size_t len, loff_t *off) +{ + struct gk20a *g = ((struct seq_file *)filp->private_data)->private; + char kbuff[16]; + u32 buff_size, check_len; + u32 link_speed = 0; + int ret; + + buff_size = min_t(size_t, 16, len); + + memset(kbuff, 0, 16); + if (copy_from_user(kbuff, buff, buff_size)) + return -EFAULT; + + check_len = strlen("Gen1"); + if (strncmp(kbuff, "Gen1", check_len) == 0) + link_speed = GPU_XVE_SPEED_2P5; + else if (strncmp(kbuff, "Gen2", check_len) == 0) + link_speed = GPU_XVE_SPEED_5P0; + else if (strncmp(kbuff, "Gen3", check_len) == 0) + link_speed = GPU_XVE_SPEED_8P0; + else + nvgpu_err(g, "%s: Unknown PCIe speed: %s", + __func__, kbuff); + + if (!link_speed) + return -EINVAL; + + /* Brief pause... To help rate limit this. */ + nvgpu_msleep(250); + + /* + * And actually set the speed. Yay. 
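+ * On success the whole write is reported as consumed (len); any error
+ * from set_speed() is propagated back to the writer.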
+ */ + ret = g->ops.xve.set_speed(g, link_speed); + if (ret) + return ret; + + return len; +} + +static int xve_link_speed_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 speed; + int err; + + err = g->ops.xve.get_speed(g, &speed); + if (err) + return err; + + seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); + + return 0; +} + +static int xve_link_speed_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_link_speed_show, inode->i_private); +} + +static const struct file_operations xve_link_speed_fops = { + .open = xve_link_speed_open, + .read = seq_read, + .write = xve_link_speed_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xve_available_speeds_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 available_speeds; + + g->ops.xve.available_speeds(g, &available_speeds); + + seq_puts(s, "Available PCIe bus speeds:\n"); + if (available_speeds & GPU_XVE_SPEED_2P5) + seq_puts(s, " Gen1\n"); + if (available_speeds & GPU_XVE_SPEED_5P0) + seq_puts(s, " Gen2\n"); + if (available_speeds & GPU_XVE_SPEED_8P0) + seq_puts(s, " Gen3\n"); + + return 0; +} + +static int xve_available_speeds_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_available_speeds_show, inode->i_private); +} + +static const struct file_operations xve_available_speeds_fops = { + .open = xve_available_speeds_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xve_link_control_status_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + u32 link_status; + + link_status = g->ops.xve.get_link_control_status(g); + seq_printf(s, "0x%08x\n", link_status); + + return 0; +} + +static int xve_link_control_status_open(struct inode *inode, struct file *file) +{ + return single_open(file, xve_link_control_status_show, inode->i_private); +} + +static const struct file_operations xve_link_control_status_fops = { + .open = xve_link_control_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int nvgpu_xve_debugfs_init(struct gk20a *g) +{ + int err = -ENODEV; + + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + + l->debugfs_xve = debugfs_create_dir("xve", gpu_root); + if (IS_ERR_OR_NULL(l->debugfs_xve)) + goto fail; + + /* + * These are just debug nodes. If they fail to get made it's not worth + * worrying the higher level SW. + */ + debugfs_create_file("link_speed", S_IRUGO, + l->debugfs_xve, g, + &xve_link_speed_fops); + debugfs_create_file("available_speeds", S_IRUGO, + l->debugfs_xve, g, + &xve_available_speeds_fops); + debugfs_create_file("link_control_status", S_IRUGO, + l->debugfs_xve, g, + &xve_link_control_status_fops); + + err = 0; +fail: + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.h b/drivers/gpu/nvgpu/os/linux/debug_xve.h new file mode 100644 index 00000000..f3b1ac54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __NVGPU_DEBUG_XVE_H__ +#define __NVGPU_DEBUG_XVE_H__ + +struct gk20a; +int nvgpu_xve_debugfs_init(struct gk20a *g); + +#endif /* __NVGPU_DEBUG_SVE_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/dma.c b/drivers/gpu/nvgpu/os/linux/dma.c new file mode 100644 index 00000000..f513dcd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dma.c @@ -0,0 +1,694 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" + +#ifdef __DMA_ATTRS_LONGS +#define NVGPU_DEFINE_DMA_ATTRS(x) \ + struct dma_attrs x = { \ + .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ + } +#define NVGPU_DMA_ATTR(attrs) &attrs +#else +#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0 +#define NVGPU_DMA_ATTR(attrs) attrs +#endif + +/* + * Enough to hold all the possible flags in string form. When a new flag is + * added it must be added here as well!! + */ +#define NVGPU_DMA_STR_SIZE \ + sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS") + +/* + * The returned string is kmalloc()ed here but must be freed by the caller. + */ +static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) +{ + char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); + int bytes_available = NVGPU_DMA_STR_SIZE; + + /* + * Return the empty buffer if there's no flags. Makes it easier on the + * calling code to just print it instead of any if (NULL) type logic. + */ + if (!flags) + return buf; + +#define APPEND_FLAG(flag, str_flag) \ + do { \ + if (flags & flag) { \ + strncat(buf, str_flag, bytes_available); \ + bytes_available -= strlen(str_flag); \ + } \ + } while (0) + + APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); + APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); +#undef APPEND_FLAG + + return buf; +} + +/** + * __dma_dbg - Debug print for DMA allocs and frees. + * + * @g - The GPU. + * @size - The requested size of the alloc (size_t). + * @flags - The flags (unsigned long). + * @type - A string describing the type (i.e: sysmem or vidmem). + * @what - A string with 'alloc' or 'free'. + * + * @flags is the DMA flags. If there are none or it doesn't make sense to print + * flags just pass 0. + * + * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. + */ +static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, + const char *type, const char *what) +{ + char *flags_str = NULL; + + /* + * Don't bother making the flags_str if debugging is + * not enabled. This saves a malloc and a free. 
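+ *
+ * nvgpu_dma_flags_to_str() kzalloc()s the string, so it is freed again
+ * once the log line has been emitted.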
+ */ + if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) + return; + + flags_str = nvgpu_dma_flags_to_str(g, flags); + + __nvgpu_log_dbg(g, gpu_dbg_dma, + __func__, __LINE__, + "DMA %s: [%s] size=%-7zu " + "aligned=%-7zu total=%-10llukB %s", + what, type, + size, PAGE_ALIGN(size), + g->dma_memory_used >> 10, + flags_str); + + if (flags_str) + nvgpu_kfree(g, flags_str); +} + +#define dma_dbg_alloc(g, size, flags, type) \ + __dma_dbg(g, size, flags, type, "alloc") +#define dma_dbg_free(g, size, flags, type) \ + __dma_dbg(g, size, flags, type, "free") + +/* + * For after the DMA alloc is done. + */ +#define __dma_dbg_done(g, size, type, what) \ + nvgpu_log(g, gpu_dbg_dma, \ + "DMA %s: [%s] size=%-7zu Done!", \ + what, type, size); \ + +#define dma_dbg_alloc_done(g, size, type) \ + __dma_dbg_done(g, size, type, "alloc") +#define dma_dbg_free_done(g, size, type) \ + __dma_dbg_done(g, size, type, "free") + +#if defined(CONFIG_GK20A_VIDMEM) +static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at, + size_t size) +{ + u64 addr = 0; + + if (at) + addr = nvgpu_alloc_fixed(allocator, at, size, 0); + else + addr = nvgpu_alloc(allocator, size); + + return addr; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static void nvgpu_dma_flags_to_attrs(unsigned long *attrs, + unsigned long flags) +#define ATTR_ARG(x) *x +#else +static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs, + unsigned long flags) +#define ATTR_ARG(x) x +#endif +{ + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); + if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) + dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); +#undef ATTR_ARG +} + +int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags(g, 0, size, mem); +} + +int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, + struct nvgpu_mem *mem) +{ + if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + * + * Incoming flags are ignored here, since bits other than the + * no-kernel-mapping flag are ignored by the vidmem mapping + * functions anyway. + */ + int err = nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); +} + +int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); +} + +int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + int err; + dma_addr_t iova; + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + void *alloc_ret; + + if (nvgpu_mem_is_valid(mem)) { + nvgpu_warn(g, "memory leak !!"); + WARN_ON(1); + } + + /* + * WAR for IO coherent chips: the DMA API does not seem to generate + * mappings that work correctly. Unclear why - Bug ID: 2040115. + * + * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING + * and then make a vmap() ourselves. 
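+ * The vmap() happens further down, once the pages and the sgt are set
+ * up; the matching vunmap() is done in nvgpu_dma_free_sys().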
+ */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + flags |= NVGPU_DMA_NO_KERNEL_MAPPING; + + /* + * Before the debug print so we see this in the total. But during + * cleanup in the fail path this has to be subtracted. + */ + g->dma_memory_used += PAGE_ALIGN(size); + + dma_dbg_alloc(g, size, flags, "sysmem"); + + /* + * Save the old size but for actual allocation purposes the size is + * going to be page aligned. + */ + mem->size = size; + size = PAGE_ALIGN(size); + + nvgpu_dma_flags_to_attrs(&dma_attrs, flags); + + alloc_ret = dma_alloc_attrs(d, size, &iova, + GFP_KERNEL|__GFP_ZERO, + NVGPU_DMA_ATTR(dma_attrs)); + if (!alloc_ret) + return -ENOMEM; + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + mem->priv.pages = alloc_ret; + err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, + mem->priv.pages, + iova, size); + } else { + mem->cpu_va = alloc_ret; + err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, + iova, size, flags); + } + if (err) + goto fail_free_dma; + + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { + mem->cpu_va = vmap(mem->priv.pages, + size >> PAGE_SHIFT, + 0, PAGE_KERNEL); + if (!mem->cpu_va) { + err = -ENOMEM; + goto fail_free_sgt; + } + } + + mem->aligned_size = size; + mem->aperture = APERTURE_SYSMEM; + mem->priv.flags = flags; + + dma_dbg_alloc_done(g, mem->size, "sysmem"); + + return 0; + +fail_free_sgt: + nvgpu_free_sgtable(g, &mem->priv.sgt); +fail_free_dma: + dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); + mem->cpu_va = NULL; + mem->priv.sgt = NULL; + mem->size = 0; + g->dma_memory_used -= mem->aligned_size; + return err; +} + +int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); +} + +int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem, u64 at) +{ +#if defined(CONFIG_GK20A_VIDMEM) + u64 addr; + int err; + struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? + &g->mm.vidmem.allocator : + &g->mm.vidmem.bootstrap_allocator; + int before_pending; + + if (nvgpu_mem_is_valid(mem)) { + nvgpu_warn(g, "memory leak !!"); + WARN_ON(1); + } + + dma_dbg_alloc(g, size, flags, "vidmem"); + + mem->size = size; + size = PAGE_ALIGN(size); + + if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) + return -ENOSYS; + + /* + * Our own allocator doesn't have any flags yet, and we can't + * kernel-map these, so require explicit flags. + */ + WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); + addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + if (!addr) { + /* + * If memory is known to be freed soon, let the user know that + * it may be available after a while. 
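+ * -EAGAIN: a retry may succeed once the pending clears complete.
+ * -ENOMEM: vidmem is genuinely exhausted.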
+ */ + if (before_pending) + return -EAGAIN; + else + return -ENOMEM; + } + + if (at) + mem->mem_flags |= NVGPU_MEM_FLAG_FIXED; + + mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!mem->priv.sgt) { + err = -ENOMEM; + goto fail_physfree; + } + + err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); + if (err) + goto fail_kfree; + + nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr); + sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); + + mem->aligned_size = size; + mem->aperture = APERTURE_VIDMEM; + mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr; + mem->allocator = vidmem_alloc; + mem->priv.flags = flags; + + nvgpu_init_list_node(&mem->clear_list_entry); + + dma_dbg_alloc_done(g, mem->size, "vidmem"); + + return 0; + +fail_kfree: + nvgpu_kfree(g, mem->priv.sgt); +fail_physfree: + nvgpu_free(&g->mm.vidmem.allocator, addr); + mem->size = 0; + return err; +#else + return -ENOSYS; +#endif +} + +int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags(vm, 0, size, mem); +} + +int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using nvgpu_dma_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + int err = nvgpu_dma_alloc_map_flags_vid(vm, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); +} + +int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); +} + +int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} + +int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return nvgpu_dma_alloc_map_flags_vid(vm, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + nvgpu_dma_free(vm->mm->g, mem); + return err; +} + +static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + + g->dma_memory_used -= mem->aligned_size; + + dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem"); + + if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && + !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && + (mem->cpu_va || mem->priv.pages)) { + /* + * Free side of WAR for bug 2040115. 
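+ * The vmap() made at alloc time has to be torn down before the DMA
+ * API frees the underlying pages.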
+ */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + vunmap(mem->cpu_va); + + if (mem->priv.flags) { + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + + nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); + + if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + dma_free_attrs(d, mem->aligned_size, mem->priv.pages, + sg_dma_address(mem->priv.sgt->sgl), + NVGPU_DMA_ATTR(dma_attrs)); + } else { + dma_free_attrs(d, mem->aligned_size, mem->cpu_va, + sg_dma_address(mem->priv.sgt->sgl), + NVGPU_DMA_ATTR(dma_attrs)); + } + } else { + dma_free_coherent(d, mem->aligned_size, mem->cpu_va, + sg_dma_address(mem->priv.sgt->sgl)); + } + mem->cpu_va = NULL; + mem->priv.pages = NULL; + } + + /* + * When this flag is set we expect that pages is still populated but not + * by the DMA API. + */ + if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) + nvgpu_kfree(g, mem->priv.pages); + + if (mem->priv.sgt) + nvgpu_free_sgtable(g, &mem->priv.sgt); + + dma_dbg_free_done(g, mem->size, "sysmem"); + + mem->size = 0; + mem->aligned_size = 0; + mem->aperture = APERTURE_INVALID; +} + +static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) +{ +#if defined(CONFIG_GK20A_VIDMEM) + size_t mem_size = mem->size; + + dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem"); + + /* Sanity check - only this supported when allocating. */ + WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) { + int err = nvgpu_vidmem_clear_list_enqueue(g, mem); + + /* + * If there's an error here then that means we can't clear the + * vidmem. That's too bad; however, we still own the nvgpu_mem + * buf so we have to free that. + * + * We don't need to worry about the vidmem allocator itself + * since when that gets cleaned up in the driver shutdown path + * all the outstanding allocs are force freed. 
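+ *
+ * The non-user-mem branch below instead clears the buffer and returns
+ * it to its allocator synchronously.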
+ */ + if (err) + nvgpu_kfree(g, mem); + } else { + nvgpu_memset(g, mem, 0, 0, mem->aligned_size); + nvgpu_free(mem->allocator, + (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl)); + nvgpu_free_sgtable(g, &mem->priv.sgt); + + mem->size = 0; + mem->aligned_size = 0; + mem->aperture = APERTURE_INVALID; + } + + dma_dbg_free_done(g, mem_size, "vidmem"); +#endif +} + +void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + switch (mem->aperture) { + case APERTURE_SYSMEM: + return nvgpu_dma_free_sys(g, mem); + case APERTURE_VIDMEM: + return nvgpu_dma_free_vid(g, mem); + default: + break; /* like free() on "null" memory */ + } +} + +void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) +{ + if (mem->gpu_va) + nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); + mem->gpu_va = 0; + + nvgpu_dma_free(vm->mm->g, mem); +} + +int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, + void *cpuva, u64 iova, size_t size, unsigned long flags) +{ + int err = 0; + struct sg_table *tbl; + NVGPU_DEFINE_DMA_ATTRS(dma_attrs); + + tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!tbl) { + err = -ENOMEM; + goto fail; + } + + nvgpu_dma_flags_to_attrs(&dma_attrs, flags); + err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, + size, NVGPU_DMA_ATTR(dma_attrs)); + if (err) + goto fail; + + sg_dma_address(tbl->sgl) = iova; + *sgt = tbl; + + return 0; + +fail: + if (tbl) + nvgpu_kfree(g, tbl); + + return err; +} + +int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, + void *cpuva, u64 iova, size_t size) +{ + return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); +} + +int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, + struct page **pages, u64 iova, size_t size) +{ + int err = 0; + struct sg_table *tbl; + + tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!tbl) { + err = -ENOMEM; + goto fail; + } + + err = sg_alloc_table_from_pages(tbl, pages, + DIV_ROUND_UP(size, PAGE_SIZE), + 0, size, GFP_KERNEL); + if (err) + goto fail; + + sg_dma_address(tbl->sgl) = iova; + *sgt = tbl; + + return 0; + +fail: + if (tbl) + nvgpu_kfree(g, tbl); + + return err; +} + +void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) +{ + sg_free_table(*sgt); + nvgpu_kfree(g, *sgt); + *sgt = NULL; +} + +bool nvgpu_iommuable(struct gk20a *g) +{ +#ifdef CONFIG_TEGRA_GK20A + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + /* + * Check against the nvgpu device to see if it's been marked as + * IOMMU'able. + */ + if (!device_is_iommuable(l->dev)) + return false; +#endif + + return true; +} diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c new file mode 100644 index 00000000..129739f0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c @@ -0,0 +1,218 @@ +/* +* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "dmabuf.h" +#include "os_linux.h" + +static void gk20a_mm_delete_priv(void *_priv) +{ + struct gk20a_buffer_state *s, *s_tmp; + struct gk20a_dmabuf_priv *priv = _priv; + struct gk20a *g; + + if (!priv) + return; + + g = priv->g; + + if (priv->comptags.allocated && priv->comptags.lines) { + BUG_ON(!priv->comptag_allocator); + gk20a_comptaglines_free(priv->comptag_allocator, + priv->comptags.offset, + priv->comptags.lines); + } + + /* Free buffer states */ + nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states, + gk20a_buffer_state, list) { + gk20a_fence_put(s->fence); + nvgpu_list_del(&s->list); + nvgpu_kfree(g, s); + } + + nvgpu_kfree(g, priv); +} + +enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, + struct dma_buf *dmabuf) +{ + struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); + bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); + + if (buf_owner == NULL) { + /* Not nvgpu-allocated, assume system memory */ + return APERTURE_SYSMEM; + } else if (WARN_ON(buf_owner == g && unified_memory)) { + /* Looks like our video memory, but this gpu doesn't support + * it. Warn about a bug and bail out */ + nvgpu_warn(g, + "dmabuf is our vidmem but we don't have local vidmem"); + return APERTURE_INVALID; + } else if (buf_owner != g) { + /* Someone else's vidmem */ + return APERTURE_INVALID; + } else { + /* Yay, buf_owner == g */ + return APERTURE_VIDMEM; + } +} + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment **attachment) +{ + struct gk20a_dmabuf_priv *priv; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (WARN_ON(!priv)) + return ERR_PTR(-EINVAL); + + nvgpu_mutex_acquire(&priv->lock); + + if (priv->pin_count == 0) { + priv->attach = dma_buf_attach(dmabuf, dev); + if (IS_ERR(priv->attach)) { + nvgpu_mutex_release(&priv->lock); + return (struct sg_table *)priv->attach; + } + + priv->sgt = dma_buf_map_attachment(priv->attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(priv->sgt)) { + dma_buf_detach(dmabuf, priv->attach); + nvgpu_mutex_release(&priv->lock); + return priv->sgt; + } + } + + priv->pin_count++; + nvgpu_mutex_release(&priv->lock); + *attachment = priv->attach; + return priv->sgt; +} + +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment, + struct sg_table *sgt) +{ + struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); + dma_addr_t dma_addr; + + if (IS_ERR(priv) || !priv) + return; + + nvgpu_mutex_acquire(&priv->lock); + WARN_ON(priv->sgt != sgt); + WARN_ON(priv->attach != attachment); + priv->pin_count--; + WARN_ON(priv->pin_count < 0); + dma_addr = sg_dma_address(priv->sgt->sgl); + if (priv->pin_count == 0) { + dma_buf_unmap_attachment(priv->attach, priv->sgt, + DMA_BIDIRECTIONAL); + dma_buf_detach(dmabuf, priv->attach); + } + nvgpu_mutex_release(&priv->lock); +} + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev) +{ + struct gk20a *g = gk20a_get_platform(dev)->g; + struct gk20a_dmabuf_priv *priv; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (likely(priv)) + return 0; + + nvgpu_mutex_acquire(&g->mm.priv_lock); + priv = dma_buf_get_drvdata(dmabuf, dev); + if (priv) + goto priv_exist_or_err; + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + priv = ERR_PTR(-ENOMEM); + goto priv_exist_or_err; + } + + nvgpu_mutex_init(&priv->lock); + 
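+
+ /*
+ * priv must be fully constructed before dma_buf_set_drvdata()
+ * publishes it; racing callers serialize on g->mm.priv_lock, which
+ * is held across this whole lookup-or-create sequence.
+ */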
nvgpu_init_list_node(&priv->states); + priv->g = g; + dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv); + +priv_exist_or_err: + nvgpu_mutex_release(&g->mm.priv_lock); + if (IS_ERR(priv)) + return -ENOMEM; + + return 0; +} + +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, + u64 offset, struct gk20a_buffer_state **state) +{ + int err = 0; + struct gk20a_dmabuf_priv *priv; + struct gk20a_buffer_state *s; + struct device *dev = dev_from_gk20a(g); + + if (WARN_ON(offset >= (u64)dmabuf->size)) + return -EINVAL; + + err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev); + if (err) + return err; + + priv = dma_buf_get_drvdata(dmabuf, dev); + if (WARN_ON(!priv)) + return -ENOSYS; + + nvgpu_mutex_acquire(&priv->lock); + + nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list) + if (s->offset == offset) + goto out; + + /* State not found, create state. */ + s = nvgpu_kzalloc(g, sizeof(*s)); + if (!s) { + err = -ENOMEM; + goto out; + } + + s->offset = offset; + nvgpu_init_list_node(&s->list); + nvgpu_mutex_init(&s->lock); + nvgpu_list_add_tail(&s->list, &priv->states); + +out: + nvgpu_mutex_release(&priv->lock); + if (!err) + *state = s; + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.h b/drivers/gpu/nvgpu/os/linux/dmabuf.h new file mode 100644 index 00000000..8399eaaf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.h @@ -0,0 +1,62 @@ +/* +* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __COMMON_LINUX_DMABUF_H__ +#define __COMMON_LINUX_DMABUF_H__ + +#include +#include +#include +#include + +struct sg_table; +struct dma_buf; +struct dma_buf_attachment; +struct device; + +struct gk20a; +struct gk20a_buffer_state; + +struct gk20a_dmabuf_priv { + struct nvgpu_mutex lock; + + struct gk20a *g; + + struct gk20a_comptag_allocator *comptag_allocator; + struct gk20a_comptags comptags; + + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + int pin_count; + + struct nvgpu_list_node states; + + u64 buffer_id; +}; + +struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment **attachment); +void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment, + struct sg_table *sgt); + +int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); + +int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, + u64 offset, struct gk20a_buffer_state **state); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c new file mode 100644 index 00000000..8f33c5d2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "module.h" +#include "os_linux.h" +#include "sysfs.h" +#include "ioctl.h" +#include "gk20a/regops_gk20a.h" + +#define EMC3D_DEFAULT_RATIO 750 + +void nvgpu_kernel_restart(void *cmd) +{ + kernel_restart(cmd); +} + +static void nvgpu_init_vars(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + + nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq); + nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq); + + init_rwsem(&l->busy_lock); + nvgpu_rwsem_init(&g->deterministic_busy); + + nvgpu_spinlock_init(&g->mc_enable_lock); + + nvgpu_mutex_init(&platform->railgate_lock); + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + nvgpu_mutex_init(&g->poweron_lock); + nvgpu_mutex_init(&g->poweroff_lock); + nvgpu_mutex_init(&g->ctxsw_disable_lock); + + l->regs_saved = l->regs; + l->bar1_saved = l->bar1; + + g->emc3d_ratio = EMC3D_DEFAULT_RATIO; + + /* Set DMA parameters to allow larger sgt lists */ + dev->dma_parms = &l->dma_parms; + dma_set_max_seg_size(dev, UINT_MAX); + + /* + * A default of 16GB is the largest supported DMA size that is + * acceptable to all currently supported Tegra SoCs. 
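+ * DMA_BIT_MASK(34) below encodes exactly that limit: 2^34 bytes = 16GB.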
+ */ + if (!platform->dma_mask) + platform->dma_mask = DMA_BIT_MASK(34); + + dma_set_mask(dev, platform->dma_mask); + dma_set_coherent_mask(dev, platform->dma_mask); + + nvgpu_init_list_node(&g->profiler_objects); + + nvgpu_init_list_node(&g->boardobj_head); + nvgpu_init_list_node(&g->boardobjgrp_head); +} + +static void nvgpu_init_gr_vars(struct gk20a *g) +{ + gk20a_init_gr(g); + + nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); + g->gr.max_comptag_mem = totalram_pages + >> (10 - (PAGE_SHIFT - 10)); +} + +static void nvgpu_init_timeout(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->timeouts_disabled_by_user = false; + nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0); + + if (nvgpu_platform_is_silicon(g)) { + g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; + } else if (nvgpu_platform_is_fpga(g)) { + g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA; + } else { + g->gr_idle_timeout_default = (u32)ULONG_MAX; + } + g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; + g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US; +} + +static void nvgpu_init_timeslice(struct gk20a *g) +{ + g->runlist_interleave = true; + + g->timeslice_low_priority_us = 1300; + g->timeslice_medium_priority_us = 2600; + g->timeslice_high_priority_us = 5200; + + g->min_timeslice_us = 1000; + g->max_timeslice_us = 50000; +} + +static void nvgpu_init_pm_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + /* + * Set up initial power settings. For non-slicon platforms, disable + * power features and for silicon platforms, read from platform data + */ + g->slcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false; + g->blcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false; + g->elcg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false; + g->elpg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false; + g->aelpg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false; + g->mscg_enabled = + nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false; + g->can_elpg = + nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false; + + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG, + nvgpu_platform_is_silicon(g) ? platform->can_elcg : false); + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG, + nvgpu_platform_is_silicon(g) ? platform->can_slcg : false); + __nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG, + nvgpu_platform_is_silicon(g) ? 
platform->can_blcg : false); + + g->aggressive_sync_destroy = platform->aggressive_sync_destroy; + g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; + g->has_syncpoints = platform->has_syncpoints; +#ifdef CONFIG_NVGPU_SUPPORT_CDE + g->has_cde = platform->has_cde; +#endif + g->ptimer_src_freq = platform->ptimer_src_freq; + g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); + g->can_railgate = platform->can_railgate_init; + g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; + /* if default delay is not set, set default delay to 500msec */ + if (platform->railgate_delay_init) + g->railgate_delay = platform->railgate_delay_init; + else + g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT; + __nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon); + + /* set default values to aelpg parameters */ + g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US; + g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US; + g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US; + g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US; + g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT; + + __nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm); +} + +static void nvgpu_init_vbios_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + __nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos); + g->vbios_min_version = platform->vbios_min_version; +} + +static void nvgpu_init_ltc_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->ltc_streamid = platform->ltc_streamid; +} + +static void nvgpu_init_mm_vars(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); + + g->mm.disable_bigpage = platform->disable_bigpage; + __nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE, + platform->honors_aperture); + __nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, + platform->unified_memory); + __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, + platform->unify_address_spaces); + + nvgpu_mutex_init(&g->mm.tlb_lock); + nvgpu_mutex_init(&g->mm.priv_lock); +} + +int nvgpu_probe(struct gk20a *g, + const char *debugfs_symlink, + const char *interface_name, + struct class *class) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int err = 0; + + nvgpu_init_vars(g); + nvgpu_init_gr_vars(g); + nvgpu_init_timeout(g); + nvgpu_init_timeslice(g); + nvgpu_init_pm_vars(g); + nvgpu_init_vbios_vars(g); + nvgpu_init_ltc_vars(g); + err = nvgpu_init_soc_vars(g); + if (err) { + nvgpu_err(g, "init soc vars failed"); + return err; + } + + /* Initialize the platform interface. 
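+ * An -EPROBE_DEFER return here is not a real failure: it is logged at
+ * info level only, and the driver core retries the probe later once
+ * the resources the platform code needs become available.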
*/ + err = platform->probe(dev); + if (err) { + if (err == -EPROBE_DEFER) + nvgpu_info(g, "platform probe failed"); + else + nvgpu_err(g, "platform probe failed"); + return err; + } + + nvgpu_init_mm_vars(g); + + /* platform probe can defer do user init only if probe succeeds */ + err = gk20a_user_init(dev, interface_name, class); + if (err) + return err; + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + nvgpu_err(g, "late probe failed"); + return err; + } + } + + nvgpu_create_sysfs(dev); + gk20a_debug_init(g, debugfs_symlink); + + g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); + if (!g->dbg_regops_tmp_buf) { + nvgpu_err(g, "couldn't allocate regops tmp buf"); + return -ENOMEM; + } + g->dbg_regops_tmp_buf_ops = + SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); + + g->remove_support = gk20a_remove_support; + + nvgpu_ref_init(&g->refcount); + + return 0; +} + +/** + * cyclic_delta - Returns delta of cyclic integers a and b. + * + * @a - First integer + * @b - Second integer + * + * Note: if a is ahead of b, delta is positive. + */ +static int cyclic_delta(int a, int b) +{ + return a - b; +} + +/** + * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete + * + * @g - The GPU to wait on. + * + * Waits until all interrupt handlers that have been scheduled to run have + * completed. + */ +void nvgpu_wait_for_deferred_interrupts(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count); + int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count); + + /* wait until all stalling irqs are handled */ + NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq, + cyclic_delta(stall_irq_threshold, + atomic_read(&l->sw_irq_stall_last_handled)) + <= 0, 0); + + /* wait until all non-stalling irqs are handled */ + NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq, + cyclic_delta(nonstall_irq_threshold, + atomic_read(&l->sw_irq_nonstall_last_handled)) + <= 0, 0); +} + +static void nvgpu_free_gk20a(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + kfree(l); +} + +void nvgpu_init_gk20a(struct gk20a *g) +{ + g->free = nvgpu_free_gk20a; +} diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.h b/drivers/gpu/nvgpu/os/linux/driver_common.h new file mode 100644 index 00000000..6f42f775 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef NVGPU_LINUX_DRIVER_COMMON +#define NVGPU_LINUX_DRIVER_COMMON + +void nvgpu_init_gk20a(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/dt.c b/drivers/gpu/nvgpu/os/linux/dt.c new file mode 100644 index 00000000..88e391e3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dt.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os_linux.h" + +int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name, + u32 index, u32 *value) +{ + struct device *dev = dev_from_gk20a(g); + struct device_node *np = dev->of_node; + + return of_property_read_u32_index(np, name, index, value); +} diff --git a/drivers/gpu/nvgpu/os/linux/firmware.c b/drivers/gpu/nvgpu/os/linux/firmware.c new file mode 100644 index 00000000..9a4dc653 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/firmware.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +static const struct firmware *do_request_firmware(struct device *dev, + const char *prefix, const char *fw_name, int flags) +{ + const struct firmware *fw; + char *fw_path = NULL; + int path_len, err; + + if (prefix) { + path_len = strlen(prefix) + strlen(fw_name); + path_len += 2; /* for the path separator and zero terminator*/ + + fw_path = nvgpu_kzalloc(get_gk20a(dev), + sizeof(*fw_path) * path_len); + if (!fw_path) + return NULL; + + sprintf(fw_path, "%s/%s", prefix, fw_name); + fw_name = fw_path; + } + + if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) + err = request_firmware_direct(&fw, fw_name, dev); + else + err = request_firmware(&fw, fw_name, dev); + + nvgpu_kfree(get_gk20a(dev), fw_path); + if (err) + return NULL; + return fw; +} + +/* This is a simple wrapper around request_firmware that takes 'fw_name' and + * applies an IP specific relative path prefix to it. The caller is + * responsible for calling nvgpu_release_firmware later. */ +struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g, + const char *fw_name, + int flags) +{ + struct device *dev = dev_from_gk20a(g); + struct nvgpu_firmware *fw; + const struct firmware *linux_fw; + + /* current->fs is NULL when calling from SYS_EXIT. + Add a check here to prevent crash in request_firmware */ + if (!current->fs || !fw_name) + return NULL; + + fw = nvgpu_kzalloc(g, sizeof(*fw)); + if (!fw) + return NULL; + + linux_fw = do_request_firmware(dev, g->name, fw_name, flags); + +#ifdef CONFIG_TEGRA_GK20A + /* TO BE REMOVED - Support loading from legacy SOC specific path. 
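+ * Lookup order is the per-GPU prefix (g->name) first, then the
+ * platform soc_name directory as a legacy fallback; the fallback is
+ * skipped when NVGPU_REQUEST_FIRMWARE_NO_SOC is set.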
*/ + if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) { + struct gk20a_platform *platform = gk20a_get_platform(dev); + linux_fw = do_request_firmware(dev, + platform->soc_name, fw_name, flags); + } +#endif + + if (!linux_fw) + goto err; + + fw->data = nvgpu_kmalloc(g, linux_fw->size); + if (!fw->data) + goto err_release; + + memcpy(fw->data, linux_fw->data, linux_fw->size); + fw->size = linux_fw->size; + + release_firmware(linux_fw); + + return fw; + +err_release: + release_firmware(linux_fw); +err: + nvgpu_kfree(g, fw); + return NULL; +} + +void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) +{ + if(!fw) + return; + + nvgpu_kfree(g, fw->data); + nvgpu_kfree(g, fw); +} diff --git a/drivers/gpu/nvgpu/os/linux/fuse.c b/drivers/gpu/nvgpu/os/linux/fuse.c new file mode 100644 index 00000000..27851f92 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/fuse.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g) +{ + return tegra_sku_info.gpu_speedo_id; +} + +/* + * Use tegra_fuse_control_read/write() APIs for fuse offsets upto 0x100 + * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100 + */ +void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val) +{ + tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0); +} + +void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val) +{ + tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0); +} + +void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val) +{ + tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0); +} + +void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val) +{ + tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0); +} + +int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val) +{ + return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val); +} + +int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val) +{ + return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val); +} diff --git a/drivers/gpu/nvgpu/os/linux/intr.c b/drivers/gpu/nvgpu/os/linux/intr.c new file mode 100644 index 00000000..7ffc7e87 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mc_gk20a.h" + +#include +#include +#include "os_linux.h" + +irqreturn_t nvgpu_intr_stall(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 mc_intr_0; + + trace_mc_gk20a_intr_stall(g->name); + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + mc_intr_0 = g->ops.mc.intr_stall(g); + if (unlikely(!mc_intr_0)) + return IRQ_NONE; + + g->ops.mc.intr_stall_pause(g); + + atomic_inc(&l->hw_irq_stall_count); + + trace_mc_gk20a_intr_stall_done(g->name); + + return IRQ_WAKE_THREAD; +} + +irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int hw_irq_count; + + nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched"); + + trace_mc_gk20a_intr_thread_stall(g->name); + + hw_irq_count = atomic_read(&l->hw_irq_stall_count); + g->ops.mc.isr_stall(g); + g->ops.mc.intr_stall_resume(g); + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count); + + nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq); + + trace_mc_gk20a_intr_thread_stall_done(g->name); + + return IRQ_HANDLED; +} + +irqreturn_t nvgpu_intr_nonstall(struct gk20a *g) +{ + u32 non_stall_intr_val; + u32 hw_irq_count; + int ops_old, ops_new, ops = 0; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (!g->power_on) + return IRQ_NONE; + + /* not from gpu when sharing irq with others */ + non_stall_intr_val = g->ops.mc.intr_nonstall(g); + if (unlikely(!non_stall_intr_val)) + return IRQ_NONE; + + g->ops.mc.intr_nonstall_pause(g); + + ops = g->ops.mc.isr_nonstall(g); + if (ops) { + do { + ops_old = atomic_read(&l->nonstall_ops); + ops_new = ops_old | ops; + } while (ops_old != atomic_cmpxchg(&l->nonstall_ops, + ops_old, ops_new)); + + queue_work(l->nonstall_work_queue, &l->nonstall_fn_work); + } + + hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count); + + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count); + + g->ops.mc.intr_nonstall_resume(g); + + nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq); + + return IRQ_HANDLED; +} + +void nvgpu_intr_nonstall_cb(struct work_struct *work) +{ + struct nvgpu_os_linux *l = + container_of(work, struct nvgpu_os_linux, nonstall_fn_work); + struct gk20a *g = &l->g; + + do { + u32 ops; + + ops = atomic_xchg(&l->nonstall_ops, 0); + mc_gk20a_handle_intr_nonstall(g, ops); + } while (atomic_read(&l->nonstall_ops) != 0); +} diff --git a/drivers/gpu/nvgpu/os/linux/intr.h b/drivers/gpu/nvgpu/os/linux/intr.h new file mode 100644 index 00000000..d43cdccb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __NVGPU_LINUX_INTR_H__ +#define __NVGPU_LINUX_INTR_H__ +struct gk20a; + +irqreturn_t nvgpu_intr_stall(struct gk20a *g); +irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); +irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); +void nvgpu_intr_nonstall_cb(struct work_struct *work); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/io.c b/drivers/gpu/nvgpu/os/linux/io.c new file mode 100644 index 00000000..c06512a5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/io.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include + +#include "os_linux.h" +#include "gk20a/gk20a.h" + +void nvgpu_writel(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + writel_relaxed(v, l->regs + r); + nvgpu_wmb(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +u32 nvgpu_readl(struct gk20a *g, u32 r) +{ + u32 v = __nvgpu_readl(g, r); + + if (v == 0xffffffff) + __nvgpu_check_gpu_state(g); + + return v; +} + +u32 __nvgpu_readl(struct gk20a *g, u32 r) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 v = 0xffffffff; + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + v = readl(l->regs + r); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } + + return v; +} + +void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->regs)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); + } else { + nvgpu_wmb(); + do { + writel_relaxed(v, l->regs + r); + } while (readl(l->regs + r) != v); + nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v); + } +} + +void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (unlikely(!l->bar1)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + nvgpu_wmb(); + writel_relaxed(v, l->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } +} + +u32 nvgpu_bar1_readl(struct gk20a *g, u32 b) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 v = 0xffffffff; + + if (unlikely(!l->bar1)) { + __gk20a_warn_on_no_regs(); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v); + } else { + v = readl(l->bar1 + b); + nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v); + } + + return v; +} + +bool nvgpu_io_exists(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return l->regs != NULL; +} + +bool nvgpu_io_valid_reg(struct gk20a *g, u32 r) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return r < resource_size(l->regs); +} diff --git a/drivers/gpu/nvgpu/os/linux/io_usermode.c b/drivers/gpu/nvgpu/os/linux/io_usermode.c new file mode 100644 index 00000000..ce7c9e75 --- /dev/null +++ 
b/drivers/gpu/nvgpu/os/linux/io_usermode.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include + +#include "os_linux.h" +#include "gk20a/gk20a.h" + +#include + +void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r()); + + writel_relaxed(v, reg); + nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v); +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.c b/drivers/gpu/nvgpu/os/linux/ioctl.c new file mode 100644 index 00000000..359e5103 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.c @@ -0,0 +1,296 @@ +/* + * NVGPU IOCTLs + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" + +#include "ioctl_channel.h" +#include "ioctl_ctrl.h" +#include "ioctl_as.h" +#include "ioctl_tsg.h" +#include "ioctl_dbg.h" +#include "module.h" +#include "os_linux.h" +#include "ctxsw_trace.h" +#include "platform_gk20a.h" + +#define GK20A_NUM_CDEVS 7 + +const struct file_operations gk20a_channel_ops = { + .owner = THIS_MODULE, + .release = gk20a_channel_release, + .open = gk20a_channel_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_channel_ioctl, +#endif + .unlocked_ioctl = gk20a_channel_ioctl, +}; + +static const struct file_operations gk20a_ctrl_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctrl_dev_release, + .open = gk20a_ctrl_dev_open, + .unlocked_ioctl = gk20a_ctrl_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctrl_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_dbg_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_dbg_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, + .poll = gk20a_dbg_gpu_dev_poll, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_as_ops = { + .owner = THIS_MODULE, + .release = gk20a_as_dev_release, + .open = gk20a_as_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_as_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_as_dev_ioctl, +}; + +/* + * Note: We use a different 'open' to trigger handling of the profiler session. + * Most of the code is shared between them... Though, at some point if the + * code does get too tangled trying to handle each in the same path we can + * separate them cleanly. 
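+ * For now the two nodes differ only in their .open callback; the
+ * release and ioctl handlers are shared with the debugger device.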
+ */ +static const struct file_operations gk20a_prof_ops = { + .owner = THIS_MODULE, + .release = gk20a_dbg_gpu_dev_release, + .open = gk20a_prof_gpu_dev_open, + .unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_dbg_gpu_dev_ioctl, +#endif +}; + +static const struct file_operations gk20a_tsg_ops = { + .owner = THIS_MODULE, + .release = nvgpu_ioctl_tsg_dev_release, + .open = nvgpu_ioctl_tsg_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl, +#endif + .unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl, +}; + +#ifdef CONFIG_GK20A_CTXSW_TRACE +static const struct file_operations gk20a_ctxsw_ops = { + .owner = THIS_MODULE, + .release = gk20a_ctxsw_dev_release, + .open = gk20a_ctxsw_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_ctxsw_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_ctxsw_dev_ioctl, + .poll = gk20a_ctxsw_dev_poll, + .read = gk20a_ctxsw_dev_read, + .mmap = gk20a_ctxsw_dev_mmap, +}; +#endif + +static const struct file_operations gk20a_sched_ops = { + .owner = THIS_MODULE, + .release = gk20a_sched_dev_release, + .open = gk20a_sched_dev_open, +#ifdef CONFIG_COMPAT + .compat_ioctl = gk20a_sched_dev_ioctl, +#endif + .unlocked_ioctl = gk20a_sched_dev_ioctl, + .poll = gk20a_sched_dev_poll, + .read = gk20a_sched_dev_read, +}; + +static int gk20a_create_device( + struct device *dev, int devno, + const char *interface_name, const char *cdev_name, + struct cdev *cdev, struct device **out, + const struct file_operations *ops, + struct class *class) +{ + struct device *subdev; + int err; + struct gk20a *g = gk20a_from_dev(dev); + + nvgpu_log_fn(g, " "); + + cdev_init(cdev, ops); + cdev->owner = THIS_MODULE; + + err = cdev_add(cdev, devno, 1); + if (err) { + dev_err(dev, "failed to add %s cdev\n", cdev_name); + return err; + } + + subdev = device_create(class, NULL, devno, NULL, + interface_name, cdev_name); + + if (IS_ERR(subdev)) { + err = PTR_ERR(dev); + cdev_del(cdev); + dev_err(dev, "failed to create %s device for %s\n", + cdev_name, dev_name(dev)); + return err; + } + + *out = subdev; + return 0; +} + +void gk20a_user_deinit(struct device *dev, struct class *class) +{ + struct gk20a *g = gk20a_from_dev(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + if (l->channel.node) { + device_destroy(class, l->channel.cdev.dev); + cdev_del(&l->channel.cdev); + } + + if (l->as_dev.node) { + device_destroy(class, l->as_dev.cdev.dev); + cdev_del(&l->as_dev.cdev); + } + + if (l->ctrl.node) { + device_destroy(class, l->ctrl.cdev.dev); + cdev_del(&l->ctrl.cdev); + } + + if (l->dbg.node) { + device_destroy(class, l->dbg.cdev.dev); + cdev_del(&l->dbg.cdev); + } + + if (l->prof.node) { + device_destroy(class, l->prof.cdev.dev); + cdev_del(&l->prof.cdev); + } + + if (l->tsg.node) { + device_destroy(class, l->tsg.cdev.dev); + cdev_del(&l->tsg.cdev); + } + + if (l->ctxsw.node) { + device_destroy(class, l->ctxsw.cdev.dev); + cdev_del(&l->ctxsw.cdev); + } + + if (l->sched.node) { + device_destroy(class, l->sched.cdev.dev); + cdev_del(&l->sched.cdev); + } + + if (l->cdev_region) + unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS); +} + +int gk20a_user_init(struct device *dev, const char *interface_name, + struct class *class) +{ + int err; + dev_t devno; + struct gk20a *g = gk20a_from_dev(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev)); + if (err) { + dev_err(dev, "failed to allocate devno\n"); + goto fail; + } + l->cdev_region = devno; 
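+
+ /*
+ * Each node below takes one minor number from the chrdev region
+ * allocated above. interface_name is used as a printf format with
+ * the cdev name as its argument; with a format such as
+ * "nvhost%s-gpu" (an illustrative value), the "-as" node below comes
+ * out as /dev/nvhost-as-gpu.
+ */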
+ + err = gk20a_create_device(dev, devno++, interface_name, "", + &l->channel.cdev, &l->channel.node, + &gk20a_channel_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-as", + &l->as_dev.cdev, &l->as_dev.node, + &gk20a_as_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", + &l->ctrl.cdev, &l->ctrl.node, + &gk20a_ctrl_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-dbg", + &l->dbg.cdev, &l->dbg.node, + &gk20a_dbg_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-prof", + &l->prof.cdev, &l->prof.node, + &gk20a_prof_ops, + class); + if (err) + goto fail; + + err = gk20a_create_device(dev, devno++, interface_name, "-tsg", + &l->tsg.cdev, &l->tsg.node, + &gk20a_tsg_ops, + class); + if (err) + goto fail; + +#if defined(CONFIG_GK20A_CTXSW_TRACE) + err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", + &l->ctxsw.cdev, &l->ctxsw.node, + &gk20a_ctxsw_ops, + class); + if (err) + goto fail; +#endif + + err = gk20a_create_device(dev, devno++, interface_name, "-sched", + &l->sched.cdev, &l->sched.node, + &gk20a_sched_ops, + class); + if (err) + goto fail; + + return 0; +fail: + gk20a_user_deinit(dev, &nvgpu_class); + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.h b/drivers/gpu/nvgpu/os/linux/ioctl.h new file mode 100644 index 00000000..7bf16711 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_IOCTL_H__ +#define __NVGPU_IOCTL_H__ + +struct device; +struct class; + +int gk20a_user_init(struct device *dev, const char *interface_name, + struct class *class); +void gk20a_user_deinit(struct device *dev, struct class *class); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c new file mode 100644 index 00000000..47f612cc --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c @@ -0,0 +1,423 @@ +/* + * GK20A Address Spaces + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "ioctl_as.h" +#include "os_linux.h" + +static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) + core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) + core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; + + return core_flags; +} + +static int gk20a_as_ioctl_bind_channel( + struct gk20a_as_share *as_share, + struct nvgpu_as_bind_channel_args *args) +{ + int err = 0; + struct channel_gk20a *ch; + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + if (gk20a_channel_as_bound(ch)) { + err = -EINVAL; + goto out; + } + + /* this will set channel_gk20a->vm */ + err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); + +out: + gk20a_channel_put(ch); + return err; +} + +static int gk20a_as_ioctl_alloc_space( + struct gk20a_as_share *as_share, + struct nvgpu_as_alloc_space_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, + &args->o_a.offset, + gk20a_as_translate_as_alloc_space_flags(g, + args->flags)); +} + +static int gk20a_as_ioctl_free_space( + struct gk20a_as_share *as_share, + struct nvgpu_as_free_space_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + return nvgpu_vm_area_free(as_share->vm, args->offset); +} + +static int gk20a_as_ioctl_map_buffer_ex( + struct gk20a_as_share *as_share, + struct nvgpu_as_map_buffer_ex_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + /* unsupported, direct kind control must be used */ + if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { + struct gk20a *g = as_share->vm->mm->g; + nvgpu_log_info(g, "Direct kind control must be requested"); + return -EINVAL; + } + + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, + &args->offset, args->flags, + args->compr_kind, + args->incompr_kind, + args->buffer_offset, + args->mapping_size, + NULL); +} + +static int gk20a_as_ioctl_unmap_buffer( + struct gk20a_as_share *as_share, + struct nvgpu_as_unmap_buffer_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + + nvgpu_log_fn(g, " "); + + nvgpu_vm_unmap(as_share->vm, args->offset, NULL); + + return 0; +} + +static int gk20a_as_ioctl_map_buffer_batch( + struct gk20a_as_share *as_share, + struct nvgpu_as_map_buffer_batch_args *args) +{ + struct gk20a *g = gk20a_from_vm(as_share->vm); + u32 i; + int err = 0; + + struct nvgpu_as_unmap_buffer_args __user *user_unmap_args = + (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t) + args->unmaps; + struct nvgpu_as_map_buffer_ex_args __user *user_map_args = + (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t) + args->maps; + + struct vm_gk20a_mapping_batch batch; + + nvgpu_log_fn(g, " "); + + if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT || + args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT) + return -EINVAL; + + nvgpu_vm_mapping_batch_start(&batch); + + for (i = 0; i < args->num_unmaps; ++i) { + struct nvgpu_as_unmap_buffer_args unmap_args; + + if (copy_from_user(&unmap_args, &user_unmap_args[i], + sizeof(unmap_args))) { + err = -EFAULT; + break; + } + + 
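+ /*
+ * Unmaps that already succeeded are not rolled back if a later
+ * entry fails; on error args->num_unmaps reports how far this
+ * loop got.
+ */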
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); + } + + if (err) { + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); + + args->num_unmaps = i; + args->num_maps = 0; + return err; + } + + for (i = 0; i < args->num_maps; ++i) { + s16 compressible_kind; + s16 incompressible_kind; + + struct nvgpu_as_map_buffer_ex_args map_args; + memset(&map_args, 0, sizeof(map_args)); + + if (copy_from_user(&map_args, &user_map_args[i], + sizeof(map_args))) { + err = -EFAULT; + break; + } + + if (map_args.flags & + NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) { + compressible_kind = map_args.compr_kind; + incompressible_kind = map_args.incompr_kind; + } else { + /* direct kind control must be used */ + err = -EINVAL; + break; + } + + err = nvgpu_vm_map_buffer( + as_share->vm, map_args.dmabuf_fd, + &map_args.offset, map_args.flags, + compressible_kind, incompressible_kind, + map_args.buffer_offset, + map_args.mapping_size, + &batch); + if (err) + break; + } + + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); + + if (err) + args->num_maps = i; + /* note: args->num_unmaps will be unmodified, which is ok + * since all unmaps are done */ + + return err; +} + +static int gk20a_as_ioctl_get_va_regions( + struct gk20a_as_share *as_share, + struct nvgpu_as_get_va_regions_args *args) +{ + unsigned int i; + unsigned int write_entries; + struct nvgpu_as_va_region __user *user_region_ptr; + struct vm_gk20a *vm = as_share->vm; + struct gk20a *g = gk20a_from_vm(vm); + unsigned int page_sizes = gmmu_page_size_kernel; + + nvgpu_log_fn(g, " "); + + if (!vm->big_pages) + page_sizes--; + + write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region); + if (write_entries > page_sizes) + write_entries = page_sizes; + + user_region_ptr = + (struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr; + + for (i = 0; i < write_entries; ++i) { + struct nvgpu_as_va_region region; + struct nvgpu_allocator *vma = vm->vma[i]; + + memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); + + region.page_size = vm->gmmu_page_sizes[i]; + region.offset = nvgpu_alloc_base(vma); + /* No __aeabi_uldivmod() on some platforms... 
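+ * so derive the page count with a shift instead; page_size is a
+ * power of two, which makes ilog2() exact here.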
*/ + region.pages = (nvgpu_alloc_end(vma) - + nvgpu_alloc_base(vma)) >> ilog2(region.page_size); + + if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) + return -EFAULT; + } + + args->buf_size = + page_sizes * sizeof(struct nvgpu_as_va_region); + + return 0; +} + +static int nvgpu_as_ioctl_get_sync_ro_map( + struct gk20a_as_share *as_share, + struct nvgpu_as_get_sync_ro_map_args *args) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct vm_gk20a *vm = as_share->vm; + struct gk20a *g = gk20a_from_vm(vm); + u64 base_gpuva; + u32 sync_size; + int err = 0; + + if (!g->ops.fifo.get_sync_ro_map) + return -EINVAL; + + if (!gk20a_platform_has_syncpoints(g)) + return -EINVAL; + + err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size); + if (err) + return err; + + args->base_gpuva = base_gpuva; + args->sync_size = sync_size; + + return err; +#else + return -EINVAL; +#endif +} + +int gk20a_as_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a_as_share *as_share; + struct gk20a *g; + int err; + + l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev); + g = &l->g; + + nvgpu_log_fn(g, " "); + + err = gk20a_as_alloc_share(g, 0, 0, &as_share); + if (err) { + nvgpu_log_fn(g, "failed to alloc share"); + return err; + } + + filp->private_data = as_share; + return 0; +} + +int gk20a_as_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_as_share *as_share = filp->private_data; + + if (!as_share) + return 0; + + return gk20a_as_release_share(as_share); +} + +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct gk20a_as_share *as_share = filp->private_data; + struct gk20a *g = gk20a_from_as(as_share->as); + + u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE]; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + err = gk20a_busy(g); + if (err) + return err; + + switch (cmd) { + case NVGPU_AS_IOCTL_BIND_CHANNEL: + trace_gk20a_as_ioctl_bind_channel(g->name); + err = gk20a_as_ioctl_bind_channel(as_share, + (struct nvgpu_as_bind_channel_args *)buf); + + break; + case NVGPU32_AS_IOCTL_ALLOC_SPACE: + { + struct nvgpu32_as_alloc_space_args *args32 = + (struct nvgpu32_as_alloc_space_args *)buf; + struct nvgpu_as_alloc_space_args args; + + args.pages = args32->pages; + args.page_size = args32->page_size; + args.flags = args32->flags; + args.o_a.offset = args32->o_a.offset; + trace_gk20a_as_ioctl_alloc_space(g->name); + err = gk20a_as_ioctl_alloc_space(as_share, &args); + args32->o_a.offset = args.o_a.offset; + break; + } + case NVGPU_AS_IOCTL_ALLOC_SPACE: + trace_gk20a_as_ioctl_alloc_space(g->name); + err = gk20a_as_ioctl_alloc_space(as_share, + (struct nvgpu_as_alloc_space_args *)buf); + break; + case NVGPU_AS_IOCTL_FREE_SPACE: + trace_gk20a_as_ioctl_free_space(g->name); + err = gk20a_as_ioctl_free_space(as_share, + (struct nvgpu_as_free_space_args *)buf); + break; + case NVGPU_AS_IOCTL_MAP_BUFFER_EX: + trace_gk20a_as_ioctl_map_buffer(g->name); + err = gk20a_as_ioctl_map_buffer_ex(as_share, + (struct nvgpu_as_map_buffer_ex_args *)buf); + break; + case NVGPU_AS_IOCTL_UNMAP_BUFFER: + trace_gk20a_as_ioctl_unmap_buffer(g->name); + err = 
gk20a_as_ioctl_unmap_buffer(as_share, + (struct nvgpu_as_unmap_buffer_args *)buf); + break; + case NVGPU_AS_IOCTL_GET_VA_REGIONS: + trace_gk20a_as_ioctl_get_va_regions(g->name); + err = gk20a_as_ioctl_get_va_regions(as_share, + (struct nvgpu_as_get_va_regions_args *)buf); + break; + case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH: + err = gk20a_as_ioctl_map_buffer_batch(as_share, + (struct nvgpu_as_map_buffer_batch_args *)buf); + break; + case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP: + err = nvgpu_as_ioctl_get_sync_ro_map(as_share, + (struct nvgpu_as_get_sync_ro_map_args *)buf); + break; + default: + err = -ENOTTY; + break; + } + + gk20a_idle(g); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) + err = -EFAULT; + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.h b/drivers/gpu/nvgpu/os/linux/ioctl_as.h new file mode 100644 index 00000000..b3de3782 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.h @@ -0,0 +1,30 @@ +/* + * GK20A Address Spaces + * + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_COMMON_LINUX_AS_H__ +#define __NVGPU_COMMON_LINUX_AS_H__ + +struct inode; +struct file; + +/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and + * num_maps */ +#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256 + +/* struct file_operations driver interface */ +int gk20a_as_dev_open(struct inode *inode, struct file *filp); +int gk20a_as_dev_release(struct inode *inode, struct file *filp); +long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c new file mode 100644 index 00000000..b04bb9de --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -0,0 +1,1388 @@ +/* + * GK20A Graphics channel + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" +#include "gk20a/fence_gk20a.h" + +#include "platform_gk20a.h" +#include "ioctl_channel.h" +#include "channel.h" +#include "os_linux.h" +#include "ctxsw_trace.h" + +/* the minimal size of client buffer */ +#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ + (sizeof(struct gk20a_cs_snapshot_fifo) + \ + sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) + +static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + return "WFI"; + default: + return "?"; + } +} + +static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + return "WFI"; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + return "CTA"; + default: + return "?"; + } +} + +static void gk20a_channel_trace_sched_param( + void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, + const char *graphics_preempt_mode, + const char *compute_preempt_mode), + struct channel_gk20a *ch) +{ + struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); + + if (!tsg) + return; + + (trace)(ch->chid, ch->tsgid, ch->pid, + tsg_gk20a_from_ch(ch)->timeslice_us, + ch->timeout_ms_max, + gk20a_fifo_interleave_level_name(tsg->interleave_level), + gr_gk20a_graphics_preempt_mode_name( + tsg->gr_ctx.graphics_preempt_mode), + gr_gk20a_compute_preempt_mode_name( + tsg->gr_ctx.compute_preempt_mode)); +} + +/* + * Although channels do have pointers back to the gk20a struct that they were + * created under in cases where the driver is killed that pointer can be bad. + * The channel memory can be freed before the release() function for a given + * channel is called. This happens when the driver dies and userspace doesn't + * get a chance to call release() until after the entire gk20a driver data is + * unloaded and freed. + */ +struct channel_priv { + struct gk20a *g; + struct channel_gk20a *c; +}; + +#if defined(CONFIG_GK20A_CYCLE_STATS) + +void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + /* disable existing cyclestats buffer */ + nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); + if (priv->cyclestate_buffer_handler) { + dma_buf_vunmap(priv->cyclestate_buffer_handler, + ch->cyclestate.cyclestate_buffer); + dma_buf_put(priv->cyclestate_buffer_handler); + priv->cyclestate_buffer_handler = NULL; + ch->cyclestate.cyclestate_buffer = NULL; + ch->cyclestate.cyclestate_buffer_size = 0; + } + nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); +} + +static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, + struct nvgpu_cycle_stats_args *args) +{ + struct dma_buf *dmabuf; + void *virtual_address; + struct nvgpu_channel_linux *priv = ch->os_priv; + + /* is it allowed to handle calls for current GPU? 
*/ + if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) + return -ENOSYS; + + if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) { + + /* set up new cyclestats buffer */ + dmabuf = dma_buf_get(args->dmabuf_fd); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + virtual_address = dma_buf_vmap(dmabuf); + if (!virtual_address) + return -ENOMEM; + + priv->cyclestate_buffer_handler = dmabuf; + ch->cyclestate.cyclestate_buffer = virtual_address; + ch->cyclestate.cyclestate_buffer_size = dmabuf->size; + return 0; + + } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) { + gk20a_channel_free_cycle_stats_buffer(ch); + return 0; + + } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) { + /* no request from GL */ + return 0; + + } else { + pr_err("channel already has cyclestats buffer\n"); + return -EINVAL; + } +} + +static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) +{ + int ret; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (ch->cs_client) + ret = gr_gk20a_css_flush(ch, ch->cs_client); + else + ret = -EBADF; + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; +} + +static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch, + u32 dmabuf_fd, + u32 perfmon_id_count, + u32 *perfmon_id_start) +{ + int ret = 0; + struct gk20a *g = ch->g; + struct gk20a_cs_snapshot_client_linux *client_linux; + struct gk20a_cs_snapshot_client *client; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (ch->cs_client) { + nvgpu_mutex_release(&ch->cs_client_mutex); + return -EEXIST; + } + + client_linux = nvgpu_kzalloc(g, sizeof(*client_linux)); + if (!client_linux) { + ret = -ENOMEM; + goto err; + } + + client_linux->dmabuf_fd = dmabuf_fd; + client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd); + if (IS_ERR(client_linux->dma_handler)) { + ret = PTR_ERR(client_linux->dma_handler); + client_linux->dma_handler = NULL; + goto err_free; + } + + client = &client_linux->cs_client; + client->snapshot_size = client_linux->dma_handler->size; + if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) { + ret = -ENOMEM; + goto err_put; + } + + client->snapshot = (struct gk20a_cs_snapshot_fifo *) + dma_buf_vmap(client_linux->dma_handler); + if (!client->snapshot) { + ret = -ENOMEM; + goto err_put; + } + + ch->cs_client = client; + + ret = gr_gk20a_css_attach(ch, + perfmon_id_count, + perfmon_id_start, + ch->cs_client); + + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; + +err_put: + dma_buf_put(client_linux->dma_handler); +err_free: + nvgpu_kfree(g, client_linux); +err: + nvgpu_mutex_release(&ch->cs_client_mutex); + return ret; +} + +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch) +{ + int ret; + struct gk20a_cs_snapshot_client_linux *client_linux; + + nvgpu_mutex_acquire(&ch->cs_client_mutex); + if (!ch->cs_client) { + nvgpu_mutex_release(&ch->cs_client_mutex); + return 0; + } + + client_linux = container_of(ch->cs_client, + struct gk20a_cs_snapshot_client_linux, + cs_client); + + ret = gr_gk20a_css_detach(ch, ch->cs_client); + + if (client_linux->dma_handler) { + if (ch->cs_client->snapshot) + dma_buf_vunmap(client_linux->dma_handler, + ch->cs_client->snapshot); + dma_buf_put(client_linux->dma_handler); + } + + ch->cs_client = NULL; + nvgpu_kfree(ch->g, client_linux); + + nvgpu_mutex_release(&ch->cs_client_mutex); + + return ret; +} + +static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, + struct nvgpu_cycle_stats_snapshot_args *args) +{ + int ret; + + /* is it allowed to handle calls for
current GPU? */ + if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT)) + return -ENOSYS; + + if (!args->dmabuf_fd) + return -EINVAL; + + /* handle the command (most frequent cases first) */ + switch (args->cmd) { + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: + ret = gk20a_flush_cycle_stats_snapshot(ch); + args->extra = 0; + break; + + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH: + ret = gk20a_attach_cycle_stats_snapshot(ch, + args->dmabuf_fd, + args->extra, + &args->extra); + break; + + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH: + ret = gk20a_channel_free_cycle_stats_snapshot(ch); + args->extra = 0; + break; + + default: + pr_err("cyclestats: unknown command %u\n", args->cmd); + ret = -EINVAL; + break; + } + + return ret; +} +#endif + +static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, + struct nvgpu_channel_wdt_args *args) +{ + u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | + NVGPU_IOCTL_CHANNEL_ENABLE_WDT); + + if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) + ch->timeout.enabled = false; + else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) + ch->timeout.enabled = true; + else + return -EINVAL; + + if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) + ch->timeout.limit_ms = args->timeout_ms; + + ch->timeout.debug_dump = (args->wdt_status & + NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; + + return 0; +} + +static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch) +{ + struct nvgpu_channel_linux *priv = ch->os_priv; + + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + if (priv->error_notifier.dmabuf) { + dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr); + dma_buf_put(priv->error_notifier.dmabuf); + priv->error_notifier.dmabuf = NULL; + priv->error_notifier.notification = NULL; + priv->error_notifier.vaddr = NULL; + } + nvgpu_mutex_release(&priv->error_notifier.mutex); +} + +static int gk20a_init_error_notifier(struct channel_gk20a *ch, + struct nvgpu_set_error_notifier *args) +{ + struct dma_buf *dmabuf; + void *va; + u64 end = args->offset + sizeof(struct nvgpu_notification); + struct nvgpu_channel_linux *priv = ch->os_priv; + + if (!args->mem) { + pr_err("gk20a_init_error_notifier: invalid memory handle\n"); + return -EINVAL; + } + + dmabuf = dma_buf_get(args->mem); + + gk20a_channel_free_error_notifiers(ch); + + if (IS_ERR(dmabuf)) { + pr_err("Invalid handle: %d\n", args->mem); + return -EINVAL; + } + + if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) { + dma_buf_put(dmabuf); + nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset"); + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + /* map handle */ + va = dma_buf_vmap(dmabuf); + if (!va) { + dma_buf_put(dmabuf); + pr_err("Cannot map notifier handle\n"); + return -ENOMEM; + } + + priv->error_notifier.notification = va + args->offset; + priv->error_notifier.vaddr = va; + memset(priv->error_notifier.notification, 0, + sizeof(struct nvgpu_notification)); + + /* set channel notifiers pointer */ + nvgpu_mutex_acquire(&priv->error_notifier.mutex); + priv->error_notifier.dmabuf = dmabuf; + nvgpu_mutex_release(&priv->error_notifier.mutex); + + return 0; +} + +/* + * This returns the channel with a reference. The caller must + * gk20a_channel_put() the ref back after use. + * + * NULL is returned if the channel was not found. 
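As the comment above says, gk20a_get_channel_from_file() (defined just below) hands the caller a referenced channel, so every successful lookup must be balanced with gk20a_channel_put(). A sketch of the intended calling pattern — the wrapper function is hypothetical; only the lookup/put pair is the real API from this file:

/* Sketch only: mydev_op_on_channel_fd() is illustrative, not nvgpu code. */
static int mydev_op_on_channel_fd(int fd)
{
	struct channel_gk20a *ch = gk20a_get_channel_from_file(fd);

	if (!ch)
		return -EINVAL;	/* bad fd, foreign file, or dying channel */

	/* ... ch is pinned against teardown while we hold the ref ... */

	gk20a_channel_put(ch);	/* balance the ref taken by the lookup */
	return 0;
}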
+ */ +struct channel_gk20a *gk20a_get_channel_from_file(int fd) +{ + struct channel_gk20a *ch; + struct channel_priv *priv; + struct file *f = fget(fd); + + if (!f) + return NULL; + + if (f->f_op != &gk20a_channel_ops) { + fput(f); + return NULL; + } + + priv = (struct channel_priv *)f->private_data; + ch = gk20a_channel_get(priv->c); + fput(f); + return ch; +} + +int gk20a_channel_release(struct inode *inode, struct file *filp) +{ + struct channel_priv *priv = filp->private_data; + struct channel_gk20a *ch; + struct gk20a *g; + + int err; + + /* We could still end up here even if the channel_open failed, e.g. + * if we ran out of hw channel IDs. + */ + if (!priv) + return 0; + + ch = priv->c; + g = priv->g; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to release a channel!"); + goto channel_release; + } + + trace_gk20a_channel_release(dev_name(dev_from_gk20a(g))); + + gk20a_channel_close(ch); + gk20a_channel_free_error_notifiers(ch); + + gk20a_idle(g); + +channel_release: + gk20a_put(g); + nvgpu_kfree(g, filp->private_data); + filp->private_data = NULL; + return 0; +} + +/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ +static int __gk20a_channel_open(struct gk20a *g, + struct file *filp, s32 runlist_id) +{ + int err; + struct channel_gk20a *ch; + struct channel_priv *priv; + + nvgpu_log_fn(g, " "); + + g = gk20a_get(g); + if (!g) + return -ENODEV; + + trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on, %d", err); + goto fail_busy; + } + /* All the user space channel should be non privilege */ + ch = gk20a_open_new_channel(g, runlist_id, false, + nvgpu_current_pid(g), nvgpu_current_tid(g)); + gk20a_idle(g); + if (!ch) { + nvgpu_err(g, + "failed to get f"); + err = -ENOMEM; + goto fail_busy; + } + + gk20a_channel_trace_sched_param( + trace_gk20a_channel_sched_defaults, ch); + + priv->g = g; + priv->c = ch; + + filp->private_data = priv; + return 0; + +fail_busy: + nvgpu_kfree(g, priv); +free_ref: + gk20a_put(g); + return err; +} + +int gk20a_channel_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, channel.cdev); + struct gk20a *g = &l->g; + int ret; + + nvgpu_log_fn(g, "start"); + ret = __gk20a_channel_open(g, filp, -1); + + nvgpu_log_fn(g, "end"); + return ret; +} + +int gk20a_channel_open_ioctl(struct gk20a *g, + struct nvgpu_channel_open_args *args) +{ + int err; + int fd; + struct file *file; + char name[64]; + s32 runlist_id = args->in.runlist_id; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvhost-%s-fd%d", + dev_name(dev_from_gk20a(g)), fd); + + file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = __gk20a_channel_open(g, file, runlist_id); + if (err) + goto clean_up_file; + + fd_install(fd, file); + args->out.channel_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) + flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR; + + if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) + flags 
|= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC; + + if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) + flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE; + + return flags; +} + +static void nvgpu_get_gpfifo_ex_args( + struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, + struct nvgpu_gpfifo_args *gpfifo_args) +{ + gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries; + gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs; + gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( + alloc_gpfifo_ex_args->flags); +} + +static void nvgpu_get_gpfifo_args( + struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, + struct nvgpu_gpfifo_args *gpfifo_args) +{ + /* + * Kernel can insert one extra gpfifo entry before user + * submitted gpfifos and another one after, for internal usage. + * Triple the requested size. + */ + gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3; + gpfifo_args->num_inflight_jobs = 0; + gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( + alloc_gpfifo_args->flags); +} + +static void nvgpu_get_fence_args( + struct nvgpu_fence *fence_args_in, + struct nvgpu_channel_fence *fence_args_out) +{ + fence_args_out->id = fence_args_in->id; + fence_args_out->value = fence_args_in->value; +} + +static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, + ulong id, u32 offset, + u32 payload, u32 timeout) +{ + struct dma_buf *dmabuf; + void *data; + u32 *semaphore; + int ret = 0; + + /* do not wait if channel has timed out */ + if (ch->has_timedout) + return -ETIMEDOUT; + + dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); + return -EINVAL; + } + + data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); + if (!data) { + nvgpu_err(ch->g, "failed to map notifier memory"); + ret = -EINVAL; + goto cleanup_put; + } + + semaphore = data + (offset & ~PAGE_MASK); + + ret = NVGPU_COND_WAIT_INTERRUPTIBLE( + &ch->semaphore_wq, + *semaphore == payload || ch->has_timedout, + timeout); + + dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); +cleanup_put: + dma_buf_put(dmabuf); + return ret; +} + +static int gk20a_channel_wait(struct channel_gk20a *ch, + struct nvgpu_wait_args *args) +{ + struct dma_buf *dmabuf; + struct gk20a *g = ch->g; + struct notification *notif; + struct timespec tv; + u64 jiffies; + ulong id; + u32 offset; + int remain, ret = 0; + u64 end; + + nvgpu_log_fn(g, " "); + + if (ch->has_timedout) + return -ETIMEDOUT; + + switch (args->type) { + case NVGPU_WAIT_TYPE_NOTIFIER: + id = args->condition.notifier.dmabuf_fd; + offset = args->condition.notifier.offset; + end = offset + sizeof(struct notification); + + dmabuf = dma_buf_get(id); + if (IS_ERR(dmabuf)) { + nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", + id); + return -EINVAL; + } + + if (end > dmabuf->size || end < sizeof(struct notification)) { + dma_buf_put(dmabuf); + nvgpu_err(g, "invalid notifier offset"); + return -EINVAL; + } + + nvgpu_speculation_barrier(); + + notif = dma_buf_vmap(dmabuf); + if (!notif) { + nvgpu_err(g, "failed to map notifier memory"); + return -ENOMEM; + } + + notif = (struct notification *)((uintptr_t)notif + offset); + + /* user should set status pending before + * calling this ioctl */ + remain = NVGPU_COND_WAIT_INTERRUPTIBLE( + &ch->notifier_wq, + notif->status == 0 || ch->has_timedout, + args->timeout); + + if (remain == 0 && notif->status != 0) { + ret = -ETIMEDOUT; + goto notif_clean_up; + } else if (remain < 0) { + ret = -EINTR; + goto notif_clean_up; + 
} + + /* TBD: fill in correct information */ + jiffies = get_jiffies_64(); + jiffies_to_timespec(jiffies, &tv); + notif->timestamp.nanoseconds[0] = tv.tv_nsec; + notif->timestamp.nanoseconds[1] = tv.tv_sec; + notif->info32 = 0xDEADBEEF; /* should be object name */ + notif->info16 = ch->chid; /* should be method offset */ + +notif_clean_up: + dma_buf_vunmap(dmabuf, notif); + return ret; + + case NVGPU_WAIT_TYPE_SEMAPHORE: + ret = gk20a_channel_wait_semaphore(ch, + args->condition.semaphore.dmabuf_fd, + args->condition.semaphore.offset, + args->condition.semaphore.payload, + args->timeout); + + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, + struct nvgpu_zcull_bind_args *args) +{ + struct gk20a *g = ch->g; + struct gr_gk20a *gr = &g->gr; + + nvgpu_log_fn(gr->g, " "); + + return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, + args->gpu_va, args->mode); +} + +static int gk20a_ioctl_channel_submit_gpfifo( + struct channel_gk20a *ch, + struct nvgpu_submit_gpfifo_args *args) +{ + struct nvgpu_channel_fence fence; + struct gk20a_fence *fence_out; + struct fifo_profile_gk20a *profile = NULL; + u32 submit_flags = 0; + int fd = -1; + struct gk20a *g = ch->g; + + int ret = 0; + nvgpu_log_fn(g, " "); + + profile = gk20a_fifo_profile_acquire(ch->g); + gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); + + if (ch->has_timedout) + return -ETIMEDOUT; + + nvgpu_get_fence_args(&args->fence, &fence); + submit_flags = + nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); + + /* Try and allocate an fd here*/ + if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) + && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + + ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, + submit_flags, &fence, + &fence_out, profile); + + if (ret) { + if (fd != -1) + put_unused_fd(fd); + goto clean_up; + } + + /* Convert fence_out to something we can pass back to user space. 
*/ + if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { + if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); + else + args->fence.id = fd; + } else { + args->fence.id = fence_out->syncpt_id; + args->fence.value = fence_out->syncpt_value; + } + } + gk20a_fence_put(fence_out); + + gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); + if (profile) + gk20a_fifo_profile_release(ch->g, profile); + +clean_up: + return ret; +} + +/* + * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* + * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* + */ +u32 nvgpu_get_common_runlist_level(u32 level) +{ + switch (level) { + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: + return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; + default: + pr_err("%s: incorrect runlist level\n", __func__); + } + + return level; +} + +static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) +{ + u32 flags = 0; + + if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) + flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; + + if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) + flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; + + return flags; +} + +static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, + u32 class_num, u32 user_flags) +{ + return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, + nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); +} + +/* + * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) +{ + u32 flags = 0; + + if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) + flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; + if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) + flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; + + return flags; +} + +/* + * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) +{ + u32 flags = 0; + + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; + if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) + flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; + + return flags; +} + +/* + * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + */ +u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: + return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; + case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: + return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; + } + + return graphics_preempt_mode; +} + +/* + * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + */ +u32 
nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: + return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; + case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: + return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; + case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: + return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; + } + + return compute_preempt_mode; +} + +/* + * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* + * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* + */ +static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) +{ + switch (graphics_preempt_mode) { + case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: + return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; + case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: + return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } + + return graphics_preempt_mode; +} + +/* + * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* + * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* + */ +static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) +{ + switch (compute_preempt_mode) { + case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: + return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; + case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: + return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; + case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: + return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } + + return compute_preempt_mode; +} + +static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, + u32 graphics_preempt_mode, u32 compute_preempt_mode) +{ + int err; + + if (ch->g->ops.gr.set_preemption_mode) { + err = gk20a_busy(ch->g); + if (err) { + nvgpu_err(ch->g, "failed to power on, %d", err); + return err; + } + err = ch->g->ops.gr.set_preemption_mode(ch, + nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), + nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); + gk20a_idle(ch->g); + } else { + err = -EINVAL; + } + + return err; +} + +static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, + struct nvgpu_get_user_syncpoint_args *args) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a *g = ch->g; + int err; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { + nvgpu_err(g, "user syncpoints not supported"); + return -EINVAL; + } + + if (!gk20a_platform_has_syncpoints(g)) { + nvgpu_err(g, "syncpoints not supported"); + return -EINVAL; + } + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_err(g, "sufficient syncpoints not available"); + return -EINVAL; + } + + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync) { + nvgpu_mutex_release(&ch->sync_lock); + } else { + ch->user_sync = gk20a_channel_sync_create(ch, true); + if (!ch->user_sync) { + nvgpu_mutex_release(&ch->sync_lock); + return -ENOMEM; + } + nvgpu_mutex_release(&ch->sync_lock); + + if (g->ops.fifo.resetup_ramfc) { + err = g->ops.fifo.resetup_ramfc(ch); + if (err) + return err; + } + } + + args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync); + args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, + args->syncpoint_id); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) + args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync); + else + args->gpu_va = 0; + + return 0; +#else + return -EINVAL; +#endif +} + +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct channel_priv *priv = filp->private_data; + struct channel_gk20a *ch = priv->c; + struct device 
*dev = dev_from_gk20a(ch->g); + u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; + int err = 0; + struct gk20a *g = ch->g; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) + return -EINVAL; + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + /* take a ref or return timeout if channel refs can't be taken */ + ch = gk20a_channel_get(ch); + if (!ch) + return -ETIMEDOUT; + + /* protect our sanity for threaded userspace - most of the channel is + * not thread safe */ + nvgpu_mutex_acquire(&ch->ioctl_lock); + + /* this ioctl call keeps a ref to the file which keeps a ref to the + * channel */ + + switch (cmd) { + case NVGPU_IOCTL_CHANNEL_OPEN: + err = gk20a_channel_open_ioctl(ch->g, + (struct nvgpu_channel_open_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: + break; + case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: + { + struct nvgpu_alloc_obj_ctx_args *args = + (struct nvgpu_alloc_obj_ctx_args *)buf; + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: + { + struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = + (struct nvgpu_alloc_gpfifo_ex_args *)buf; + struct nvgpu_gpfifo_args gpfifo_args; + + nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args); + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { + err = -EINVAL; + gk20a_idle(ch->g); + break; + } + err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: + { + struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = + (struct nvgpu_alloc_gpfifo_args *)buf; + struct nvgpu_gpfifo_args gpfifo_args; + + nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args); + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); + gk20a_idle(ch->g); + break; + } + case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: + err = gk20a_ioctl_channel_submit_gpfifo(ch, + (struct nvgpu_submit_gpfifo_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_WAIT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + + /* waiting is thread-safe, not dropping this mutex could + * deadlock in certain conditions */ + nvgpu_mutex_release(&ch->ioctl_lock); + + err = gk20a_channel_wait(ch, + (struct nvgpu_wait_args *)buf); + + nvgpu_mutex_acquire(&ch->ioctl_lock); + + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_zcull_bind(ch, + (struct nvgpu_zcull_bind_args *)buf); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = 
gk20a_init_error_notifier(ch, + (struct nvgpu_set_error_notifier *)buf); + gk20a_idle(ch->g); + break; +#ifdef CONFIG_GK20A_CYCLE_STATS + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_cycle_stats(ch, + (struct nvgpu_cycle_stats_args *)buf); + gk20a_idle(ch->g); + break; +#endif + case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: + { + u32 timeout = + (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; + nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->chid); + ch->timeout_ms_max = timeout; + gk20a_channel_trace_sched_param( + trace_gk20a_channel_set_timeout, ch); + break; + } + case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: + { + u32 timeout = + (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; + bool timeout_debug_dump = !((u32) + ((struct nvgpu_set_timeout_ex_args *)buf)->flags & + (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); + nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", + timeout, ch->chid); + ch->timeout_ms_max = timeout; + ch->timeout_debug_dump = timeout_debug_dump; + gk20a_channel_trace_sched_param( + trace_gk20a_channel_set_timeout, ch); + break; + } + case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: + ((struct nvgpu_get_param_args *)buf)->value = + ch->has_timedout; + break; + case NVGPU_IOCTL_CHANNEL_ENABLE: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + if (ch->g->ops.fifo.enable_channel) + ch->g->ops.fifo.enable_channel(ch); + else + err = -ENOSYS; + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_DISABLE: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + if (ch->g->ops.fifo.disable_channel) + ch->g->ops.fifo.disable_channel(ch); + else + err = -ENOSYS; + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_PREEMPT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_fifo_preempt(ch->g, ch); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: + if (!capable(CAP_SYS_NICE)) { + err = -EPERM; + break; + } + if (!ch->g->ops.fifo.reschedule_runlist) { + err = -ENOSYS; + break; + } + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.fifo.reschedule_runlist(ch, + NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & + ((struct nvgpu_reschedule_runlist_args *)buf)->flags); + gk20a_idle(ch->g); + break; + case NVGPU_IOCTL_CHANNEL_FORCE_RESET: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.fifo.force_reset_ch(ch, + NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); + gk20a_idle(ch->g); + break; +#ifdef CONFIG_GK20A_CYCLE_STATS + case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = gk20a_channel_cycle_stats_snapshot(ch, + (struct nvgpu_cycle_stats_snapshot_args *)buf); + gk20a_idle(ch->g); + break; +#endif + case NVGPU_IOCTL_CHANNEL_WDT: + err = gk20a_channel_set_wdt_status(ch, + (struct nvgpu_channel_wdt_args *)buf); + break; + case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: 
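Almost every case in this switch wraps its hardware work in the same power bracket: gk20a_busy() takes a runtime power reference (powering the GPU up if needed) and can fail, and gk20a_idle() drops that reference. The shape of the bracket, as a standalone sketch (mydev_powered_op() is hypothetical; gk20a_busy()/gk20a_idle() are the real nvgpu calls used throughout this file, and the sketch assumes the nvgpu-internal "gk20a/gk20a.h" types):

static int mydev_powered_op(struct gk20a *g)
{
	int err;

	err = gk20a_busy(g);	/* take a power ref; may power the GPU on */
	if (err)
		return err;	/* busy failed: no ref held, do not idle */

	/* ... GPU is guaranteed powered inside this bracket ... */

	gk20a_idle(g);		/* drop the ref; GPU may rail-gate again */
	return 0;
}

Note the asymmetry on failure: when gk20a_busy() returns an error no reference was taken, so the error paths in the cases above break out without calling gk20a_idle().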
+ err = nvgpu_ioctl_channel_set_preemption_mode(ch, + ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, + ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); + break; + case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: + if (ch->g->ops.gr.set_boosted_ctx) { + bool boost = + ((struct nvgpu_boosted_ctx_args *)buf)->boost; + + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.gr.set_boosted_ctx(ch, boost); + gk20a_idle(ch->g); + } else { + err = -EINVAL; + } + break; + case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = nvgpu_ioctl_channel_get_user_syncpoint(ch, + (struct nvgpu_get_user_syncpoint_args *)buf); + gk20a_idle(ch->g); + break; + default: + dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + nvgpu_mutex_release(&ch->ioctl_lock); + + gk20a_channel_put(ch); + + nvgpu_log_fn(g, "end"); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.h b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h new file mode 100644 index 00000000..48cff1ea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __NVGPU_IOCTL_CHANNEL_H__ +#define __NVGPU_IOCTL_CHANNEL_H__ + +#include + +#include "gk20a/css_gr_gk20a.h" + +struct inode; +struct file; +struct gk20a; +struct nvgpu_channel_open_args; + +struct gk20a_cs_snapshot_client_linux { + struct gk20a_cs_snapshot_client cs_client; + + u32 dmabuf_fd; + struct dma_buf *dma_handler; +}; + +int gk20a_channel_open(struct inode *inode, struct file *filp); +int gk20a_channel_release(struct inode *inode, struct file *filp); +long gk20a_channel_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +int gk20a_channel_open_ioctl(struct gk20a *g, + struct nvgpu_channel_open_args *args); + +int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch); +void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch); + +extern const struct file_operations gk20a_channel_ops; + +u32 nvgpu_get_common_runlist_level(u32 level); + +u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags); +u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags); +u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode); +u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c new file mode 100644 index 00000000..501b5f93 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
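A note on the event bookkeeping later in this file: __pending_event() keeps free-running u32 head/tail counters for the notification queue and relies on modulo-2^32 unsigned subtraction, so the fill level stays correct even after the counters wrap. A small userspace-style illustration of that arithmetic (the values are made up and this is not nvgpu API, just the same unsigned-counter idiom):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size = 8;		/* queue capacity, in entries */
	uint32_t head = 0xfffffffeu;	/* consumer counter, about to wrap */
	uint32_t tail = 0x00000003u;	/* producer counter, already wrapped */

	/* modulo-2^32 subtraction still yields the fill level: 5 */
	printf("pending = %u\n", tail - head);

	/* if the producer outran the queue, drop the oldest entries by
	 * pulling head forward, mirroring the clamp in __pending_event() */
	if ((tail - head) >= size)
		head = tail - size;

	printf("head = %u\n", head);	/* unchanged here, since 5 < 8 */
	return 0;
}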
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_DEBUG_FS +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "clk/clk.h" +#include "pstate/pstate.h" +#include "lpwr/lpwr.h" +#include "volt/volt.h" + +#ifdef CONFIG_DEBUG_FS +#include "os_linux.h" +#endif + +static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, + struct file *filp) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_clk_session *session = dev->session; + + + clk_arb_dbg(session->g, " "); + + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + return 0; +} + +static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) +{ + unsigned int poll_mask = 0; + + if (nvgpu_poll_mask & NVGPU_POLLIN) + poll_mask |= POLLIN; + if (nvgpu_poll_mask & NVGPU_POLLPRI) + poll_mask |= POLLPRI; + if (nvgpu_poll_mask & NVGPU_POLLOUT) + poll_mask |= POLLOUT; + if (nvgpu_poll_mask & NVGPU_POLLRDNORM) + poll_mask |= POLLRDNORM; + if (nvgpu_poll_mask & NVGPU_POLLHUP) + poll_mask |= POLLHUP; + + return poll_mask; +} + +static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + + clk_arb_dbg(dev->session->g, " "); + + poll_wait(filp, &dev->readout_wq.wq, wait); + return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0)); +} + +void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) +{ + nvgpu_cond_broadcast_interruptible(&dev->readout_wq); +} + +static int nvgpu_clk_arb_release_event_dev(struct inode *inode, + struct file *filp) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_clk_session *session = dev->session; + struct nvgpu_clk_arb *arb; + + arb = session->g->clk_arb; + + clk_arb_dbg(session->g, " "); + + if (arb) { + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_del(&dev->link); + nvgpu_spinlock_release(&arb->users_lock); + nvgpu_clk_notification_queue_free(arb->g, &dev->queue); + } + + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + + return 0; +} + +static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) +{ + u32 nvgpu_gpu_event; + + switch (nvgpu_event) { + case NVGPU_EVENT_VF_UPDATE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; + break; + case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; + break; + case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; + break; + case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; + break; + case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: + nvgpu_gpu_event = 
NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; + break; + case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; + break; + case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; + break; + case NVGPU_EVENT_ALARM_GPU_LOST: + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; + break; + default: + /* Control shouldn't come here */ + nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; + break; + } + return nvgpu_gpu_event; +} + +static inline u32 __pending_event(struct nvgpu_clk_dev *dev, + struct nvgpu_gpu_event_info *info) { + + u32 tail, head; + u32 events = 0; + struct nvgpu_clk_notification *p_notif; + + tail = nvgpu_atomic_read(&dev->queue.tail); + head = nvgpu_atomic_read(&dev->queue.head); + + head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; + + if (_WRAPGTEQ(tail, head) && info) { + head++; + p_notif = &dev->queue.notifications[head % dev->queue.size]; + events |= nvgpu_convert_gpu_event(p_notif->notification); + info->event_id = ffs(events) - 1; + info->timestamp = p_notif->timestamp; + nvgpu_atomic_set(&dev->queue.head, head); + } + + return events; +} + +static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, + size_t size, loff_t *off) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct nvgpu_gpu_event_info info; + ssize_t err; + + clk_arb_dbg(dev->session->g, + "filp=%p, buf=%p, size=%zu", filp, buf, size); + + if ((size - *off) < sizeof(info)) + return 0; + + memset(&info, 0, sizeof(info)); + /* Get the oldest event from the queue */ + while (!__pending_event(dev, &info)) { + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, + __pending_event(dev, &info), 0); + if (err) + return err; + if (info.timestamp) + break; + } + + if (copy_to_user(buf + *off, &info, sizeof(info))) + return -EFAULT; + + return sizeof(info); +} + +static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, + struct nvgpu_gpu_set_event_filter_args *args) +{ + struct gk20a *g = dev->session->g; + u32 mask; + + nvgpu_log(g, gpu_dbg_fn, " "); + + if (args->flags) + return -EINVAL; + + if (args->size != 1) + return -EINVAL; + + if (copy_from_user(&mask, (void __user *) args->buffer, + args->size * sizeof(u32))) + return -EFAULT; + + /* update alarm mask */ + nvgpu_atomic_set(&dev->enabled_mask, mask); + + return 0; +} + +static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct nvgpu_clk_dev *dev = filp->private_data; + struct gk20a *g = dev->session->g; + u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) + || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) + return -EINVAL; + + BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_EVENT_IOCTL_SET_FILTER: + err = nvgpu_clk_arb_set_event_filter(dev, + (struct nvgpu_gpu_set_event_filter_args *)buf); + break; + default: + nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); + + return err; +} + +static const struct 
file_operations completion_dev_ops = { + .owner = THIS_MODULE, + .release = nvgpu_clk_arb_release_completion_dev, + .poll = nvgpu_clk_arb_poll_dev, +}; + +static const struct file_operations event_dev_ops = { + .owner = THIS_MODULE, + .release = nvgpu_clk_arb_release_event_dev, + .poll = nvgpu_clk_arb_poll_dev, + .read = nvgpu_clk_arb_read_event_dev, +#ifdef CONFIG_COMPAT + .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, +#endif + .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, +}; + +static int nvgpu_clk_arb_install_fd(struct gk20a *g, + struct nvgpu_clk_session *session, + const struct file_operations *fops, + struct nvgpu_clk_dev **_dev) +{ + struct file *file; + int fd; + int err; + int status; + char name[64]; + struct nvgpu_clk_dev *dev; + + clk_arb_dbg(g, " "); + + dev = nvgpu_kzalloc(g, sizeof(*dev)); + if (!dev) + return -ENOMEM; + + status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, + DEFAULT_EVENT_NUMBER); + if (status < 0) { + err = status; + goto fail; + } + + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) { + err = fd; + goto fail; + } + + snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); + file = anon_inode_getfile(name, fops, dev, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto fail_fd; + } + + fd_install(fd, file); + + nvgpu_cond_init(&dev->readout_wq); + + nvgpu_atomic_set(&dev->poll_mask, 0); + + dev->session = session; + nvgpu_ref_init(&dev->refcount); + + nvgpu_ref_get(&session->refcount); + + *_dev = dev; + + return fd; + +fail_fd: + put_unused_fd(fd); +fail: + nvgpu_kfree(g, dev); + + return err; +} + +int nvgpu_clk_arb_install_event_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_dev *dev; + int fd; + + clk_arb_dbg(g, " "); + + fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); + if (fd < 0) + return fd; + + /* TODO: alarm mask needs to be set to default value to prevent + * failures of legacy tests. 
This will be removed when sanity is + * updated + */ + if (alarm_mask) + nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); + else + nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); + + dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); + + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_add_tail(&dev->link, &arb->users); + nvgpu_spinlock_release(&arb->users_lock); + + *event_fd = fd; + + return 0; +} + +int nvgpu_clk_arb_install_request_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int *request_fd) +{ + struct nvgpu_clk_dev *dev; + int fd; + + clk_arb_dbg(g, " "); + + fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); + if (fd < 0) + return fd; + + *request_fd = fd; + + return 0; +} + +int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int request_fd) +{ + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_dev *dev; + struct fd fd; + int err = 0; + + clk_arb_dbg(g, " "); + + fd = fdget(request_fd); + if (!fd.file) + return -EINVAL; + + if (fd.file->f_op != &completion_dev_ops) { + err = -EINVAL; + goto fdput_fd; + } + + dev = (struct nvgpu_clk_dev *) fd.file->private_data; + + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_spinlock_acquire(&session->session_lock); + nvgpu_list_add(&dev->node, &session->targets); + nvgpu_spinlock_release(&session->session_lock); + nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item); + +fdput_fd: + fdput(fd); + return err; +} + +int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, + int request_fd, u32 api_domain, u16 target_mhz) +{ + struct nvgpu_clk_dev *dev; + struct fd fd; + int err = 0; + + clk_arb_dbg(session->g, + "domain=0x%08x target_mhz=%u", api_domain, target_mhz); + + fd = fdget(request_fd); + if (!fd.file) + return -EINVAL; + + if (fd.file->f_op != &completion_dev_ops) { + err = -EINVAL; + goto fdput_fd; + } + + dev = fd.file->private_data; + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } + + switch (api_domain) { + case NVGPU_CLK_DOMAIN_MCLK: + dev->mclk_target_mhz = target_mhz; + break; + + case NVGPU_CLK_DOMAIN_GPCCLK: + dev->gpc2clk_target_mhz = target_mhz * 2ULL; + break; + + default: + err = -EINVAL; + } + +fdput_fd: + fdput(fd); + return err; +} + +u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) +{ + u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); + u32 api_domains = 0; + + if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) + api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); + + if (clk_domains & CTRL_CLK_DOMAIN_MCLK) + api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); + + return api_domains; +} + +#ifdef CONFIG_DEBUG_FS +static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_arb *arb = g->clk_arb; + struct nvgpu_clk_arb_debug *debug; + + u64 num; + s64 tmp, avg, std, max, min; + + debug = NV_ACCESS_ONCE(arb->debug); + /* Make copy of structure and ensure no reordering */ + nvgpu_smp_rmb(); + if (!debug) + return -EINVAL; + + std = debug->switch_std; + avg = debug->switch_avg; + max = debug->switch_max; + min = debug->switch_min; + num = debug->switch_num; + + tmp = std; + do_div(tmp, num); + seq_printf(s, "Number of transitions: %lld\n", + num); + seq_printf(s, "max / min : %lld / %lld usec\n", + max, min); + seq_printf(s, "avg / std : %lld / %ld usec\n", + avg, int_sqrt(tmp)); + + return 0; +} + +static int 
nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); +} + +static const struct file_operations nvgpu_clk_arb_stats_fops = { + .open = nvgpu_clk_arb_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +int nvgpu_clk_arb_debugfs_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct dentry *gpu_root = l->debugfs; + struct dentry *d; + + nvgpu_log(g, gpu_dbg_info, "g=%p", g); + + d = debugfs_create_file( + "arb_stats", + S_IRUGO, + gpu_root, + g, + &nvgpu_clk_arb_stats_fops); + if (!d) + return -ENOMEM; + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c new file mode 100644 index 00000000..73a8131d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -0,0 +1,1962 @@ +/* + * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ioctl_ctrl.h" +#include "ioctl_dbg.h" +#include "ioctl_as.h" +#include "ioctl_tsg.h" +#include "ioctl_channel.h" +#include "gk20a/gk20a.h" +#include "gk20a/fence_gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "dmabuf.h" +#include "channel.h" + +#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? 
\ + (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) +#define MHZ_TO_HZ(a) ((u64)a * MHZ) + +struct gk20a_ctrl_priv { + struct device *dev; + struct gk20a *g; + struct nvgpu_clk_session *clk_session; +}; + +static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) + core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; + + return core_flags; +} + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + struct gk20a_ctrl_priv *priv; + int err = 0; + + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, ctrl.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + nvgpu_log_fn(g, " "); + + priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + filp->private_data = priv; + priv->dev = dev_from_gk20a(g); + /* + * We don't close the arbiter fds after driver teardown to support + * GPU_LOST events, so we store g here, instead of dereferencing the + * dev structure on teardown + */ + priv->g = g; + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + goto free_ref; + gk20a_idle(g); + } + + err = nvgpu_clk_arb_init_session(g, &priv->clk_session); +free_ref: + if (err) + gk20a_put(g); + return err; +} +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_ctrl_priv *priv = filp->private_data; + struct gk20a *g = priv->g; + + nvgpu_log_fn(g, " "); + + if (priv->clk_session) + nvgpu_clk_arb_release_session(g, priv->clk_session); + + gk20a_put(g); + nvgpu_kfree(g, priv); + + return 0; +} + +struct nvgpu_flags_mapping { + u64 ioctl_flag; + int enabled_flag; +}; + +static struct nvgpu_flags_mapping flags_mapping[] = { + {NVGPU_GPU_FLAGS_HAS_SYNCPOINTS, + NVGPU_HAS_SYNCPOINTS}, + {NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS, + NVGPU_SUPPORT_PARTIAL_MAPPINGS}, + {NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS, + NVGPU_SUPPORT_SPARSE_ALLOCS}, + {NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS, + NVGPU_SUPPORT_SYNC_FENCE_FDS}, + {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS, + NVGPU_SUPPORT_CYCLE_STATS}, + {NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT, + NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT}, + {NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS, + NVGPU_SUPPORT_USERSPACE_MANAGED_AS}, + {NVGPU_GPU_FLAGS_SUPPORT_TSG, + NVGPU_SUPPORT_TSG}, + {NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS, + NVGPU_SUPPORT_CLOCK_CONTROLS}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE, + NVGPU_SUPPORT_GET_VOLTAGE}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT, + NVGPU_SUPPORT_GET_CURRENT}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_POWER, + NVGPU_SUPPORT_GET_POWER}, + {NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE, + NVGPU_SUPPORT_GET_TEMPERATURE}, + {NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT, + NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT}, + {NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS, + NVGPU_SUPPORT_DEVICE_EVENTS}, + {NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE, + NVGPU_SUPPORT_FECS_CTXSW_TRACE}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, + NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, + {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, + NVGPU_SUPPORT_DETERMINISTIC_OPTS}, + {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, + NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, + {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, + NVGPU_SUPPORT_USER_SYNCPOINT}, + {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, + NVGPU_SUPPORT_IO_COHERENCE}, +
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, + NVGPU_SUPPORT_RESCHEDULE_RUNLIST}, + {NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL, + NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF, + NVGPU_ECC_ENABLED_SM_LRF}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM, + NVGPU_ECC_ENABLED_SM_SHM}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_TEX, + NVGPU_ECC_ENABLED_TEX}, + {NVGPU_GPU_FLAGS_ECC_ENABLED_LTC, + NVGPU_ECC_ENABLED_LTC}, + {NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS, + NVGPU_SUPPORT_TSG_SUBCONTEXTS}, + {NVGPU_GPU_FLAGS_SUPPORT_SCG, + NVGPU_SUPPORT_SCG}, + {NVGPU_GPU_FLAGS_SUPPORT_VPR, + NVGPU_SUPPORT_VPR}, +}; + +static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) +{ + unsigned int i; + u64 ioctl_flags = 0; + + for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { + if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) + ioctl_flags |= flags_mapping[i].ioctl_flag; + } + + return ioctl_flags; +} + +static void nvgpu_set_preemption_mode_flags(struct gk20a *g, + struct nvgpu_gpu_characteristics *gpu) +{ + struct nvgpu_preemption_modes_rec preemption_mode_rec; + + g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec); + + gpu->graphics_preemption_mode_flags = + nvgpu_get_ioctl_graphics_preempt_mode_flags( + preemption_mode_rec.graphics_preemption_mode_flags); + gpu->compute_preemption_mode_flags = + nvgpu_get_ioctl_compute_preempt_mode_flags( + preemption_mode_rec.compute_preemption_mode_flags); + + gpu->default_graphics_preempt_mode = + nvgpu_get_ioctl_graphics_preempt_mode( + preemption_mode_rec.default_graphics_preempt_mode); + gpu->default_compute_preempt_mode = + nvgpu_get_ioctl_compute_preempt_mode( + preemption_mode_rec.default_compute_preempt_mode); +} + +static long +gk20a_ctrl_ioctl_gpu_characteristics( + struct gk20a *g, + struct nvgpu_gpu_get_characteristics *request) +{ + struct nvgpu_gpu_characteristics gpu; + long err = 0; + + if (gk20a_busy(g)) { + nvgpu_err(g, "failed to power on gpu"); + return -EINVAL; + } + + memset(&gpu, 0, sizeof(gpu)); + + gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); + gpu.on_board_video_memory_size = 0; /* integrated GPU */ + + gpu.num_gpc = g->gr.gpc_count; + gpu.max_gpc_count = g->gr.max_gpc_count; + + gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; + + gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ + + gpu.compression_page_size = g->ops.fb.compression_page_size(g); + + gpu.gpc_mask = (1 << g->gr.gpc_count)-1; + + gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); + + gpu.arch = g->params.gpu_arch; + gpu.impl = g->params.gpu_impl; + gpu.rev = g->params.gpu_rev; + gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; + gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? 
+ NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; + gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); + gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); + gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); + gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); + gpu.inline_to_memory_class = + g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); + gpu.dma_copy_class = + g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + + gpu.vbios_version = g->bios.vbios_version; + gpu.vbios_oem_version = g->bios.vbios_oem_version; + + gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); + gpu.pde_coverage_bit_count = + g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; + gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); + + gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; + gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; + gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; + + gpu.max_css_buffer_size = g->gr.max_css_buffer_size; + + gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; + gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; + gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; + gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; + gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; + gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; + gpu.gpu_va_bit_count = 40; + + strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); + gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); + gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); + gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); + gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); + gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; + gpu.gr_gobs_per_comptagline_per_slice = + g->gr.gobs_per_comptagline_per_slice; + gpu.num_ltc = g->ltc_count; + gpu.lts_per_ltc = g->gr.slices_per_ltc; + gpu.cbc_cache_line_size = g->gr.cacheline_size; + gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; + + if (g->ops.clk.get_maxrate) + gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); + + gpu.local_video_memory_size = g->mm.vidmem.size; + + gpu.pci_vendor_id = g->pci_vendor_id; + gpu.pci_device_id = g->pci_device_id; + gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; + gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; + gpu.pci_class = g->pci_class; + gpu.pci_revision = g->pci_revision; + + nvgpu_set_preemption_mode_flags(g, &gpu); + + if (request->gpu_characteristics_buf_size > 0) { + size_t write_size = sizeof(gpu); + + if (write_size > request->gpu_characteristics_buf_size) + write_size = request->gpu_characteristics_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + request->gpu_characteristics_buf_addr, + &gpu, write_size); + } + + if (err == 0) + request->gpu_characteristics_buf_size = sizeof(gpu); + + gk20a_idle(g); + + return err; +} + +static int gk20a_ctrl_prepare_compressible_read( + struct gk20a *g, + struct nvgpu_gpu_prepare_compressible_read_args *args) +{ + int ret = -ENOSYS; + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct nvgpu_channel_fence fence; + struct gk20a_fence *fence_out = NULL; + int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( + args->submit_flags); + int fd = -1; + + fence.id = args->fence.syncpt_id; + fence.value = args->fence.syncpt_value; + + /* Try and allocate an fd here*/ + if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) + && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { + fd = 
get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; + } + + ret = gk20a_prepare_compressible_read(l, args->handle, + args->request_compbits, args->offset, + args->compbits_hoffset, args->compbits_voffset, + args->scatterbuffer_offset, + args->width, args->height, args->block_height_log2, + submit_flags, &fence, &args->valid_compbits, + &args->zbc_color, &fence_out); + + if (ret) { + if (fd != -1) + put_unused_fd(fd); + return ret; + } + + /* Convert fence_out to something we can pass back to user space. */ + if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { + if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { + if (fence_out) { + ret = gk20a_fence_install_fd(fence_out, fd); + if (ret) + put_unused_fd(fd); + else + args->fence.fd = fd; + } else { + args->fence.fd = -1; + put_unused_fd(fd); + } + } else { + if (fence_out) { + args->fence.syncpt_id = fence_out->syncpt_id; + args->fence.syncpt_value = + fence_out->syncpt_value; + } else { + args->fence.syncpt_id = -1; + args->fence.syncpt_value = 0; + } + } + } + gk20a_fence_put(fence_out); +#endif + + return ret; +} + +static int gk20a_ctrl_mark_compressible_write( + struct gk20a *g, + struct nvgpu_gpu_mark_compressible_write_args *args) +{ + int ret = -ENOSYS; + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + ret = gk20a_mark_compressible_write(g, args->handle, + args->valid_compbits, args->offset, args->zbc_color); +#endif + + return ret; +} + +static int gk20a_ctrl_alloc_as( + struct gk20a *g, + struct nvgpu_alloc_as_args *args) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_as_share *as_share; + int err; + int fd; + struct file *file; + char name[64]; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); + + file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = gk20a_as_alloc_share(g, args->big_page_size, + gk20a_as_translate_as_alloc_flags(g, + args->flags), + &as_share); + if (err) + goto clean_up_file; + + fd_install(fd, file); + file->private_data = as_share; + + args->as_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static int gk20a_ctrl_open_tsg(struct gk20a *g, + struct nvgpu_gpu_open_tsg_args *args) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int err; + int fd; + struct file *file; + char name[64]; + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + return err; + fd = err; + + snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); + + file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + err = nvgpu_ioctl_tsg_open(g, file); + if (err) + goto clean_up_file; + + fd_install(fd, file); + args->tsg_fd = fd; + return 0; + +clean_up_file: + fput(file); +clean_up: + put_unused_fd(fd); + return err; +} + +static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, + struct nvgpu_gpu_get_tpc_masks_args *args) +{ + struct gr_gk20a *gr = &g->gr; + int err = 0; + const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; + + if (args->mask_buf_size > 0) { + size_t write_size = gpc_tpc_mask_size; + + if (write_size > args->mask_buf_size) + write_size = args->mask_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + args->mask_buf_addr, + gr->gpc_tpc_mask, write_size); + } + + if (err == 0) + args->mask_buf_size = gpc_tpc_mask_size; + + return err; +} + +static int 
gk20a_ctrl_get_fbp_l2_masks( + struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) +{ + struct gr_gk20a *gr = &g->gr; + int err = 0; + const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; + + if (args->mask_buf_size > 0) { + size_t write_size = fbp_l2_mask_size; + + if (write_size > args->mask_buf_size) + write_size = args->mask_buf_size; + + err = copy_to_user((void __user *)(uintptr_t) + args->mask_buf_addr, + gr->fbp_rop_l2_en_mask, write_size); + } + + if (err == 0) + args->mask_buf_size = fbp_l2_mask_size; + + return err; +} + +static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, + struct nvgpu_gpu_l2_fb_args *args) +{ + int err = 0; + + if ((!args->l2_flush && !args->fb_flush) || + (!args->l2_flush && args->l2_invalidate)) + return -EINVAL; + + if (args->l2_flush) + g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); + + if (args->fb_flush) + g->ops.mm.fb_flush(g); + + return err; +} + +/* Invalidate i-cache for kepler & maxwell */ +static int nvgpu_gpu_ioctl_inval_icache( + struct gk20a *g, + struct nvgpu_gpu_inval_icache_args *args) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.inval_icache(g, ch); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + return err; +} + +static int nvgpu_gpu_ioctl_set_mmu_debug_mode( + struct gk20a *g, + struct nvgpu_gpu_mmu_debug_mode_args *args) +{ + if (gk20a_busy(g)) { + nvgpu_err(g, "failed to power on gpu"); + return -EINVAL; + } + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.fb.set_debug_mode(g, args->state == 1); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + return 0; +} + +static int nvgpu_gpu_ioctl_set_debug_mode( + struct gk20a *g, + struct nvgpu_gpu_sm_debug_mode_args *args) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) + return -EINVAL; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (g->ops.gr.set_sm_debug_mode) + err = g->ops.gr.set_sm_debug_mode(g, ch, + args->sms, !!args->enable); + else + err = -ENOSYS; + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + return err; +} + +static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.trigger_suspend(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, + struct nvgpu_gpu_wait_pause_args *args) +{ + int err; + struct warpstate *ioctl_w_state; + struct nvgpu_warpstate *w_state = NULL; + u32 sm_count, ioctl_size, size, sm_id; + + sm_count = g->gr.gpc_count * g->gr.tpc_count; + + ioctl_size = sm_count * sizeof(struct warpstate); + ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); + if (!ioctl_w_state) + return -ENOMEM; + + size = sm_count * sizeof(struct nvgpu_warpstate); + w_state = nvgpu_kzalloc(g, size); + if (!w_state) { + err = -ENOMEM; + goto out_free; + } + + err = gk20a_busy(g); + if (err) + goto out_free; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.gr.wait_for_pause(g, w_state); + + for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { + ioctl_w_state[sm_id].valid_warps[0] = + w_state[sm_id].valid_warps[0]; + ioctl_w_state[sm_id].valid_warps[1] = + 
w_state[sm_id].valid_warps[1]; + ioctl_w_state[sm_id].trapped_warps[0] = + w_state[sm_id].trapped_warps[0]; + ioctl_w_state[sm_id].trapped_warps[1] = + w_state[sm_id].trapped_warps[1]; + ioctl_w_state[sm_id].paused_warps[0] = + w_state[sm_id].paused_warps[0]; + ioctl_w_state[sm_id].paused_warps[1] = + w_state[sm_id].paused_warps[1]; + } + /* Copy the converted ioctl-layout state to user space - pointed by "args->pwarpstate" */ + if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, + ioctl_w_state, ioctl_size)) { + nvgpu_log_fn(g, "copy_to_user failed!"); + err = -EFAULT; + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + +out_free: + nvgpu_kfree(g, w_state); + nvgpu_kfree(g, ioctl_w_state); + + return err; +} + +static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = g->ops.gr.resume_from_pause(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) +{ + int err; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.gr.clear_sm_errors(g); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_ioctl_has_any_exception( + struct gk20a *g, + struct nvgpu_gpu_tpc_exception_en_status_args *args) +{ + u32 tpc_exception_en; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + args->tpc_exception_en_sm_mask = tpc_exception_en; + + return 0; +} + +static int gk20a_ctrl_get_num_vsms(struct gk20a *g, + struct nvgpu_gpu_num_vsms *args) +{ + struct gr_gk20a *gr = &g->gr; + args->num_vsms = gr->no_of_sm; + return 0; +} + +static int gk20a_ctrl_vsm_mapping(struct gk20a *g, + struct nvgpu_gpu_vsms_mapping *args) +{ + int err = 0; + struct gr_gk20a *gr = &g->gr; + size_t write_size = gr->no_of_sm * + sizeof(struct nvgpu_gpu_vsms_mapping_entry); + struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; + u32 i; + + vsms_buf = nvgpu_kzalloc(g, write_size); + if (vsms_buf == NULL) + return -ENOMEM; + + for (i = 0; i < gr->no_of_sm; i++) { + vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; + if (g->ops.gr.get_nonpes_aware_tpc) + vsms_buf[i].tpc_index = + g->ops.gr.get_nonpes_aware_tpc(g, + gr->sm_to_cluster[i].gpc_index, + gr->sm_to_cluster[i].tpc_index); + else + vsms_buf[i].tpc_index = + gr->sm_to_cluster[i].tpc_index; + } + + err = copy_to_user((void __user *)(uintptr_t) + args->vsms_map_buf_addr, + vsms_buf, write_size); + nvgpu_kfree(g, vsms_buf); + + return err; +} + +static int nvgpu_gpu_get_cpu_time_correlation_info( + struct gk20a *g, + struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) +{ + struct nvgpu_cpu_time_correlation_sample *samples; + int err; + u32 i; + + if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || + args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) + return -EINVAL; + + samples = nvgpu_kzalloc(g, args->count * + sizeof(struct nvgpu_cpu_time_correlation_sample)); + if (!samples) { + return -ENOMEM; + } + + err = g->ops.ptimer.get_timestamps_zipper(g, + args->source_id, args->count, samples); + if (!err) { + for (i = 0; i < args->count; i++) { + args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; + args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; + } + } + + nvgpu_kfree(g, samples); + + return err; +} + +static int nvgpu_gpu_get_gpu_time( + struct gk20a *g, + struct nvgpu_gpu_get_gpu_time_args
*args) +{ + u64 time; + int err; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.ptimer.read_ptimer(g, &time); + if (!err) + args->gpu_timestamp = time; + + gk20a_idle(g); + return err; +} + +static int nvgpu_gpu_get_engine_info( + struct gk20a *g, + struct nvgpu_gpu_get_engine_info_args *args) +{ + int err = 0; + u32 engine_enum = ENGINE_INVAL_GK20A; + u32 report_index = 0; + u32 engine_id_idx; + const u32 max_buffer_engines = args->engine_info_buf_size / + sizeof(struct nvgpu_gpu_get_engine_info_item); + struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = + (void __user *)(uintptr_t)args->engine_info_buf_addr; + + for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; + ++engine_id_idx) { + u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; + const struct fifo_engine_info_gk20a *src_info = + &g->fifo.engine_info[active_engine_id]; + struct nvgpu_gpu_get_engine_info_item dst_info; + + memset(&dst_info, 0, sizeof(dst_info)); + + engine_enum = src_info->engine_enum; + + switch (engine_enum) { + case ENGINE_GR_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; + break; + + case ENGINE_GRCE_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; + break; + + case ENGINE_ASYNC_CE_GK20A: + dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; + break; + + default: + nvgpu_err(g, "Unmapped engine enum %u", + engine_enum); + continue; + } + + dst_info.engine_instance = src_info->inst_id; + dst_info.runlist_id = src_info->runlist_id; + + if (report_index < max_buffer_engines) { + err = copy_to_user(&dst_item_list[report_index], + &dst_info, sizeof(dst_info)); + if (err) + goto clean_up; + } + + ++report_index; + } + + args->engine_info_buf_size = + report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); + +clean_up: + return err; +} + +static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, + struct nvgpu_gpu_alloc_vidmem_args *args) +{ + u32 align = args->in.alignment ? 
args->in.alignment : SZ_4K; + int fd; + + nvgpu_log_fn(g, " "); + + /* not yet supported */ + if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) + return -EINVAL; + + /* not yet supported */ + if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) + return -EINVAL; + + if (args->in.size & (SZ_4K - 1)) + return -EINVAL; + + if (!args->in.size) + return -EINVAL; + + if (align & (align - 1)) + return -EINVAL; + + if (align > roundup_pow_of_two(args->in.size)) { + /* log this special case, buddy allocator detail */ + nvgpu_warn(g, + "alignment larger than buffer size rounded up to power of 2 is not supported"); + return -EINVAL; + } + + fd = nvgpu_vidmem_export_linux(g, args->in.size); + if (fd < 0) + return fd; + + args->out.dmabuf_fd = fd; + + nvgpu_log_fn(g, "done, fd=%d", fd); + + return 0; +} + +static int nvgpu_gpu_get_memory_state(struct gk20a *g, + struct nvgpu_gpu_get_memory_state_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || + args->reserved[2] || args->reserved[3]) + return -EINVAL; + + err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); + + nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); + + return err; +} + +static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) +{ + u32 domain = 0; + + if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) + domain = NVGPU_CLK_DOMAIN_MCLK; + else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) + domain = NVGPU_CLK_DOMAIN_GPCCLK; + else + domain = NVGPU_CLK_DOMAIN_MAX + 1; + + return domain; +} + +static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_vf_points_args *args) +{ + struct nvgpu_gpu_clk_vf_point clk_point; + struct nvgpu_gpu_clk_vf_point __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + u32 clk_domains = 0; + int err; + u16 last_mhz; + u16 *fpoints; + u32 i; + u32 max_points = 0; + u32 num_points = 0; + u16 min_mhz; + u16 max_mhz; + + nvgpu_log_fn(g, " "); + + if (!session || args->flags) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + args->num_entries = 0; + + if (!nvgpu_clk_arb_is_valid_domain(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain))) + return -EINVAL; + + err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &max_points, NULL); + if (err) + return err; + + if (!args->max_entries) { + args->max_entries = max_points; + return 0; + } + + if (args->max_entries < max_points) + return -EINVAL; + + err = nvgpu_clk_arb_get_arbiter_clk_range(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &min_mhz, &max_mhz); + if (err) + return err; + + fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); + if (!fpoints) + return -ENOMEM; + + err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, + nvgpu_gpu_convert_clk_domain(args->clk_domain), + &max_points, fpoints); + if (err) + goto fail; + + entry = (struct nvgpu_gpu_clk_vf_point __user *) + (uintptr_t)args->clk_vf_point_entries; + + last_mhz = 0; + num_points = 0; + for (i = 0; (i < max_points) && !err; i++) { + + /* filter out duplicate frequencies */ + if (fpoints[i] == last_mhz) + continue; + + /* filter out out-of-range frequencies */ + if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) + continue; + + last_mhz = fpoints[i]; + clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); + + err = copy_to_user((void __user *)entry, &clk_point, + sizeof(clk_point)); + + num_points++; + entry++; + } + + args->num_entries = num_points; + +fail: + 
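/* fpoints was only needed to build the user-visible VF points; free it on both the success and the failure path. */ +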
nvgpu_kfree(g, fpoints); + return err; +} + +static int nvgpu_gpu_clk_get_range(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_range_args *args) +{ + struct nvgpu_gpu_clk_range clk_range; + struct nvgpu_gpu_clk_range __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + + u32 clk_domains = 0; + u32 num_domains; + u32 num_entries; + u32 i; + int bit; + int err; + u16 min_mhz, max_mhz; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + num_domains = hweight_long(clk_domains); + + if (!args->flags) { + if (!args->num_entries) { + args->num_entries = num_domains; + return 0; + } + + if (args->num_entries < num_domains) + return -EINVAL; + + args->num_entries = 0; + num_entries = num_domains; + + } else { + if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) + return -EINVAL; + + num_entries = args->num_entries; + if (num_entries > num_domains) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_range __user *) + (uintptr_t)args->clk_range_entries; + + for (i = 0; i < num_entries; i++, entry++) { + + if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { + if (copy_from_user(&clk_range, (void __user *)entry, + sizeof(clk_range))) + return -EFAULT; + } else { + bit = ffs(clk_domains) - 1; + clk_range.clk_domain = bit; + clk_domains &= ~BIT(bit); + } + + clk_range.flags = 0; + err = nvgpu_clk_arb_get_arbiter_clk_range(g, + nvgpu_gpu_convert_clk_domain(clk_range.clk_domain), + &min_mhz, &max_mhz); + clk_range.min_hz = MHZ_TO_HZ(min_mhz); + clk_range.max_hz = MHZ_TO_HZ(max_mhz); + + if (err) + return err; + + err = copy_to_user(entry, &clk_range, sizeof(clk_range)); + if (err) + return -EFAULT; + } + + args->num_entries = num_entries; + + return 0; +} + +static int nvgpu_gpu_clk_set_info(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_set_info_args *args) +{ + struct nvgpu_gpu_clk_info clk_info; + struct nvgpu_gpu_clk_info __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + + int fd; + u32 clk_domains = 0; + u16 freq_mhz; + int i; + int ret; + + nvgpu_log_fn(g, " "); + + if (!session || args->flags) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + if (!clk_domains) + return -EINVAL; + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + for (i = 0; i < args->num_entries; i++, entry++) { + + if (copy_from_user(&clk_info, entry, sizeof(clk_info))) + return -EFAULT; + + if (!nvgpu_clk_arb_is_valid_domain(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + ret = nvgpu_clk_arb_install_request_fd(g, session, &fd); + if (ret < 0) + return ret; + + for (i = 0; i < args->num_entries; i++, entry++) { + + if (copy_from_user(&clk_info, (void __user *)entry, + sizeof(clk_info))) + return -EFAULT; + freq_mhz = HZ_TO_MHZ(clk_info.freq_hz); + + nvgpu_clk_arb_set_session_target_mhz(session, fd, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); + } + + ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); + if (ret < 0) + return ret; + + args->completion_fd = fd; + + return ret; +} + +static int nvgpu_gpu_clk_get_info(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_clk_get_info_args *args) +{ + struct nvgpu_gpu_clk_info clk_info; + struct nvgpu_gpu_clk_info __user *entry; + struct nvgpu_clk_session *session = priv->clk_session; + 
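/* Each arbiter clock domain may be queried for up to three clock types (target, actual, effective), hence the num_domains * 3 bound on num_entries below. */ +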
u32 clk_domains = 0; + u32 num_domains; + u32 num_entries; + u32 i; + u16 freq_mhz; + int err; + int bit; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); + num_domains = hweight_long(clk_domains); + + if (!args->flags) { + if (!args->num_entries) { + args->num_entries = num_domains; + return 0; + } + + if (args->num_entries < num_domains) + return -EINVAL; + + args->num_entries = 0; + num_entries = num_domains; + + } else { + if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) + return -EINVAL; + + num_entries = args->num_entries; + if (num_entries > num_domains * 3) + return -EINVAL; + } + + entry = (struct nvgpu_gpu_clk_info __user *) + (uintptr_t)args->clk_info_entries; + + for (i = 0; i < num_entries; i++, entry++) { + + if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { + if (copy_from_user(&clk_info, (void __user *)entry, + sizeof(clk_info))) + return -EFAULT; + } else { + bit = ffs(clk_domains) - 1; + clk_info.clk_domain = bit; + clk_domains &= ~BIT(bit); + clk_info.clk_type = args->clk_type; + } + + switch (clk_info.clk_type) { + case NVGPU_GPU_CLK_TYPE_TARGET: + err = nvgpu_clk_arb_get_session_target_mhz(session, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + case NVGPU_GPU_CLK_TYPE_ACTUAL: + err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + case NVGPU_GPU_CLK_TYPE_EFFECTIVE: + err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, + nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), + &freq_mhz); + break; + default: + freq_mhz = 0; + err = -EINVAL; + break; + } + if (err) + return err; + + clk_info.flags = 0; + clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); + + err = copy_to_user((void __user *)entry, &clk_info, + sizeof(clk_info)); + if (err) + return -EFAULT; + } + + args->num_entries = num_entries; + + return 0; +} + +static int nvgpu_gpu_get_event_fd(struct gk20a *g, + struct gk20a_ctrl_priv *priv, + struct nvgpu_gpu_get_event_fd_args *args) +{ + struct nvgpu_clk_session *session = priv->clk_session; + + nvgpu_log_fn(g, " "); + + if (!session) + return -EINVAL; + + return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, + args->flags); +} + +static int nvgpu_gpu_get_voltage(struct gk20a *g, + struct nvgpu_gpu_get_voltage_args *args) +{ + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + if (args->reserved) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + switch (args->which) { + case NVGPU_GPU_VOLTAGE_CORE: + err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); + break; + case NVGPU_GPU_VOLTAGE_SRAM: + err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); + break; + case NVGPU_GPU_VOLTAGE_BUS: + err = pmgr_pwr_devices_get_voltage(g, &args->voltage); + break; + default: + err = -EINVAL; + } + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_current(struct gk20a *g, + struct nvgpu_gpu_get_current_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = pmgr_pwr_devices_get_current(g, &args->currnt); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_power(struct gk20a *g, + struct nvgpu_gpu_get_power_args *args) +{ + int err; + + 
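/* As with the voltage and current queries above, the GPU must be powered up around the PMGR power query, hence the gk20a_busy()/gk20a_idle() bracketing. */ +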
nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = pmgr_pwr_devices_get_power(g, &args->power); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_get_temperature(struct gk20a *g, + struct nvgpu_gpu_get_temperature_args *args) +{ + int err; + u32 temp_f24_8; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) + return -EINVAL; + + if (!g->ops.therm.get_internal_sensor_curr_temp) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); + + gk20a_idle(g); + + args->temp_f24_8 = (s32)temp_f24_8; + + return err; +} + +static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, + struct nvgpu_gpu_set_therm_alert_limit_args *args) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) + return -EINVAL; + + if (!g->ops.therm.configure_therm_alert) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); + + gk20a_idle(g); + + return err; +} + +static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, + u32 flags) +{ + int err = 0; + bool allow; + bool disallow; + + allow = flags & + NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; + + disallow = flags & + NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; + + /* Can't be both at the same time */ + if (allow && disallow) + return -EINVAL; + + /* Nothing to do */ + if (!allow && !disallow) + return 0; + + /* + * Moving into explicit idle or back from it? A call that doesn't + * change the status is a no-op. 
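+ * Allowing railgating drops the power reference the channel has been + * holding (gk20a_idle); disallowing takes it back with gk20a_busy().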
+ */ + if (!ch->deterministic_railgate_allowed && + allow) { + gk20a_idle(ch->g); + } else if (ch->deterministic_railgate_allowed && + !allow) { + err = gk20a_busy(ch->g); + if (err) { + nvgpu_warn(ch->g, + "cannot busy to restore deterministic ch"); + return err; + } + } + ch->deterministic_railgate_allowed = allow; + + return err; +} + +static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) +{ + if (!ch->deterministic) + return -EINVAL; + + return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); +} + +static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, + struct nvgpu_gpu_set_deterministic_opts_args *args) +{ + int __user *user_channels; + u32 i = 0; + int err = 0; + + nvgpu_log_fn(g, " "); + + user_channels = (int __user *)(uintptr_t)args->channels; + + /* Upper limit; prevent holding deterministic_busy for long */ + if (args->num_channels > g->fifo.num_channels) { + err = -EINVAL; + goto out; + } + + /* Trivial sanity check first */ + if (!access_ok(VERIFY_READ, user_channels, + args->num_channels * sizeof(int))) { + err = -EFAULT; + goto out; + } + + nvgpu_rwsem_down_read(&g->deterministic_busy); + + /* note: we exit at the first failure */ + for (; i < args->num_channels; i++) { + int ch_fd = 0; + struct channel_gk20a *ch; + + if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { + /* User raced with above access_ok */ + err = -EFAULT; + break; + } + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) { + err = -EINVAL; + break; + } + + err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); + + gk20a_channel_put(ch); + + if (err) + break; + } + + nvgpu_rwsem_up_read(&g->deterministic_busy); + +out: + args->num_channels = i; + return err; +} + +static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, + struct nvgpu_gpu_read_single_sm_error_state_args *args) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr_sm_error_state *sm_error_state; + struct nvgpu_gpu_sm_error_state_record sm_error_state_record; + u32 sm_id; + int err = 0; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + sm_error_state = gr->sm_error_states + sm_id; + sm_error_state_record.global_esr = + sm_error_state->hww_global_esr; + sm_error_state_record.warp_esr = + sm_error_state->hww_warp_esr; + sm_error_state_record.warp_esr_pc = + sm_error_state->hww_warp_esr_pc; + sm_error_state_record.global_esr_report_mask = + sm_error_state->hww_global_esr_report_mask; + sm_error_state_record.warp_esr_report_mask = + sm_error_state->hww_warp_esr_report_mask; + + if (args->record_size > 0) { + size_t write_size = sizeof(sm_error_state_record); + + if (write_size > args->record_size) + write_size = args->record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_to_user((void __user *)(uintptr_t) + args->record_mem, + &sm_error_state_record, + write_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) { + nvgpu_err(g, "copy_to_user failed!"); + return err; + } + + args->record_size = write_size; + } + + return 0; +} + +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct gk20a_ctrl_priv *priv = filp->private_data; + struct gk20a *g = priv->g; + struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args; + struct nvgpu_gpu_zcull_get_info_args *get_info_args; + struct nvgpu_gpu_zbc_set_table_args *set_table_args; + struct nvgpu_gpu_zbc_query_table_args *query_table_args; + u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE]; + struct gr_zcull_info *zcull_info; + struct
zbc_entry *zbc_val; + struct zbc_query_params *zbc_tbl; + int i, err = 0; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + switch (cmd) { + case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: + get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; + + get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr); + + break; + case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: + get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf; + + memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args)); + + zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info)); + if (zcull_info == NULL) + return -ENOMEM; + + err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info); + if (err) { + nvgpu_kfree(g, zcull_info); + break; + } + + get_info_args->width_align_pixels = zcull_info->width_align_pixels; + get_info_args->height_align_pixels = zcull_info->height_align_pixels; + get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots; + get_info_args->aliquot_total = zcull_info->aliquot_total; + get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier; + get_info_args->region_header_size = zcull_info->region_header_size; + get_info_args->subregion_header_size = zcull_info->subregion_header_size; + get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels; + get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels; + get_info_args->subregion_count = zcull_info->subregion_count; + + nvgpu_kfree(g, zcull_info); + break; + case NVGPU_GPU_IOCTL_ZBC_SET_TABLE: + set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf; + + zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry)); + if (zbc_val == NULL) + return -ENOMEM; + + zbc_val->format = set_table_args->format; + zbc_val->type = set_table_args->type; + + switch (zbc_val->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + zbc_val->color_ds[i] = set_table_args->color_ds[i]; + zbc_val->color_l2[i] = set_table_args->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + case T19X_ZBC: + zbc_val->depth = set_table_args->depth; + break; + default: + err = -EINVAL; + } + + if (!err) { + err = gk20a_busy(g); + if (!err) { + err = g->ops.gr.zbc_set_table(g, &g->gr, + zbc_val); + gk20a_idle(g); + } + } + + if (zbc_val) + nvgpu_kfree(g, zbc_val); + break; + case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE: + query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf; + + zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params)); + if (zbc_tbl == NULL) + return -ENOMEM; + + zbc_tbl->type = query_table_args->type; + zbc_tbl->index_size = query_table_args->index_size; + + err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl); + + if (!err) { + switch (zbc_tbl->type) { + case GK20A_ZBC_TYPE_COLOR: + for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { + query_table_args->color_ds[i] = zbc_tbl->color_ds[i]; + query_table_args->color_l2[i] = zbc_tbl->color_l2[i]; + } + break; + case GK20A_ZBC_TYPE_DEPTH: + case T19X_ZBC: + query_table_args->depth = zbc_tbl->depth; + break; + case 
GK20A_ZBC_TYPE_INVALID: + query_table_args->index_size = zbc_tbl->index_size; + break; + default: + err = -EINVAL; + } + if (!err) { + query_table_args->format = zbc_tbl->format; + query_table_args->ref_cnt = zbc_tbl->ref_cnt; + } + } + + if (zbc_tbl) + nvgpu_kfree(g, zbc_tbl); + break; + + case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS: + err = gk20a_ctrl_ioctl_gpu_characteristics( + g, (struct nvgpu_gpu_get_characteristics *)buf); + break; + case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: + err = gk20a_ctrl_prepare_compressible_read(g, + (struct nvgpu_gpu_prepare_compressible_read_args *)buf); + break; + case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: + err = gk20a_ctrl_mark_compressible_write(g, + (struct nvgpu_gpu_mark_compressible_write_args *)buf); + break; + case NVGPU_GPU_IOCTL_ALLOC_AS: + err = gk20a_ctrl_alloc_as(g, + (struct nvgpu_alloc_as_args *)buf); + break; + case NVGPU_GPU_IOCTL_OPEN_TSG: + err = gk20a_ctrl_open_tsg(g, + (struct nvgpu_gpu_open_tsg_args *)buf); + break; + case NVGPU_GPU_IOCTL_GET_TPC_MASKS: + err = gk20a_ctrl_get_tpc_masks(g, + (struct nvgpu_gpu_get_tpc_masks_args *)buf); + break; + case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: + err = gk20a_ctrl_get_fbp_l2_masks(g, + (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); + break; + case NVGPU_GPU_IOCTL_OPEN_CHANNEL: + /* this arg type here, but ..gpu_open_channel_args in nvgpu.h + * for consistency - they are the same */ + err = gk20a_channel_open_ioctl(g, + (struct nvgpu_channel_open_args *)buf); + break; + case NVGPU_GPU_IOCTL_FLUSH_L2: + err = nvgpu_gpu_ioctl_l2_fb_ops(g, + (struct nvgpu_gpu_l2_fb_args *)buf); + break; + case NVGPU_GPU_IOCTL_INVAL_ICACHE: + err = gr_gk20a_elpg_protected_call(g, + nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf)); + break; + + case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE: + err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g, + (struct nvgpu_gpu_mmu_debug_mode_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE: + err = gr_gk20a_elpg_protected_call(g, + nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf)); + break; + + case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND: + err = nvgpu_gpu_ioctl_trigger_suspend(g); + break; + + case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE: + err = nvgpu_gpu_ioctl_wait_for_pause(g, + (struct nvgpu_gpu_wait_pause_args *)buf); + break; + + case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE: + err = nvgpu_gpu_ioctl_resume_from_pause(g); + break; + + case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS: + err = nvgpu_gpu_ioctl_clear_sm_errors(g); + break; + + case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS: + err = nvgpu_gpu_ioctl_has_any_exception(g, + (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); + break; + + case NVGPU_GPU_IOCTL_NUM_VSMS: + err = gk20a_ctrl_get_num_vsms(g, + (struct nvgpu_gpu_num_vsms *)buf); + break; + case NVGPU_GPU_IOCTL_VSMS_MAPPING: + err = gk20a_ctrl_vsm_mapping(g, + (struct nvgpu_gpu_vsms_mapping *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO: + err = nvgpu_gpu_get_cpu_time_correlation_info(g, + (struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_GPU_TIME: + err = nvgpu_gpu_get_gpu_time(g, + (struct nvgpu_gpu_get_gpu_time_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_ENGINE_INFO: + err = nvgpu_gpu_get_engine_info(g, + (struct nvgpu_gpu_get_engine_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_ALLOC_VIDMEM: + err = nvgpu_gpu_alloc_vidmem(g, + (struct nvgpu_gpu_alloc_vidmem_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_MEMORY_STATE: + err = 
nvgpu_gpu_get_memory_state(g, + (struct nvgpu_gpu_get_memory_state_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_RANGE: + err = nvgpu_gpu_clk_get_range(g, priv, + (struct nvgpu_gpu_clk_range_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS: + err = nvgpu_gpu_clk_get_vf_points(g, priv, + (struct nvgpu_gpu_clk_vf_points_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_SET_INFO: + err = nvgpu_gpu_clk_set_info(g, priv, + (struct nvgpu_gpu_clk_set_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_CLK_GET_INFO: + err = nvgpu_gpu_clk_get_info(g, priv, + (struct nvgpu_gpu_clk_get_info_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_EVENT_FD: + err = nvgpu_gpu_get_event_fd(g, priv, + (struct nvgpu_gpu_get_event_fd_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_VOLTAGE: + err = nvgpu_gpu_get_voltage(g, + (struct nvgpu_gpu_get_voltage_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_CURRENT: + err = nvgpu_gpu_get_current(g, + (struct nvgpu_gpu_get_current_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_POWER: + err = nvgpu_gpu_get_power(g, + (struct nvgpu_gpu_get_power_args *)buf); + break; + + case NVGPU_GPU_IOCTL_GET_TEMPERATURE: + err = nvgpu_gpu_get_temperature(g, + (struct nvgpu_gpu_get_temperature_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT: + err = nvgpu_gpu_set_therm_alert_limit(g, + (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); + break; + + case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: + err = nvgpu_gpu_set_deterministic_opts(g, + (struct nvgpu_gpu_set_deterministic_opts_args *)buf); + break; + + case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: + err = nvgpu_gpu_read_single_sm_error_state(g, + (struct nvgpu_gpu_read_single_sm_error_state_args *)buf); + break; + + default: + nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h new file mode 100644 index 00000000..8b4a5e59 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __NVGPU_IOCTL_CTRL_H__ +#define __NVGPU_IOCTL_CTRL_H__ + +int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp); +int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp); +long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c new file mode 100644 index 00000000..31e7e2cb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -0,0 +1,2003 @@ +/* + * Tegra GK20A GPU Debugger/Profiler Driver + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "gk20a/regops_gk20a.h" +#include "gk20a/dbg_gpu_gk20a.h" +#include "os_linux.h" +#include "platform_gk20a.h" +#include "ioctl_dbg.h" + +/* turn seriously unwieldy names -> something shorter */ +#define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x + +/* silly allocator - just increment id */ +static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); +static int generate_unique_id(void) +{ + return nvgpu_atomic_add_return(1, &unique_id); +} + +static int alloc_profiler(struct gk20a *g, + struct dbg_profiler_object_data **_prof) +{ + struct dbg_profiler_object_data *prof; + *_prof = NULL; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + prof = nvgpu_kzalloc(g, sizeof(*prof)); + if (!prof) + return -ENOMEM; + + prof->prof_handle = generate_unique_id(); + *_prof = prof; + return 0; +} + +static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) +{ + struct dbg_session_gk20a_linux *dbg_s_linux; + *_dbg_s_linux = NULL; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); + if (!dbg_s_linux) + return -ENOMEM; + + dbg_s_linux->dbg_s.id = generate_unique_id(); + *_dbg_s_linux = dbg_s_linux; + return 0; +} + +static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, + struct gr_gk20a *gr); + +static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); + +static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_exec_reg_ops_args *args); + +static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_powergate_args *args); + +static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); + +static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); + +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); + +static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); + +static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); + +static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_profiler_reserve_args *args); + +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args); + +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); + +static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, + int
timeout_mode); + +static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, + u32 profiler_handle); + +static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); + +static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); + +static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, + u32 profiler_handle); + +static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); + +static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, + struct file *filp, bool is_profiler); + +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) { + nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", + dbg_s->id); + nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", + dbg_s->dbg_events.num_pending_events); + mask = (POLLPRI | POLLIN); + } + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); + + return mask; +} + +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp) +{ + struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = dbg_s->g; + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + + nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name); + + /* unbind channels */ + dbg_unbind_all_channels_gk20a(dbg_s); + + /* Powergate/timeout enable is called here because a dbg_session that + * issued the powergate/timeout disable ioctl may be killed without + * ever issuing the corresponding enable ioctl. + */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); + nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE); + + /* If this session owned the perf buffer, release it */ + if (g->perfbuf.owner == dbg_s) + gk20a_perfbuf_release_locked(g, g->perfbuf.offset); + + /* Per-context profiler objects were released when we called + * dbg_unbind_all_channels. We could still have global ones. + */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->session_id == dbg_s->id) { + if (prof_obj->has_reservation) + g->ops.dbg_session_ops.
+ release_profiler_reservation(dbg_s, prof_obj); + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + } + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + + nvgpu_mutex_destroy(&dbg_s->ch_list_lock); + nvgpu_mutex_destroy(&dbg_s->ioctl_lock); + + nvgpu_kfree(g, dbg_session_linux); + gk20a_put(g); + + return 0; +} + +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, prof.cdev); + struct gk20a *g = &l->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); +} + +static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_timeout_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} + +static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + u32 sm_id; + struct channel_gk20a *ch; + struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; + struct nvgpu_gr_sm_error_state sm_error_state; + int err = 0; + + /* Not currently supported in the virtual case */ + if (g->is_virtual) + return -ENOSYS; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + if (args->sm_error_state_record_size > 0) { + size_t read_size = sizeof(sm_error_state_record); + + if (read_size > args->sm_error_state_record_size) + read_size = args->sm_error_state_record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_from_user(&sm_error_state_record, + (void __user *)(uintptr_t) + args->sm_error_state_record_mem, + read_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) + return -EFAULT; + } + + err = gk20a_busy(g); + if (err) + return err; + + sm_error_state.hww_global_esr = + sm_error_state_record.hww_global_esr; + sm_error_state.hww_warp_esr = + sm_error_state_record.hww_warp_esr; + sm_error_state.hww_warp_esr_pc = + sm_error_state_record.hww_warp_esr_pc; + sm_error_state.hww_global_esr_report_mask = + sm_error_state_record.hww_global_esr_report_mask; + sm_error_state.hww_warp_esr_report_mask = + sm_error_state_record.hww_warp_esr_report_mask; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.update_sm_error_state(g, ch, + sm_id, &sm_error_state)); + + gk20a_idle(g); + + return err; +} + + +static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr_sm_error_state *sm_error_state; + struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; + u32 sm_id; + int err = 0; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + sm_error_state = gr->sm_error_states + sm_id; + sm_error_state_record.hww_global_esr = + sm_error_state->hww_global_esr; + sm_error_state_record.hww_warp_esr = + sm_error_state->hww_warp_esr; + sm_error_state_record.hww_warp_esr_pc = +
sm_error_state->hww_warp_esr_pc; + sm_error_state_record.hww_global_esr_report_mask = + sm_error_state->hww_global_esr_report_mask; + sm_error_state_record.hww_warp_esr_report_mask = + sm_error_state->hww_warp_esr_report_mask; + + if (args->sm_error_state_record_size > 0) { + size_t write_size = sizeof(sm_error_state_record); + + if (write_size > args->sm_error_state_record_size) + write_size = args->sm_error_state_record_size; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + err = copy_to_user((void __user *)(uintptr_t) + args->sm_error_state_record_mem, + &sm_error_state_record, + write_size); + nvgpu_mutex_release(&g->dbg_sessions_lock); + if (err) { + nvgpu_err(g, "copy_to_user failed!"); + return err; + } + + args->sm_error_state_record_size = write_size; + } + + return 0; +} + + +static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->broadcast_stop_trigger = (args->broadcast != 0); + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); + + return 0; +} + +static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, + int timeout_mode) +{ + struct gk20a *g = dbg_s->g; + int err = 0; + + nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", + timeout_mode); + + switch (timeout_mode) { + case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: + if (dbg_s->is_timeout_disabled == true) + nvgpu_atomic_dec(&g->timeouts_disabled_refcount); + dbg_s->is_timeout_disabled = false; + break; + + case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: + if (dbg_s->is_timeout_disabled == false) + nvgpu_atomic_inc(&g->timeouts_disabled_refcount); + dbg_s->is_timeout_disabled = true; + break; + + default: + nvgpu_err(g, + "unrecognized dbg gpu timeout mode : 0x%x", + timeout_mode); + err = -EINVAL; + break; + } + + if (!err) + nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " + "timeouts disabled refcount %d", + dbg_s->is_timeout_disabled ?
"true" : "false", + nvgpu_atomic_read(&g->timeouts_disabled_refcount)); + return err; +} + +static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, + struct file *filp, bool is_profiler) +{ + struct nvgpu_os_linux *l; + struct dbg_session_gk20a_linux *dbg_session_linux; + struct dbg_session_gk20a *dbg_s; + struct gk20a *g; + + struct device *dev; + + int err; + + if (!is_profiler) + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, dbg.cdev); + else + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, prof.cdev); + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + + dev = dev_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name); + + err = alloc_session(g, &dbg_session_linux); + if (err) + goto free_ref; + + dbg_s = &dbg_session_linux->dbg_s; + + filp->private_data = dbg_session_linux; + dbg_session_linux->dev = dev; + dbg_s->g = g; + dbg_s->is_profiler = is_profiler; + dbg_s->is_pg_disabled = false; + dbg_s->is_timeout_disabled = false; + + nvgpu_cond_init(&dbg_s->dbg_events.wait_queue); + nvgpu_init_list_node(&dbg_s->ch_list); + err = nvgpu_mutex_init(&dbg_s->ch_list_lock); + if (err) + goto err_free_session; + err = nvgpu_mutex_init(&dbg_s->ioctl_lock); + if (err) + goto err_destroy_lock; + dbg_s->dbg_events.events_enabled = false; + dbg_s->dbg_events.num_pending_events = 0; + + return 0; + +err_destroy_lock: + nvgpu_mutex_destroy(&dbg_s->ch_list_lock); +err_free_session: + nvgpu_kfree(g, dbg_session_linux); +free_ref: + gk20a_put(g); + return err; +} + +void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) +{ + nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue); +} + +static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s, + struct dbg_session_channel_data *ch_data) +{ + struct gk20a *g = dbg_s->g; + int chid; + struct dbg_session_data *session_data; + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + struct dbg_session_channel_data_linux *ch_data_linux; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + chid = ch_data->chid; + + /* If there's a profiler ctx reservation record associated with this + * session/channel pair, release it. + */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if ((prof_obj->session_id == dbg_s->id) && + (prof_obj->ch->chid == chid)) { + if (prof_obj->has_reservation) { + g->ops.dbg_session_ops. + release_profiler_reservation(dbg_s, prof_obj); + } + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + } + } + + nvgpu_list_del(&ch_data->ch_entry); + + session_data = ch_data->session_data; + nvgpu_list_del(&session_data->dbg_s_entry); + nvgpu_kfree(dbg_s->g, session_data); + + ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux, + ch_data); + + fput(ch_data_linux->ch_f); + nvgpu_kfree(dbg_s->g, ch_data_linux); + + return 0; +} + +static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_bind_channel_args *args) +{ + struct file *f; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + struct dbg_session_channel_data_linux *ch_data_linux; + struct dbg_session_data *session_data; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", + g->name, args->channel_fd); + + /* + * Although gk20a_get_channel_from_file gives us a channel ref, need to + * hold a ref to the file during the session lifetime. See comment in + * struct dbg_session_channel_data. 
+ */ + f = fget(args->channel_fd); + if (!f) + return -ENODEV; + + ch = gk20a_get_channel_from_file(args->channel_fd); + if (!ch) { + nvgpu_log_fn(g, "no channel found for fd"); + err = -EINVAL; + goto out_fput; + } + + nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&ch->dbg_s_lock); + + ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux)); + if (!ch_data_linux) { + err = -ENOMEM; + goto out_chput; + } + ch_data_linux->ch_f = f; + ch_data_linux->ch_data.channel_fd = args->channel_fd; + ch_data_linux->ch_data.chid = ch->chid; + ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a; + nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry); + + session_data = nvgpu_kzalloc(g, sizeof(*session_data)); + if (!session_data) { + err = -ENOMEM; + goto out_kfree; + } + session_data->dbg_s = dbg_s; + nvgpu_init_list_node(&session_data->dbg_s_entry); + ch_data_linux->ch_data.session_data = session_data; + + nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list); + + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + + nvgpu_mutex_release(&ch->dbg_s_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + gk20a_channel_put(ch); + + return 0; + +out_kfree: + nvgpu_kfree(g, ch_data_linux); +out_chput: + gk20a_channel_put(ch); + nvgpu_mutex_release(&ch->dbg_s_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); +out_fput: + fput(f); + return err; +} + +static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) +{ + struct dbg_session_channel_data *ch_data, *tmp; + struct gk20a *g = dbg_s->g; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) + ch_data->unbind_single_channel(dbg_s, ch_data); + nvgpu_mutex_release(&dbg_s->ch_list_lock); + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return 0; +} + +/* + * Convert common regops op values of the form of NVGPU_DBG_REG_OP_* + * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* + */ +static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) +{ + switch (regops_op) { + case REGOP(READ_32): + return REGOP_LINUX(READ_32); + case REGOP(WRITE_32): + return REGOP_LINUX(WRITE_32); + case REGOP(READ_64): + return REGOP_LINUX(READ_64); + case REGOP(WRITE_64): + return REGOP_LINUX(WRITE_64); + case REGOP(READ_08): + return REGOP_LINUX(READ_08); + case REGOP(WRITE_08): + return REGOP_LINUX(WRITE_08); + } + + return regops_op; +} + +/* + * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* + * into common regops op values of the form of NVGPU_DBG_REG_OP_* + */ +static u32 nvgpu_get_regops_op_values_common(u32 regops_op) +{ + switch (regops_op) { + case REGOP_LINUX(READ_32): + return REGOP(READ_32); + case REGOP_LINUX(WRITE_32): + return REGOP(WRITE_32); + case REGOP_LINUX(READ_64): + return REGOP(READ_64); + case REGOP_LINUX(WRITE_64): + return REGOP(WRITE_64); + case REGOP_LINUX(READ_08): + return REGOP(READ_08); + case REGOP_LINUX(WRITE_08): + return REGOP(WRITE_08); + } + + return regops_op; +} + +/* + * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* + * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* + */ +static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) +{ + switch (regops_type) { + case 
REGOP(TYPE_GLOBAL): + return REGOP_LINUX(TYPE_GLOBAL); + case REGOP(TYPE_GR_CTX): + return REGOP_LINUX(TYPE_GR_CTX); + case REGOP(TYPE_GR_CTX_TPC): + return REGOP_LINUX(TYPE_GR_CTX_TPC); + case REGOP(TYPE_GR_CTX_SM): + return REGOP_LINUX(TYPE_GR_CTX_SM); + case REGOP(TYPE_GR_CTX_CROP): + return REGOP_LINUX(TYPE_GR_CTX_CROP); + case REGOP(TYPE_GR_CTX_ZROP): + return REGOP_LINUX(TYPE_GR_CTX_ZROP); + case REGOP(TYPE_GR_CTX_QUAD): + return REGOP_LINUX(TYPE_GR_CTX_QUAD); + } + + return regops_type; +} + +/* + * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* + * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* + */ +static u32 nvgpu_get_regops_type_values_common(u32 regops_type) +{ + switch (regops_type) { + case REGOP_LINUX(TYPE_GLOBAL): + return REGOP(TYPE_GLOBAL); + case REGOP_LINUX(TYPE_GR_CTX): + return REGOP(TYPE_GR_CTX); + case REGOP_LINUX(TYPE_GR_CTX_TPC): + return REGOP(TYPE_GR_CTX_TPC); + case REGOP_LINUX(TYPE_GR_CTX_SM): + return REGOP(TYPE_GR_CTX_SM); + case REGOP_LINUX(TYPE_GR_CTX_CROP): + return REGOP(TYPE_GR_CTX_CROP); + case REGOP_LINUX(TYPE_GR_CTX_ZROP): + return REGOP(TYPE_GR_CTX_ZROP); + case REGOP_LINUX(TYPE_GR_CTX_QUAD): + return REGOP(TYPE_GR_CTX_QUAD); + } + + return regops_type; +} + +/* + * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_* + * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* + */ +static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) +{ + switch (regops_status) { + case REGOP(STATUS_SUCCESS): + return REGOP_LINUX(STATUS_SUCCESS); + case REGOP(STATUS_INVALID_OP): + return REGOP_LINUX(STATUS_INVALID_OP); + case REGOP(STATUS_INVALID_TYPE): + return REGOP_LINUX(STATUS_INVALID_TYPE); + case REGOP(STATUS_INVALID_OFFSET): + return REGOP_LINUX(STATUS_INVALID_OFFSET); + case REGOP(STATUS_UNSUPPORTED_OP): + return REGOP_LINUX(STATUS_UNSUPPORTED_OP); + case REGOP(STATUS_INVALID_MASK ): + return REGOP_LINUX(STATUS_INVALID_MASK); + } + + return regops_status; +} + +/* + * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* + * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_* + */ +static u32 nvgpu_get_regops_status_values_common(u32 regops_status) +{ + switch (regops_status) { + case REGOP_LINUX(STATUS_SUCCESS): + return REGOP(STATUS_SUCCESS); + case REGOP_LINUX(STATUS_INVALID_OP): + return REGOP(STATUS_INVALID_OP); + case REGOP_LINUX(STATUS_INVALID_TYPE): + return REGOP(STATUS_INVALID_TYPE); + case REGOP_LINUX(STATUS_INVALID_OFFSET): + return REGOP(STATUS_INVALID_OFFSET); + case REGOP_LINUX(STATUS_UNSUPPORTED_OP): + return REGOP(STATUS_UNSUPPORTED_OP); + case REGOP_LINUX(STATUS_INVALID_MASK ): + return REGOP(STATUS_INVALID_MASK); + } + + return regops_status; +} + +static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, + struct nvgpu_dbg_reg_op *out, u32 num_ops) +{ + u32 i; + + if(in == NULL || out == NULL) + return -ENOMEM; + + for (i = 0; i < num_ops; i++) { + out[i].op = nvgpu_get_regops_op_values_common(in[i].op); + out[i].type = nvgpu_get_regops_type_values_common(in[i].type); + out[i].status = nvgpu_get_regops_status_values_common(in[i].status); + out[i].quad = in[i].quad; + out[i].group_mask = in[i].group_mask; + out[i].sub_group_mask = in[i].sub_group_mask; + out[i].offset = in[i].offset; + out[i].value_lo = in[i].value_lo; + out[i].value_hi = in[i].value_hi; + out[i].and_n_mask_lo = in[i].and_n_mask_lo; + out[i].and_n_mask_hi = in[i].and_n_mask_hi; + } + + return 0; +} + 
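Each direction of the translation above is a plain value map, so converting a whole fragment is just a field-by-field copy plus these lookups. Below is a minimal sketch (not part of the patch) of how the two batch converters cooperate on one op; the 0x100 offset is hypothetical and nvgpu_get_regops_data_linux() is the mirror helper defined just after this point, reusing this file's REGOP()/REGOP_LINUX() shorthand:

	/*
	 * Illustration only: round-trip a single op through the
	 * UAPI <-> common translation layer.
	 */
	static void regops_round_trip_sketch(void)
	{
		struct nvgpu_dbg_gpu_reg_op uapi_op = {
			.op = REGOP_LINUX(READ_32),
			.type = REGOP_LINUX(TYPE_GLOBAL),
			.offset = 0x100,	/* hypothetical register offset */
		};
		struct nvgpu_dbg_reg_op common_op;

		/* userspace encoding -> common encoding, as done before exec_reg_ops() */
		nvgpu_get_regops_data_common(&uapi_op, &common_op, 1);

		/* common encoding -> userspace encoding, as done before copy_to_user() */
		nvgpu_get_regops_data_linux(&common_op, &uapi_op, 1);

		/* the two maps are inverses, so op/type/status survive the round trip */
	}

Keeping the mapping explicit, rather than relying on the UAPI and common numeric values happening to match, is what lets the common regops header evolve independently of the Linux ioctl ABI.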
+static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, + struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) +{ + u32 i; + + if(in == NULL || out == NULL) + return -ENOMEM; + + for (i = 0; i < num_ops; i++) { + out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); + out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); + out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); + out[i].quad = in[i].quad; + out[i].group_mask = in[i].group_mask; + out[i].sub_group_mask = in[i].sub_group_mask; + out[i].offset = in[i].offset; + out[i].value_lo = in[i].value_lo; + out[i].value_hi = in[i].value_hi; + out[i].and_n_mask_lo = in[i].and_n_mask_lo; + out[i].and_n_mask_hi = in[i].and_n_mask_hi; + } + + return 0; +} + +static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_exec_reg_ops_args *args) +{ + int err = 0, powergate_err = 0; + bool is_pg_disabled = false; + + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + + nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); + + if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { + nvgpu_err(g, "regops limit exceeded"); + return -EINVAL; + } + + if (args->num_ops == 0) { + /* Nothing to do */ + return 0; + } + + if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { + nvgpu_err(g, "reg ops work buffer not allocated"); + return -ENODEV; + } + + if (!dbg_s->id) { + nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); + return -EINVAL; + } + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!dbg_s->is_profiler && !ch) { + nvgpu_err(g, "bind a channel before regops for a debugging session"); + return -EINVAL; + } + + /* be sure that ctx info is in place */ + if (!g->is_virtual && + !gr_context_info_available(dbg_s, &g->gr)) { + nvgpu_err(g, "gr context data not available"); + return -ENODEV; + } + + /* since exec_reg_ops sends methods to the ucode, it must take the + * global gpu lock to protect against mixing methods from debug sessions + * on other channels */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + if (!dbg_s->is_pg_disabled && !g->is_virtual) { + /* In the virtual case, the server will handle + * disabling/enabling powergating when processing reg ops + */ + powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, + true); + is_pg_disabled = true; + } + + if (!powergate_err) { + u64 ops_offset = 0; /* index offset */ + + struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; + + linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * + sizeof(struct nvgpu_dbg_gpu_reg_op)); + + if (!linux_fragment) + return -ENOMEM; + + while (ops_offset < args->num_ops && !err) { + const u64 num_ops = + min(args->num_ops - ops_offset, + (u64)(g->dbg_regops_tmp_buf_ops)); + const u64 fragment_size = + num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); + + void __user *const fragment = + (void __user *)(uintptr_t) + (args->ops + + ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); + + nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", + ops_offset, num_ops); + + nvgpu_log_fn(g, "Copying regops from userspace"); + + if (copy_from_user(linux_fragment, + fragment, fragment_size)) { + nvgpu_err(g, "copy_from_user failed!"); + err = -EFAULT; + break; + } + + err = nvgpu_get_regops_data_common(linux_fragment, + g->dbg_regops_tmp_buf, num_ops); + + if (err) + break; + + err = g->ops.dbg_session_ops.exec_reg_ops( + dbg_s, g->dbg_regops_tmp_buf, num_ops); + + err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, + 
linux_fragment, num_ops); + + if (err) + break; + + nvgpu_log_fn(g, "Copying result to userspace"); + + if (copy_to_user(fragment, linux_fragment, + fragment_size)) { + nvgpu_err(g, "copy_to_user failed!"); + err = -EFAULT; + break; + } + + ops_offset += num_ops; + } + + nvgpu_kfree(g, linux_fragment); + + /* enable powergate, if previously disabled */ + if (is_pg_disabled) { + powergate_err = + g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, + false); + } + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + if (!err && powergate_err) + err = powergate_err; + + if (err) + nvgpu_err(g, "dbg regops failed"); + + return err; +} + +static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_powergate_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + nvgpu_log_fn(g, "%s powergate mode = %d", + g->name, args->mode); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) { + err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true); + } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) { + err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); + } else { + nvgpu_err(g, "invalid powergate mode"); + err = -EINVAL; + } + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch_gk20a; + + nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", + g->name, args->mode); + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch_gk20a) { + nvgpu_err(g, + "no bound channel for smpc ctxsw mode update"); + err = -EINVAL; + goto clean_up; + } + + err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, + args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); + if (err) { + nvgpu_err(g, + "error (%d) during smpc ctxsw mode update", err); + goto clean_up; + } + + err = g->ops.regops.apply_smpc_war(dbg_s); + clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) +{ + int err; + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch_gk20a; + + nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); + + /* Must have a valid reservation to enable/disable hwpm cxtsw. + * Just print an error message for now, but eventually this should + * return an error, at the point where all client sw has been + * cleaned up. 
+ */ + if (!dbg_s->has_profiler_reservation) { + nvgpu_err(g, + "session doesn't have a valid reservation"); + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + /* Take the global lock, since we'll be doing global regops */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch_gk20a) { + nvgpu_err(g, + "no bound channel for pm ctxsw mode update"); + err = -EINVAL; + goto clean_up; + } + if (!dbg_s->is_pg_disabled) { + nvgpu_err(g, "powergate is not disabled"); + err = -ENOSYS; + goto clean_up; + } + err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, + args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); + if (err) + nvgpu_err(g, + "error (%d) during pm ctxsw mode update", err); + /* gk20a would require a WAR to set the core PM_ENABLE bit, not + * added here with gk20a being deprecated + */ + clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + return err; +} + +static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) +{ + struct gk20a *g = dbg_s->g; + struct channel_gk20a *ch; + int err = 0, action = args->mode; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to poweron"); + return err; + } + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + /* Suspend GPU context switching */ + err = gr_gk20a_disable_ctxsw(g); + if (err) { + nvgpu_err(g, "unable to stop gr ctxsw"); + /* this should probably be ctx-fatal... */ + goto clean_up; + } + + switch (action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: + gr_gk20a_suspend_context(ch); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_SMS: + gr_gk20a_resume_context(ch); + break; + } + + err = gr_gk20a_enable_ctxsw(g); + if (err) + nvgpu_err(g, "unable to restart ctxsw!"); + +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + gk20a_idle(g); + + return err; +} + +static int nvgpu_ioctl_allocate_profiler_object( + struct dbg_session_gk20a_linux *dbg_session_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) +{ + int err = 0; + struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; + struct gk20a *g = get_gk20a(dbg_session_linux->dev); + struct dbg_profiler_object_data *prof_obj; + + nvgpu_log_fn(g, "%s", g->name); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + err = alloc_profiler(g, &prof_obj); + if (err) + goto clean_up; + + prof_obj->session_id = dbg_s->id; + + if (dbg_s->is_profiler) + prof_obj->ch = NULL; + else { + prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (prof_obj->ch == NULL) { + nvgpu_err(g, + "bind a channel for dbg session"); + nvgpu_kfree(g, prof_obj); + err = -EINVAL; + goto clean_up; + } + } + + /* Return handle to client */ + args->profiler_handle = prof_obj->prof_handle; + + nvgpu_init_list_node(&prof_obj->prof_obj_entry); + + nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); +clean_up: + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int nvgpu_ioctl_free_profiler_object( + struct dbg_session_gk20a_linux *dbg_s_linux, + struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) +{ + int err = 0; + struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; + struct gk20a *g = get_gk20a(dbg_s_linux->dev); + struct dbg_profiler_object_data *prof_obj, *tmp_obj; + bool obj_found = false; + + 
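+	/* Look up the object by handle under dbg_sessions_lock below; only
+	 * the owning session is allowed to free it. */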
nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", + g->name, dbg_s->id, args->profiler_handle); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + /* Remove profiler object from the list, if a match is found */ + nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->prof_handle == args->profiler_handle) { + if (prof_obj->session_id != dbg_s->id) { + nvgpu_err(g, + "invalid handle %x", + args->profiler_handle); + err = -EINVAL; + break; + } + if (prof_obj->has_reservation) + g->ops.dbg_session_ops. + release_profiler_reservation(dbg_s, prof_obj); + nvgpu_list_del(&prof_obj->prof_obj_entry); + nvgpu_kfree(g, prof_obj); + obj_found = true; + break; + } + } + if (!obj_found) { + nvgpu_err(g, "profiler %x not found", + args->profiler_handle); + err = -EINVAL; + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static struct dbg_profiler_object_data *find_matching_prof_obj( + struct dbg_session_gk20a *dbg_s, + u32 profiler_handle) +{ + struct gk20a *g = dbg_s->g; + struct dbg_profiler_object_data *prof_obj; + + nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, + dbg_profiler_object_data, prof_obj_entry) { + if (prof_obj->prof_handle == profiler_handle) { + if (prof_obj->session_id != dbg_s->id) { + nvgpu_err(g, + "invalid handle %x", + profiler_handle); + return NULL; + } + return prof_obj; + } + } + return NULL; +} + +/* used in scenarios where the debugger session can take just the inter-session + * lock for performance, but the profiler session must take the per-gpu lock + * since it might not have an associated channel. */ +static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) +{ + struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + + if (dbg_s->is_profiler || !ch) + nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); + else + nvgpu_mutex_acquire(&ch->dbg_s_lock); +} + +static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) +{ + struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + + if (dbg_s->is_profiler || !ch) + nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); + else + nvgpu_mutex_release(&ch->dbg_s_lock); +} + +static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->dbg_events.events_enabled = true; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + +static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + dbg_s->dbg_events.events_enabled = false; + dbg_s->dbg_events.num_pending_events = 0; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + +static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) +{ + struct gk20a *g = dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); + + gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); + + if (dbg_s->dbg_events.events_enabled && + dbg_s->dbg_events.num_pending_events > 0) + dbg_s->dbg_events.num_pending_events--; + + gk20a_dbg_session_nvgpu_mutex_release(dbg_s); +} + + +static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_events_ctrl_args *args) +{ + int ret = 0; + struct channel_gk20a *ch; + struct gk20a *g = 
dbg_s->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) { + nvgpu_err(g, "no channel bound to dbg session"); + return -EINVAL; + } + + switch (args->cmd) { + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: + gk20a_dbg_gpu_events_enable(dbg_s); + break; + + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: + gk20a_dbg_gpu_events_disable(dbg_s); + break; + + case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: + gk20a_dbg_gpu_events_clear(dbg_s); + break; + + default: + nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", + args->cmd); + ret = -EINVAL; + break; + } + + return ret; +} + +static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_map_args *args) +{ + struct gk20a *g = dbg_s->g; + struct mm_gk20a *mm = &g->mm; + int err; + u32 virt_size; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + if (g->perfbuf.owner) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -EBUSY; + } + + mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, + big_page_size << 10, + NV_MM_DEFAULT_KERNEL_SIZE, + NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, + false, false, "perfbuf"); + if (!mm->perfbuf.vm) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -ENOMEM; + } + + err = nvgpu_vm_map_buffer(mm->perfbuf.vm, + args->dmabuf_fd, + &args->offset, + 0, + 0, + 0, + 0, + args->mapping_size, + NULL); + if (err) + goto err_remove_vm; + + /* perf output buffer may not cross a 4GB boundary */ + virt_size = u64_lo32(args->mapping_size); + if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { + err = -EINVAL; + goto err_unmap; + } + + err = g->ops.dbg_session_ops.perfbuffer_enable(g, + args->offset, virt_size); + if (err) + goto err_unmap; + + g->perfbuf.owner = dbg_s; + g->perfbuf.offset = args->offset; + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return 0; + +err_unmap: + nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); +err_remove_vm: + nvgpu_vm_put(mm->perfbuf.vm); + nvgpu_mutex_release(&g->dbg_sessions_lock); + return err; +} + +static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) +{ + struct gk20a *g = dbg_s->g; + int err; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + if ((g->perfbuf.owner != dbg_s) || + (g->perfbuf.offset != args->offset)) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return -EINVAL; + } + + err = gk20a_perfbuf_release_locked(g, args->offset); + + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} + +static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_pc_sampling_args *args) +{ + struct channel_gk20a *ch; + struct gk20a *g = dbg_s->g; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + nvgpu_log_fn(g, " "); + + return g->ops.gr.update_pc_sampling ? 
+ g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; +} + +static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( + struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) +{ + struct gk20a *g = dbg_s->g; + struct gr_gk20a *gr = &g->gr; + u32 sm_id; + struct channel_gk20a *ch; + int err = 0; + + ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); + if (!ch) + return -EINVAL; + + sm_id = args->sm_id; + if (sm_id >= gr->no_of_sm) + return -EINVAL; + + nvgpu_speculation_barrier(); + + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.clear_sm_error_state(g, ch, sm_id)); + + gk20a_idle(g); + + return err; +} + +static int +nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) +{ + struct gk20a *g = dbg_s->g; + int err = 0; + int ctx_resident_ch_fd = -1; + + err = gk20a_busy(g); + if (err) + return err; + + switch (args->action) { + case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: + err = g->ops.gr.suspend_contexts(g, dbg_s, + &ctx_resident_ch_fd); + break; + + case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: + err = g->ops.gr.resume_contexts(g, dbg_s, + &ctx_resident_ch_fd); + break; + } + + if (ctx_resident_ch_fd < 0) { + args->is_resident_context = 0; + } else { + args->is_resident_context = 1; + args->resident_context_fd = ctx_resident_ch_fd; + } + + gk20a_idle(g); + + return err; +} + +static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_access_fb_memory_args *args) +{ + struct gk20a *g = dbg_s->g; + struct dma_buf *dmabuf; + void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; + void *buffer; + u64 size, access_size, offset; + u64 access_limit_size = SZ_4K; + int err = 0; + + if ((args->offset & 3) || (!args->size) || (args->size & 3)) + return -EINVAL; + + dmabuf = dma_buf_get(args->dmabuf_fd); + if (IS_ERR(dmabuf)) + return -EINVAL; + + if ((args->offset > dmabuf->size) || + (args->size > dmabuf->size) || + (args->offset + args->size > dmabuf->size)) { + err = -EINVAL; + goto fail_dmabuf_put; + } + + buffer = nvgpu_big_zalloc(g, access_limit_size); + if (!buffer) { + err = -ENOMEM; + goto fail_dmabuf_put; + } + + size = args->size; + offset = 0; + + err = gk20a_busy(g); + if (err) + goto fail_free_buffer; + + while (size) { + /* Max access size of access_limit_size in one loop */ + access_size = min(access_limit_size, size); + + if (args->cmd == + NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { + err = copy_from_user(buffer, user_buffer + offset, + access_size); + if (err) + goto fail_idle; + } + + err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, + args->offset + offset, access_size, + args->cmd); + if (err) + goto fail_idle; + + if (args->cmd == + NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { + err = copy_to_user(user_buffer + offset, + buffer, access_size); + if (err) + goto fail_idle; + } + + size -= access_size; + offset += access_size; + } + +fail_idle: + gk20a_idle(g); +fail_free_buffer: + nvgpu_big_free(g, buffer); +fail_dmabuf_put: + dma_buf_put(dmabuf); + + return err; +} + +static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, + struct nvgpu_dbg_gpu_profiler_reserve_args *args) +{ + if (args->acquire) + return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); + + return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); +} + +static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, 
+		struct nvgpu_dbg_gpu_timeout_args *args)
+{
+	bool status;
+	struct gk20a *g = dbg_s->g;
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	status = nvgpu_is_timeouts_enabled(g);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+	if (status)
+		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE;
+	else
+		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
+}
+
+/* In order to perform a context relative op the context has
+ * to be created already... which would imply that the
+ * context switch mechanism has already been put in place.
+ * So by the time we perform such an operation it should always
+ * be possible to query for the appropriate context offsets, etc.
+ *
+ * But note: while the dbg_gpu bind requires a channel fd,
+ * it doesn't require an allocated gr/compute obj at that point...
+ */
+static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s,
+		struct gr_gk20a *gr)
+{
+	int err;
+
+	nvgpu_mutex_acquire(&gr->ctx_mutex);
+	err = !gr->ctx_vars.golden_image_initialized;
+	nvgpu_mutex_release(&gr->ctx_mutex);
+	if (err)
+		return false;
+	return true;
+}
+
+static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
+	int err;
+
+	err = g->ops.dbg_session_ops.perfbuffer_disable(g);
+
+	nvgpu_vm_unmap(vm, offset, NULL);
+	nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(vm);
+
+	g->perfbuf.owner = NULL;
+	g->perfbuf.offset = 0;
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		nvgpu_err(g, "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation)
+		g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj);
+	else {
+		nvgpu_err(g, "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		nvgpu_err(g, "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		nvgpu_err(g, "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (!g->ops.dbg_session_ops.check_and_set_global_reservation(
+				dbg_s, my_prof_obj)) {
+			nvgpu_err(g,
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+		}
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		nvgpu_err(g,
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
+				dbg_profiler_object_data, prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				nvgpu_err(g,
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
+				dbg_s, my_prof_obj)) {
+			/* Another guest OS has the global reservation */
+			nvgpu_err(g,
+				"per-ctxt reserve: global reservation in effect");
+			err = -EBUSY;
+		}
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
+				dbg_profiler_object_data, prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				nvgpu_err(g,
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
+				dbg_s, my_prof_obj)) {
+			/* Another guest OS has the global reservation */
+			nvgpu_err(g,
+				"per-ctxt reserve: global reservation in effect");
+			err = -EBUSY;
+		}
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_unbind_channel_args *args)
+{
+	struct dbg_session_channel_data *ch_data;
+	struct gk20a *g = dbg_s->g;
+	bool channel_found = false;
+	struct channel_gk20a *ch;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "%s fd=%d",
+		g->name, args->channel_fd);
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+	if (!ch) {
+		nvgpu_log_fn(g, "no channel found for fd");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+	nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list,
+			dbg_session_channel_data, ch_entry) {
+		if (ch->chid == ch_data->chid) {
+			channel_found = true;
+			break;
+		}
+	}
+	nvgpu_mutex_release(&dbg_s->ch_list_lock);
+
+	if (!channel_found) {
+		nvgpu_log_fn(g, "channel not bound, fd=%d", args->channel_fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
+	err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
+	nvgpu_mutex_release(&dbg_s->ch_list_lock);
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
+out:
+	gk20a_channel_put(ch);
+	return err;
+}
+
+int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
+{
+	struct nvgpu_os_linux *l = container_of(inode->i_cdev,
+			struct nvgpu_os_linux, dbg.cdev);
+	struct gk20a *g = &l->g;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+	return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */);
+}
+
+long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data;
+	struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s;
+	struct gk20a *g = dbg_s->g;
+	u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+
+	if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) ||
+		(_IOC_SIZE(cmd) >
NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + /* protect from threaded user space calls */ + nvgpu_mutex_acquire(&dbg_s->ioctl_lock); + + switch (cmd) { + case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: + err = dbg_bind_channel_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_bind_channel_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_REG_OPS: + err = nvgpu_ioctl_channel_reg_ops(dbg_s, + (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_POWERGATE: + err = nvgpu_ioctl_powergate_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_powergate_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: + err = gk20a_dbg_gpu_events_ctrl(dbg_s, + (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: + err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, + (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: + err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, + (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: + err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, + (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: + err = gk20a_perfbuf_map(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: + err = gk20a_perfbuf_unmap(dbg_s, + (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: + err = gk20a_dbg_pc_sampling(dbg_s, + (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: + err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, + (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_TIMEOUT: + err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, + (struct nvgpu_dbg_gpu_timeout_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: + nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, + (struct nvgpu_dbg_gpu_timeout_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE: + err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s, + (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: + err = dbg_unbind_channel_gk20a(dbg_s, + (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: + err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, + (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: + err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, + (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: + err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, + (struct 
nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: + err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, + (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); + break; + + case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: + err = nvgpu_ioctl_profiler_reserve(dbg_s, + (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); + break; + + default: + nvgpu_err(g, + "unrecognized dbg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + + nvgpu_mutex_release(&dbg_s->ioctl_lock); + + nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, + buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h new file mode 100644 index 00000000..bd76045b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h @@ -0,0 +1,54 @@ +/* + * Tegra GK20A GPU Debugger Driver + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef DBG_GPU_IOCTL_GK20A_H +#define DBG_GPU_IOCTL_GK20A_H +#include + +#include "gk20a/dbg_gpu_gk20a.h" + +/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number + * of regops */ +#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 + +struct dbg_session_gk20a_linux { + struct device *dev; + struct dbg_session_gk20a dbg_s; +}; + +struct dbg_session_channel_data_linux { + /* + * We have to keep a ref to the _file_, not the channel, because + * close(channel_fd) is synchronous and would deadlock if we had an + * open debug session fd holding a channel ref at that time. Holding a + * ref to the file makes close(channel_fd) just drop a kernel ref to + * the file; the channel will close when the last file ref is dropped. + */ + struct file *ch_f; + struct dbg_session_channel_data ch_data; +}; + +/* module debug driver interface */ +int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); +int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); +long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); + +/* used by profiler driver interface */ +int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); + +#endif \ No newline at end of file diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c new file mode 100644 index 00000000..4ef99ded --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -0,0 +1,677 @@ +/* + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/tsg_gk20a.h" +#include "gv11b/fifo_gv11b.h" +#include "platform_gk20a.h" +#include "ioctl_tsg.h" +#include "ioctl_channel.h" +#include "os_linux.h" + +struct tsg_private { + struct gk20a *g; + struct tsg_gk20a *tsg; +}; + +static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) +{ + struct channel_gk20a *ch; + int err; + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) + return -EINVAL; + + err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); + + gk20a_channel_put(ch); + return err; +} + +static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + struct channel_gk20a *ch; + struct gr_gk20a *gr = &g->gr; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto mutex_release; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto mutex_release; + } + + ch = gk20a_get_channel_from_file(arg->channel_fd); + if (!ch) { + err = -EINVAL; + goto idle; + } + + if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { + if ((arg->num_active_tpcs > gr->max_tpc_count) || + !(arg->num_active_tpcs)) { + nvgpu_err(g, "Invalid num of active TPCs"); + err = -EINVAL; + goto ch_put; + } + tsg->tpc_num_initialized = true; + tsg->num_active_tpcs = arg->num_active_tpcs; + tsg->tpc_pg_enabled = true; + } else { + tsg->tpc_pg_enabled = false; nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); + } + + if (arg->subcontext_id < g->fifo.max_subctx_count) { + ch->subctx_id = arg->subcontext_id; + } else { + err = -EINVAL; + goto ch_put; + } + + nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", + ch->chid, ch->subctx_id); + + /* Use runqueue selector 1 for all ASYNC ids */ + if (ch->subctx_id > CHANNEL_INFO_VEID0) + ch->runqueue_sel = 1; + + err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); +ch_put: + gk20a_channel_put(ch); +idle: + gk20a_idle(g); +mutex_release: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) +{ + struct channel_gk20a *ch; + int err = 0; + + ch = gk20a_get_channel_from_file(ch_fd); + if (!ch) + return -EINVAL; + + if (ch->tsgid != tsg->tsgid) { + err = -EINVAL; + goto out; + } + + err = gk20a_tsg_unbind_channel(ch); + + /* + * Mark the channel timedout since channel unbound from TSG + * has no context of its own so it can't serve any job + */ + ch->has_timedout = true; + +out: + gk20a_channel_put(ch); + return err; +} + +static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, + unsigned int event_id, + struct gk20a_event_id_data **event_id_data) +{ + struct gk20a_event_id_data *local_event_id_data; + bool event_found = false; + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + 
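+	/* Linear scan of this TSG's enabled event ids; the walk is
+	 * protected by the list lock taken above. */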
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, + gk20a_event_id_data, event_id_node) { + if (local_event_id_data->event_id == event_id) { + event_found = true; + break; + } + } + nvgpu_mutex_release(&tsg->event_id_list_lock); + + if (event_found) { + *event_id_data = local_event_id_data; + return 0; + } else { + return -1; + } +} + +/* + * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific + * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs + */ +static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) +{ + switch (event_id) { + case NVGPU_EVENT_ID_BPT_INT: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; + case NVGPU_EVENT_ID_BPT_PAUSE: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; + case NVGPU_EVENT_ID_BLOCKING_SYNC: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; + case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; + case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; + case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: + return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; + } + + return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; +} + +void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, + int __event_id) +{ + struct gk20a_event_id_data *event_id_data; + u32 event_id; + int err = 0; + struct gk20a *g = tsg->g; + + event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); + if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) + return; + + err = gk20a_tsg_get_event_data_from_id(tsg, event_id, + &event_id_data); + if (err) + return; + + nvgpu_mutex_acquire(&event_id_data->lock); + + nvgpu_log_info(g, + "posting event for event_id=%d on tsg=%d\n", + event_id, tsg->tsgid); + event_id_data->event_posted = true; + + nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); + + nvgpu_mutex_release(&event_id_data->lock); +} + +static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + struct gk20a_event_id_data *event_id_data = filep->private_data; + struct gk20a *g = event_id_data->g; + u32 event_id = event_id_data->event_id; + struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); + + poll_wait(filep, &event_id_data->event_id_wq.wq, wait); + + nvgpu_mutex_acquire(&event_id_data->lock); + + if (event_id_data->event_posted) { + nvgpu_log_info(g, + "found pending event_id=%d on TSG=%d\n", + event_id, tsg->tsgid); + mask = (POLLPRI | POLLIN); + event_id_data->event_posted = false; + } + + nvgpu_mutex_release(&event_id_data->lock); + + return mask; +} + +static int gk20a_event_id_release(struct inode *inode, struct file *filp) +{ + struct gk20a_event_id_data *event_id_data = filp->private_data; + struct gk20a *g = event_id_data->g; + struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + nvgpu_list_del(&event_id_data->event_id_node); + nvgpu_mutex_release(&tsg->event_id_list_lock); + + nvgpu_mutex_destroy(&event_id_data->lock); + gk20a_put(g); + nvgpu_kfree(g, event_id_data); + filp->private_data = NULL; + + return 0; +} + +const struct file_operations gk20a_event_id_ops = { + .owner = THIS_MODULE, + .poll = gk20a_event_id_poll, + .release = gk20a_event_id_release, +}; + +static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg, + int event_id, + int *fd) +{ + int err = 0; + int local_fd; + struct file *file; + char name[64]; + 
struct gk20a_event_id_data *event_id_data; + struct gk20a *g; + + g = gk20a_get(tsg->g); + if (!g) + return -ENODEV; + + err = gk20a_tsg_get_event_data_from_id(tsg, + event_id, &event_id_data); + if (err == 0) { + /* We already have event enabled */ + err = -EINVAL; + goto free_ref; + } + + err = get_unused_fd_flags(O_RDWR); + if (err < 0) + goto free_ref; + local_fd = err; + + snprintf(name, sizeof(name), "nvgpu-event%d-fd%d", + event_id, local_fd); + + file = anon_inode_getfile(name, &gk20a_event_id_ops, + NULL, O_RDWR); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto clean_up; + } + + event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data)); + if (!event_id_data) { + err = -ENOMEM; + goto clean_up_file; + } + event_id_data->g = g; + event_id_data->id = tsg->tsgid; + event_id_data->event_id = event_id; + + nvgpu_cond_init(&event_id_data->event_id_wq); + err = nvgpu_mutex_init(&event_id_data->lock); + if (err) + goto clean_up_free; + + nvgpu_init_list_node(&event_id_data->event_id_node); + + nvgpu_mutex_acquire(&tsg->event_id_list_lock); + nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list); + nvgpu_mutex_release(&tsg->event_id_list_lock); + + fd_install(local_fd, file); + file->private_data = event_id_data; + + *fd = local_fd; + + return 0; + +clean_up_free: + nvgpu_kfree(g, event_id_data); +clean_up_file: + fput(file); +clean_up: + put_unused_fd(local_fd); +free_ref: + gk20a_put(g); + return err; +} + +static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, + struct nvgpu_event_id_ctrl_args *args) +{ + int err = 0; + int fd = -1; + + if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) + return -EINVAL; + + switch (args->cmd) { + case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: + err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); + if (!err) + args->event_fd = fd; + break; + + default: + nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", + args->cmd); + err = -EINVAL; + break; + } + + return err; +} + +int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) +{ + struct tsg_private *priv; + struct tsg_gk20a *tsg; + struct device *dev; + int err; + + g = gk20a_get(g); + if (!g) + return -ENODEV; + + dev = dev_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); + + priv = nvgpu_kmalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto free_ref; + } + + tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); + if (!tsg) { + nvgpu_kfree(g, priv); + err = -ENOMEM; + goto free_ref; + } + + priv->g = g; + priv->tsg = tsg; + filp->private_data = priv; + + gk20a_sched_ctrl_tsg_added(g, tsg); + + return 0; + +free_ref: + gk20a_put(g); + return err; +} + +int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l; + struct gk20a *g; + int ret; + + l = container_of(inode->i_cdev, + struct nvgpu_os_linux, tsg.cdev); + g = &l->g; + + nvgpu_log_fn(g, " "); + + ret = gk20a_busy(g); + if (ret) { + nvgpu_err(g, "failed to power on, %d", ret); + return ret; + } + + ret = nvgpu_ioctl_tsg_open(&l->g, filp); + + gk20a_idle(g); + nvgpu_log_fn(g, "done"); + return ret; +} + +void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) +{ + struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); + struct gk20a *g = tsg->g; + + gk20a_sched_ctrl_tsg_removed(g, tsg); + + gk20a_tsg_release(ref); + gk20a_put(g); +} + +int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) +{ + struct tsg_private *priv = filp->private_data; + struct tsg_gk20a *tsg = priv->tsg; + + 
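+	/* Drop this file's TSG reference; nvgpu_ioctl_tsg_release() runs
+	 * once the last reference is gone. */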
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + nvgpu_kfree(tsg->g, priv); + return 0; +} + +static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + u32 level = arg->level; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto done; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto done; + } + + level = nvgpu_get_common_runlist_level(level); + err = gk20a_tsg_set_runlist_interleave(tsg, level); + + gk20a_idle(g); +done: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->control_lock); + if (sched->control_locked) { + err = -EPERM; + goto done; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + goto done; + } + err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); + gk20a_idle(g); +done: + nvgpu_mutex_release(&sched->control_lock); + return err; +} + +static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g, + struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) +{ + arg->timeslice_us = gk20a_tsg_get_timeslice(tsg); + return 0; +} + +long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct tsg_private *priv = filp->private_data; + struct tsg_gk20a *tsg = priv->tsg; + struct gk20a *g = tsg->g; + u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + if (!g->sw_ready) { + err = gk20a_busy(g); + if (err) + return err; + + gk20a_idle(g); + } + + switch (cmd) { + case NVGPU_TSG_IOCTL_BIND_CHANNEL: + { + int ch_fd = *(int *)buf; + if (ch_fd < 0) { + err = -EINVAL; + break; + } + err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); + break; + } + + case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: + { + err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, + (struct nvgpu_tsg_bind_channel_ex_args *)buf); + break; + } + + case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: + { + int ch_fd = *(int *)buf; + + if (ch_fd < 0) { + err = -EINVAL; + break; + } + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + break; + } + err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_ENABLE: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + g->ops.fifo.enable_tsg(tsg); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_DISABLE: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + g->ops.fifo.disable_tsg(tsg); 
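+		/* balance the gk20a_busy() taken for this command */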
+ gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_PREEMPT: + { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to host gk20a for ioctl cmd: 0x%x", cmd); + return err; + } + /* preempt TSG */ + err = g->ops.fifo.preempt_tsg(g, tsg->tsgid); + gk20a_idle(g); + break; + } + + case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: + { + err = gk20a_tsg_event_id_ctrl(g, tsg, + (struct nvgpu_event_id_ctrl_args *)buf); + break; + } + + case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: + err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, + (struct nvgpu_runlist_interleave_args *)buf); + break; + + case NVGPU_IOCTL_TSG_SET_TIMESLICE: + { + err = gk20a_tsg_ioctl_set_timeslice(g, tsg, + (struct nvgpu_timeslice_args *)buf); + break; + } + case NVGPU_IOCTL_TSG_GET_TIMESLICE: + { + err = gk20a_tsg_ioctl_get_timeslice(g, tsg, + (struct nvgpu_timeslice_args *)buf); + break; + } + + default: + nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", + cmd); + err = -ENOTTY; + break; + } + + if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) + err = copy_to_user((void __user *)arg, + buf, _IOC_SIZE(cmd)); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h new file mode 100644 index 00000000..67399fd4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef NVGPU_IOCTL_TSG_H +#define NVGPU_IOCTL_TSG_H + +struct inode; +struct file; +struct gk20a; +struct nvgpu_ref; + +int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp); +int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp); +int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp); +long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/kmem.c b/drivers/gpu/nvgpu/os/linux/kmem.c new file mode 100644 index 00000000..10946a08 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem.c @@ -0,0 +1,654 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" + +#include "kmem_priv.h" + +/* + * Statically declared because this needs to be shared across all nvgpu driver + * instances. This makes sure that all kmem caches are _definitely_ uniquely + * named. 
+ */ +static atomic_t kmem_cache_id; + +void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) +{ + void *p; + + if (size > PAGE_SIZE) { + if (clear) + p = nvgpu_vzalloc(g, size); + else + p = nvgpu_vmalloc(g, size); + } else { + if (clear) + p = nvgpu_kzalloc(g, size); + else + p = nvgpu_kmalloc(g, size); + } + + return p; +} + +void nvgpu_big_free(struct gk20a *g, void *p) +{ + /* + * This will have to be fixed eventually. Allocs that use + * nvgpu_big_[mz]alloc() will need to remember the size of the alloc + * when freeing. + */ + if (is_vmalloc_addr(p)) + nvgpu_vfree(g, p); + else + nvgpu_kfree(g, p); +} + +void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kmalloc(g, size, ip); +#else + alloc = kmalloc(size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kzalloc(g, size, ip); +#else + alloc = kzalloc(size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", + size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_kcalloc(g, n, size, ip); +#else + alloc = kcalloc(n, size, GFP_KERNEL); +#endif + + kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", + n * size, alloc, GFP_KERNEL); + + return alloc; +} + +void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_vmalloc(g, size, ip); +#else + alloc = vmalloc(size); +#endif + + kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc); + + return alloc; +} + +void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip) +{ + void *alloc; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + alloc = __nvgpu_track_vzalloc(g, size, ip); +#else + alloc = vzalloc(size); +#endif + + kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc); + + return alloc; +} + +void __nvgpu_kfree(struct gk20a *g, void *addr) +{ + kmem_dbg(g, "kfree: addr=0x%p", addr); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_kfree(g, addr); +#else + kfree(addr); +#endif +} + +void __nvgpu_vfree(struct gk20a *g, void *addr) +{ + kmem_dbg(g, "vfree: addr=0x%p", addr); +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + __nvgpu_track_vfree(g, addr); +#else + vfree(addr); +#endif +} + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + nvgpu_mutex_acquire(&tracker->lock); +} + +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) +{ + nvgpu_mutex_release(&tracker->lock); +} + +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s) +{ +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + int i; + + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", + alloc->addr, alloc->size); + for (i = 0; i < alloc->stack_length; i++) + __pstat(s, " %3d [<%p>] %pS\n", i, + (void *)alloc->stack[i], + (void *)alloc->stack[i]); + __pstat(s, "\n"); +#else + __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", + alloc->addr, alloc->size, alloc->ip); +#endif +} + +static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker, + struct nvgpu_mem_alloc *alloc) +{ + 
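/* key the tracker's rbtree by the alloc's [addr, addr + size) range */ +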
alloc->allocs_entry.key_start = alloc->addr; + alloc->allocs_entry.key_end = alloc->addr + alloc->size; + + nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs); + return 0; +} + +static struct nvgpu_mem_alloc *nvgpu_rem_alloc( + struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr) +{ + struct nvgpu_mem_alloc *alloc; + struct nvgpu_rbtree_node *node = NULL; + + nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs); + if (!node) + return NULL; + + alloc = nvgpu_mem_alloc_from_rbtree_node(node); + + nvgpu_rbtree_unlink(node, &tracker->allocs); + + return alloc; +} + +static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + unsigned long size, unsigned long real_size, + u64 addr, unsigned long ip) +{ + int ret; + struct nvgpu_mem_alloc *alloc; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + struct stack_trace stack_trace; +#endif + + alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); + if (!alloc) + return -ENOMEM; + + alloc->owner = tracker; + alloc->size = size; + alloc->real_size = real_size; + alloc->addr = addr; + alloc->ip = (void *)(uintptr_t)ip; + +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + stack_trace.max_entries = MAX_STACK_TRACE; + stack_trace.nr_entries = 0; + stack_trace.entries = alloc->stack; + /* + * This 4 here skips the 2 function calls that happen for all traced + * allocs due to nvgpu: + * + * __nvgpu_save_kmem_alloc+0x7c/0x128 + * __nvgpu_track_kzalloc+0xcc/0xf8 + * + * And the function calls that get made by the stack trace code itself. + * If the trace savings code changes this will likely have to change + * as well. + */ + stack_trace.skip = 4; + save_stack_trace(&stack_trace); + alloc->stack_length = stack_trace.nr_entries; +#endif + + nvgpu_lock_tracker(tracker); + tracker->bytes_alloced += size; + tracker->bytes_alloced_real += real_size; + tracker->nr_allocs++; + + /* Keep track of this for building a histogram later on. */ + if (tracker->max_alloc < size) + tracker->max_alloc = size; + if (tracker->min_alloc > size) + tracker->min_alloc = size; + + ret = nvgpu_add_alloc(tracker, alloc); + if (ret) { + WARN(1, "Duplicate alloc??? 0x%llx\n", addr); + kfree(alloc); + nvgpu_unlock_tracker(tracker); + return ret; + } + nvgpu_unlock_tracker(tracker); + + return 0; +} + +static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, + u64 addr) +{ + struct nvgpu_mem_alloc *alloc; + + nvgpu_lock_tracker(tracker); + alloc = nvgpu_rem_alloc(tracker, addr); + if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { + nvgpu_unlock_tracker(tracker); + return -EINVAL; + } + + memset((void *)alloc->addr, 0, alloc->size); + + tracker->nr_frees++; + tracker->bytes_freed += alloc->size; + tracker->bytes_freed_real += alloc->real_size; + nvgpu_unlock_tracker(tracker); + + return 0; +} + +static void __nvgpu_check_valloc_size(unsigned long size) +{ + WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size); +} + +static void __nvgpu_check_kalloc_size(size_t size) +{ + WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size); +} + +void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vmalloc(size); + + if (!alloc) + return NULL; + + __nvgpu_check_valloc_size(size); + + /* + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. 
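+ * Losing one tracking entry merely skews the debug statistics.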
+ */ + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, + unsigned long ip) +{ + void *alloc = vzalloc(size); + + if (!alloc) + return NULL; + + __nvgpu_check_valloc_size(size); + + /* + * Ignore the return message. If this fails let's not cause any issues + * for the rest of the driver. + */ + __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kmalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) +{ + void *alloc = kzalloc(size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(size); + + __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, + unsigned long ip) +{ + void *alloc = kcalloc(n, size, GFP_KERNEL); + + if (!alloc) + return NULL; + + __nvgpu_check_kalloc_size(n * size); + + __nvgpu_save_kmem_alloc(g->kmallocs, n * size, + roundup_pow_of_two(n * size), + (u64)(uintptr_t)alloc, ip); + + return alloc; +} + +void __nvgpu_track_vfree(struct gk20a *g, void *addr) +{ + /* + * Often it is accepted practice to pass NULL pointers into free + * functions to save code. + */ + if (!addr) + return; + + __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); + + vfree(addr); +} + +void __nvgpu_track_kfree(struct gk20a *g, void *addr) +{ + if (!addr) + return; + + __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); + + kfree(addr); +} + +static int __do_check_for_outstanding_allocs( + struct gk20a *g, + struct nvgpu_mem_alloc_tracker *tracker, + const char *type, bool silent) +{ + struct nvgpu_rbtree_node *node; + int count = 0; + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + while (node) { + struct nvgpu_mem_alloc *alloc = + nvgpu_mem_alloc_from_rbtree_node(node); + + if (!silent) + kmem_print_mem_alloc(g, alloc, NULL); + + count++; + nvgpu_rbtree_enum_next(&node, node); + } + + return count; +} + +/** + * check_for_outstanding_allocs - Count and display outstanding allocs + * + * @g - The GPU. + * @silent - If set don't print anything about the allocs. + * + * Dump (or just count) the number of allocations left outstanding. + */ +static int check_for_outstanding_allocs(struct gk20a *g, bool silent) +{ + int count = 0; + + count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", + silent); + count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", + silent); + + return count; +} + +static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, + void (*force_free_func)(const void *)) +{ + struct nvgpu_rbtree_node *node; + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + while (node) { + struct nvgpu_mem_alloc *alloc = + nvgpu_mem_alloc_from_rbtree_node(node); + + if (force_free_func) + force_free_func((void *)alloc->addr); + + nvgpu_rbtree_unlink(node, &tracker->allocs); + kfree(alloc); + + nvgpu_rbtree_enum_start(0, &node, tracker->allocs); + } +} + +/** + * nvgpu_kmem_cleanup - Cleanup the kmem tracking + * + * @g - The GPU. 
+ * @force_free - If set, will also free leaked objects if possible.
+ *
+ * Clean up all of the allocs made by the nvgpu_kmem tracking code. If
+ * @force_free is non-zero then the allocations made by nvgpu are also freed.
+ * This is risky, though, as it is possible that the memory is still in use by
+ * other parts of the GPU driver not aware that this has happened.
+ *
+ * In theory it should be fine if the GPU driver has been deinitialized and
+ * there are no bugs in that code. However, if there are any bugs in that code
+ * then they could likely manifest as odd crashes an indeterminate amount of
+ * time in the future. So use @force_free at your own risk.
+ */
+static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
+{
+	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
+	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
+}
+
+void nvgpu_kmem_fini(struct gk20a *g, int flags)
+{
+	int count;
+	bool silent, force_free;
+
+	if (!flags)
+		return;
+
+	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
+	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
+
+	count = check_for_outstanding_allocs(g, silent);
+	nvgpu_kmem_cleanup(g, force_free);
+
+	/*
+	 * If we leak objects we can either BUG() out or just WARN(). In general
+	 * it doesn't make sense to BUG() here since leaking a few objects
+	 * won't crash the kernel but it can be helpful for development.
+	 *
+	 * If neither flag is set then we just silently do nothing.
+	 */
+	if (count > 0) {
+		if (flags & NVGPU_KMEM_FINI_WARN) {
+			WARN(1, "Letting %d allocs leak!!\n", count);
+		} else if (flags & NVGPU_KMEM_FINI_BUG) {
+			nvgpu_err(g, "Letting %d allocs leak!!", count);
+			BUG();
+		}
+	}
+}
+
+int nvgpu_kmem_init(struct gk20a *g)
+{
+	int err;
+
+	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
+	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
+
+	if (!g->vmallocs || !g->kmallocs) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	g->vmallocs->name = "vmalloc";
+	g->kmallocs->name = "kmalloc";
+
+	g->vmallocs->allocs = NULL;
+	g->kmallocs->allocs = NULL;
+
+	nvgpu_mutex_init(&g->vmallocs->lock);
+	nvgpu_mutex_init(&g->kmallocs->lock);
+
+	g->vmallocs->min_alloc = PAGE_SIZE;
+	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
+
+	/*
+	 * This needs to go after all the other initialization since they use
+	 * the nvgpu_kzalloc() API.
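+	 * (nvgpu_kmem_cache_create() itself allocates with nvgpu_kzalloc().)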
+ */ + g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, + sizeof(struct nvgpu_mem_alloc)); + + if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { + err = -ENOMEM; + if (g->vmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); + if (g->kmallocs->allocs_cache) + nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); + goto fail; + } + + return 0; + +fail: + if (g->vmallocs) + kfree(g->vmallocs); + if (g->kmallocs) + kfree(g->kmallocs); + return err; +} + +#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ + +int nvgpu_kmem_init(struct gk20a *g) +{ + return 0; +} + +void nvgpu_kmem_fini(struct gk20a *g, int flags) +{ +} +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + +struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) +{ + struct nvgpu_kmem_cache *cache = + nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); + + if (!cache) + return NULL; + + cache->g = g; + + snprintf(cache->name, sizeof(cache->name), + "nvgpu-cache-0x%p-%d-%d", g, (int)size, + atomic_inc_return(&kmem_cache_id)); + cache->cache = kmem_cache_create(cache->name, + size, size, 0, NULL); + if (!cache->cache) { + nvgpu_kfree(g, cache); + return NULL; + } + + return cache; +} + +void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) +{ + struct gk20a *g = cache->g; + + kmem_cache_destroy(cache->cache); + nvgpu_kfree(g, cache); +} + +void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache) +{ + return kmem_cache_alloc(cache->cache, GFP_KERNEL); +} + +void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr) +{ + kmem_cache_free(cache->cache, ptr); +} diff --git a/drivers/gpu/nvgpu/os/linux/kmem_priv.h b/drivers/gpu/nvgpu/os/linux/kmem_priv.h new file mode 100644 index 00000000..a41762af --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem_priv.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __KMEM_PRIV_H__ +#define __KMEM_PRIV_H__ + +#include +#include + +struct seq_file; + +#define __pstat(s, fmt, msg...) \ + do { \ + if (s) \ + seq_printf(s, fmt, ##msg); \ + else \ + pr_info(fmt, ##msg); \ + } while (0) + +#define MAX_STACK_TRACE 20 + +/* + * Linux specific version of the nvgpu_kmem_cache struct. This type is + * completely opaque to the rest of the driver. + */ +struct nvgpu_kmem_cache { + struct gk20a *g; + struct kmem_cache *cache; + + /* + * Memory to hold the kmem_cache unique name. Only necessary on our + * k3.10 kernel when not using the SLUB allocator but it's easier to + * just carry this on to newer kernels. 
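+ * 128 bytes comfortably fits the generated "nvgpu-cache-..." names.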
+ */ + char name[128]; +}; + +#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE + +struct nvgpu_mem_alloc { + struct nvgpu_mem_alloc_tracker *owner; + + void *ip; +#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES + unsigned long stack[MAX_STACK_TRACE]; + int stack_length; +#endif + + u64 addr; + + unsigned long size; + unsigned long real_size; + + struct nvgpu_rbtree_node allocs_entry; +}; + +static inline struct nvgpu_mem_alloc * +nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) +{ + return (struct nvgpu_mem_alloc *) + ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); +}; + +/* + * Linux specific tracking of vmalloc, kmalloc, etc. + */ +struct nvgpu_mem_alloc_tracker { + const char *name; + struct nvgpu_kmem_cache *allocs_cache; + struct nvgpu_rbtree_node *allocs; + struct nvgpu_mutex lock; + + u64 bytes_alloced; + u64 bytes_freed; + u64 bytes_alloced_real; + u64 bytes_freed_real; + u64 nr_allocs; + u64 nr_frees; + + unsigned long min_alloc; + unsigned long max_alloc; +}; + +void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); +void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); + +void kmem_print_mem_alloc(struct gk20a *g, + struct nvgpu_mem_alloc *alloc, + struct seq_file *s); +#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ + +#endif /* __KMEM_PRIV_H__ */ diff --git a/drivers/gpu/nvgpu/os/linux/log.c b/drivers/gpu/nvgpu/os/linux/log.c new file mode 100644 index 00000000..ca29e0f3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/log.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" + +/* + * Define a length for log buffers. This is the buffer that the 'fmt, ...' part + * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it + * needs to not be overly sized since we have limited kernel stack space. But at + * the same time we don't want it to be restrictive either. + */ +#define LOG_BUFFER_LENGTH 160 + +/* + * Annoying quirk of Linux: this has to be a string literal since the printk() + * function and friends use the preprocessor to concatenate stuff to the start + * of this string when printing. 
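+ * For example, pr_info(LOG_FMT, ...) pastes KERN_INFO onto LOG_FMT at
+ * compile time, which only works if LOG_FMT is a string literal.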
+ */ +#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" + +static const char *log_types[] = { + "ERR", + "WRN", + "DBG", + "INFO", +}; + +int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) +{ + return !!(g->log_mask & log_mask); +} + +static inline const char *nvgpu_log_name(struct gk20a *g) +{ + return dev_name(dev_from_gk20a(g)); +} + +#ifdef CONFIG_GK20A_TRACE_PRINTK +static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name, + const char *func_name, int line, + const char *log_type, const char *log) +{ + trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log); +} +#endif + +static void __nvgpu_really_print_log(u32 trace, const char *gpu_name, + const char *func_name, int line, + enum nvgpu_log_type type, const char *log) +{ + const char *name = gpu_name ? gpu_name : ""; + const char *log_type = log_types[type]; + +#ifdef CONFIG_GK20A_TRACE_PRINTK + if (trace) + return __nvgpu_trace_printk_log(trace, name, func_name, + line, log_type, log); +#endif + switch (type) { + case NVGPU_DEBUG: + /* + * We could use pr_debug() here but we control debug enablement + * separately from the Linux kernel. Perhaps this is a bug in + * nvgpu. + */ + pr_info(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_INFO: + pr_info(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_WARNING: + pr_warn(LOG_FMT, name, func_name, line, log_type, log); + break; + case NVGPU_ERROR: + pr_err(LOG_FMT, name, func_name, line, log_type, log); + break; + } +} + +__attribute__((format (printf, 5, 6))) +void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, + enum nvgpu_log_type type, const char *fmt, ...) +{ + char log[LOG_BUFFER_LENGTH]; + va_list args; + + va_start(args, fmt); + vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); + va_end(args); + + __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", + func_name, line, type, log); +} + +__attribute__((format (printf, 5, 6))) +void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, + const char *func_name, int line, + const char *fmt, ...) +{ + char log[LOG_BUFFER_LENGTH]; + va_list args; + + if ((log_mask & g->log_mask) == 0) + return; + + va_start(args, fmt); + vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); + va_end(args); + + __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), + func_name, line, NVGPU_DEBUG, log); +} diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c new file mode 100644 index 00000000..af71cc81 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -0,0 +1,1365 @@ +/* + * GK20A Graphics + * + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "platform_gk20a.h" +#include "sysfs.h" +#include "vgpu/vgpu_linux.h" +#include "scale.h" +#include "pci.h" +#include "module.h" +#include "module_usermode.h" +#include "intr.h" +#include "ioctl.h" + +#include "os_linux.h" +#include "ctxsw_trace.h" +#include "driver_common.h" +#include "channel.h" + +#ifdef CONFIG_NVGPU_SUPPORT_CDE +#include "cde.h" +#endif + +#define CLASS_NAME "nvidia-gpu" +/* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ + +#define GK20A_WAIT_FOR_IDLE_MS 2000 + +#define CREATE_TRACE_POINTS +#include + + +struct device_node *nvgpu_get_node(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (dev_is_pci(dev)) { + struct pci_bus *bus = to_pci_dev(dev)->bus; + + while (!pci_is_root_bus(bus)) + bus = bus->parent; + + return bus->bridge->parent->of_node; + } + + return dev->of_node; +} + +void gk20a_busy_noresume(struct gk20a *g) +{ + pm_runtime_get_noresume(dev_from_gk20a(g)); +} + +int gk20a_busy(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int ret = 0; + struct device *dev; + + if (!g) + return -ENODEV; + + atomic_inc(&g->usage_count.atomic_var); + + down_read(&l->busy_lock); + + if (!gk20a_can_busy(g)) { + ret = -ENODEV; + atomic_dec(&g->usage_count.atomic_var); + goto fail; + } + + dev = dev_from_gk20a(g); + + if (pm_runtime_enabled(dev)) { + /* Increment usage count and attempt to resume device */ + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + /* Mark suspended so runtime pm will retry later */ + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + atomic_dec(&g->usage_count.atomic_var); + goto fail; + } + } else { + nvgpu_mutex_acquire(&g->poweron_lock); + if (!g->power_on) { + ret = gk20a_gpu_is_virtual(dev) ? + vgpu_pm_finalize_poweron(dev) + : gk20a_pm_finalize_poweron(dev); + if (ret) { + atomic_dec(&g->usage_count.atomic_var); + nvgpu_mutex_release(&g->poweron_lock); + goto fail; + } + } + nvgpu_mutex_release(&g->poweron_lock); + } + +fail: + up_read(&l->busy_lock); + + return ret < 0 ? ret : 0; +} + +void gk20a_idle_nosuspend(struct gk20a *g) +{ + pm_runtime_put_noidle(dev_from_gk20a(g)); +} + +void gk20a_idle(struct gk20a *g) +{ + struct device *dev; + + atomic_dec(&g->usage_count.atomic_var); + + dev = dev_from_gk20a(g); + + if (!(dev && gk20a_can_busy(g))) + return; + + if (pm_runtime_enabled(dev)) { + pm_runtime_mark_last_busy(dev); + pm_runtime_put_sync_autosuspend(dev); + } +} + +/* + * Undoes gk20a_lockout_registers(). 
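+ * The saved register apertures are put back in place.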
+ */
+static int gk20a_restore_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->regs = l->regs_saved;
+	l->bar1 = l->bar1_saved;
+
+	nvgpu_restore_usermode_registers(g);
+
+	return 0;
+}
+
+static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
+{
+	int err = 0;
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	err = nvgpu_cde_init_ops(l);
+#endif
+
+	return err;
+}
+
+int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
+{
+	struct gk20a *g = &l->g;
+	int err;
+
+	if (l->init_done)
+		return 0;
+
+	err = nvgpu_init_channel_support_linux(l);
+	if (err) {
+		nvgpu_err(g, "failed to init linux channel support");
+		return err;
+	}
+
+	l->init_done = true;
+
+	return 0;
+}
+
+int gk20a_pm_finalize_poweron(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int err;
+
+	nvgpu_log_fn(g, " ");
+
+	if (g->power_on)
+		return 0;
+
+	trace_gk20a_finalize_poweron(dev_name(dev));
+
+	/* Increment platform power refcount */
+	if (platform->busy) {
+		err = platform->busy(dev);
+		if (err < 0) {
+			nvgpu_err(g, "failed to poweron platform dependency");
+			return err;
+		}
+	}
+
+	err = gk20a_restore_registers(g);
+	if (err)
+		return err;
+
+	/* Enable interrupt workqueue */
+	if (!l->nonstall_work_queue) {
+		l->nonstall_work_queue = alloc_workqueue("%s",
+					WQ_HIGHPRI, 1, "mc_nonstall");
+		INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb);
+	}
+
+	err = gk20a_detect_chip(g);
+	if (err)
+		return err;
+
+	if (g->sim) {
+		if (g->sim->sim_init_late)
+			g->sim->sim_init_late(g);
+	}
+
+	err = gk20a_finalize_poweron(g);
+	if (err)
+		goto done;
+
+	err = nvgpu_finalize_poweron_linux(l);
+	if (err)
+		goto done;
+
+	nvgpu_init_mm_ce_context(g);
+
+	nvgpu_vidmem_thread_unpause(&g->mm);
+
+	/* Initialise scaling: it will initialize the scaling driver only once */
+	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) &&
+			nvgpu_platform_is_silicon(g)) {
+		gk20a_scale_init(dev);
+		if (platform->initscale)
+			platform->initscale(dev);
+	}
+
+	trace_gk20a_finalize_poweron_done(dev_name(dev));
+
+	err = nvgpu_init_os_linux_ops(l);
+	if (err)
+		goto done;
+
+	enable_irq(g->irq_stall);
+	if (g->irq_stall != g->irq_nonstall)
+		enable_irq(g->irq_nonstall);
+	g->irqs_enabled = 1;
+
+	gk20a_scale_resume(dev_from_gk20a(g));
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	if (platform->has_cde)
+		gk20a_init_cde_support(l);
+#endif
+
+	err = gk20a_sched_ctrl_init(g);
+	if (err) {
+		nvgpu_err(g, "failed to init sched control");
+		return err;
+	}
+
+	g->sw_ready = true;
+
+done:
+	if (err)
+		g->power_on = false;
+
+	return err;
+}
+
+/*
+ * Locks out the driver from accessing GPU registers. This prevents access to
+ * these registers after the GPU has been clock or power gated. This should help
+ * find annoying bugs where register reads and writes are silently dropped
+ * after the GPU has been turned off. On older chips these reads and writes can
+ * also lock the entire CPU up.
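+ * Clearing the mapping pointers turns any such access into an immediately
+ * visible fault instead.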
+ */
+static int gk20a_lockout_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->regs = NULL;
+	l->bar1 = NULL;
+
+	nvgpu_lockout_usermode_registers(g);
+
+	return 0;
+}
+
+static int gk20a_pm_prepare_poweroff(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+#endif
+	int ret = 0;
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	bool irqs_enabled;
+
+	nvgpu_log_fn(g, " ");
+
+	nvgpu_mutex_acquire(&g->poweroff_lock);
+
+	if (!g->power_on)
+		goto done;
+
+	/* disable IRQs and wait for completion */
+	irqs_enabled = g->irqs_enabled;
+	if (irqs_enabled) {
+		disable_irq(g->irq_stall);
+		if (g->irq_stall != g->irq_nonstall)
+			disable_irq(g->irq_nonstall);
+		g->irqs_enabled = 0;
+	}
+
+	gk20a_scale_suspend(dev);
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	gk20a_cde_suspend(l);
+#endif
+
+	ret = gk20a_prepare_poweroff(g);
+	if (ret)
+		goto error;
+
+	/* Decrement platform power refcount */
+	if (platform->idle)
+		platform->idle(dev);
+
+	/* Stop CPU from accessing the GPU registers. */
+	gk20a_lockout_registers(g);
+
+	nvgpu_mutex_release(&g->poweroff_lock);
+	return 0;
+
+error:
+	/* re-enable IRQs if previously enabled */
+	if (irqs_enabled) {
+		enable_irq(g->irq_stall);
+		if (g->irq_stall != g->irq_nonstall)
+			enable_irq(g->irq_nonstall);
+		g->irqs_enabled = 1;
+	}
+
+	gk20a_scale_resume(dev);
+done:
+	nvgpu_mutex_release(&g->poweroff_lock);
+
+	return ret;
+}
+
+static struct of_device_id tegra_gk20a_of_match[] = {
+#ifdef CONFIG_TEGRA_GK20A
+	{ .compatible = "nvidia,tegra210-gm20b",
+		.data = &gm20b_tegra_platform },
+	{ .compatible = "nvidia,tegra186-gp10b",
+		.data = &gp10b_tegra_platform },
+	{ .compatible = "nvidia,gv11b",
+		.data = &gv11b_tegra_platform },
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,gv11b-vgpu",
+		.data = &gv11b_vgpu_tegra_platform},
+#endif
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	{ .compatible = "nvidia,tegra124-gk20a-vgpu",
+		.data = &vgpu_tegra_platform },
+#endif
+#endif
+
+	{ },
+};
+
+#ifdef CONFIG_PM
+/**
+ * __gk20a_do_idle() - force the GPU to idle and railgate
+ *
+ * On success, this call MUST be balanced by the caller with __gk20a_do_unidle()
+ *
+ * Acquires two locks: &l->busy_lock and &platform->railgate_lock.
+ * On success, we hold these locks and return;
+ * on failure, we release these locks and return.
+ */
+int __gk20a_do_idle(struct gk20a *g, bool force_reset)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct nvgpu_timeout timeout;
+	int ref_cnt;
+	int target_ref_cnt = 0;
+	bool is_railgated;
+	int err = 0;
+
+	/*
+	 * Hold back deterministic submits and changes to deterministic
+	 * channels - this must be outside the power busy locks.
+	 */
+	gk20a_channel_deterministic_idle(g);
+
+	/* acquire busy lock to block other busy() calls */
+	down_write(&l->busy_lock);
+
+	/* acquire railgate lock to prevent unrailgate in the midst of do_idle() */
+	nvgpu_mutex_acquire(&platform->railgate_lock);
+
+	/* check if it is already railgated
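+	 * (if so, we return success with both locks still held)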
*/ + if (platform->is_railgated(dev)) + return 0; + + /* + * release railgate_lock, prevent suspend by incrementing usage counter, + * re-acquire railgate_lock + */ + nvgpu_mutex_release(&platform->railgate_lock); + pm_runtime_get_sync(dev); + + /* + * One refcount taken in this API + * If User disables rail gating, we take one more + * extra refcount + */ + if (g->can_railgate) + target_ref_cnt = 1; + else + target_ref_cnt = 2; + nvgpu_mutex_acquire(&platform->railgate_lock); + + nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, + NVGPU_TIMER_CPU_TIMER); + + /* check and wait until GPU is idle (with a timeout) */ + do { + nvgpu_usleep_range(1000, 1100); + ref_cnt = atomic_read(&dev->power.usage_count); + } while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout)); + + if (ref_cnt != target_ref_cnt) { + nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", + ref_cnt); + goto fail_drop_usage_count; + } + + /* check if global force_reset flag is set */ + force_reset |= platform->force_reset_in_do_idle; + + nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, + NVGPU_TIMER_CPU_TIMER); + + if (g->can_railgate && !force_reset) { + /* + * Case 1 : GPU railgate is supported + * + * if GPU is now idle, we will have only one ref count, + * drop this ref which will rail gate the GPU + */ + pm_runtime_put_sync(dev); + + /* add sufficient delay to allow GPU to rail gate */ + nvgpu_msleep(g->railgate_delay); + + /* check in loop if GPU is railgated or not */ + do { + nvgpu_usleep_range(1000, 1100); + is_railgated = platform->is_railgated(dev); + } while (!is_railgated && !nvgpu_timeout_expired(&timeout)); + + if (is_railgated) { + return 0; + } else { + nvgpu_err(g, "failed to idle in timeout"); + goto fail_timeout; + } + } else { + /* + * Case 2 : GPU railgate is not supported or we explicitly + * do not want to depend on runtime PM + * + * if GPU is now idle, call prepare_poweroff() to save the + * state and then do explicit railgate + * + * __gk20a_do_unidle() needs to unrailgate, call + * finalize_poweron(), and then call pm_runtime_put_sync() + * to balance the GPU usage counter + */ + + /* Save the GPU state */ + err = gk20a_pm_prepare_poweroff(dev); + if (err) + goto fail_drop_usage_count; + + /* railgate GPU */ + platform->railgate(dev); + + nvgpu_udelay(10); + + g->forced_reset = true; + return 0; + } + +fail_drop_usage_count: + pm_runtime_put_noidle(dev); +fail_timeout: + nvgpu_mutex_release(&platform->railgate_lock); + up_write(&l->busy_lock); + gk20a_channel_deterministic_unidle(g); + return -EBUSY; +} + +/** + * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called + * from outside of GPU driver + * + * In success, this call MUST be balanced by caller with gk20a_do_unidle() + */ +static int gk20a_do_idle(void *_g) +{ + struct gk20a *g = (struct gk20a *)_g; + + return __gk20a_do_idle(g, true); +} + +/** + * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() + */ +int __gk20a_do_unidle(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int err; + + if (g->forced_reset) { + /* + * If we did a forced-reset/railgate + * then unrailgate the GPU here first + */ + platform->unrailgate(dev); + + /* restore the GPU state */ + err = gk20a_pm_finalize_poweron(dev); + if (err) + return err; + + /* balance GPU usage counter */ + pm_runtime_put_sync(dev); + + g->forced_reset = false; + } + + /* release the lock and 
open up all other busy() calls */ + nvgpu_mutex_release(&platform->railgate_lock); + up_write(&l->busy_lock); + + gk20a_channel_deterministic_unidle(g); + + return 0; +} + +/** + * gk20a_do_unidle() - wrap up for __gk20a_do_unidle() + */ +static int gk20a_do_unidle(void *_g) +{ + struct gk20a *g = (struct gk20a *)_g; + + return __gk20a_do_unidle(g); +} +#endif + +void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, + struct resource **out) +{ + struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); + + if (!r) + return NULL; + if (out) + *out = r; + return devm_ioremap_resource(&dev->dev, r); +} + +static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_stall(g); +} + +static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_nonstall(g); +} + +static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_thread_stall(g); +} + +void gk20a_remove_support(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct sim_nvgpu_linux *sim_linux; + + tegra_unregister_idle_unidle(gk20a_do_idle); + + nvgpu_kfree(g, g->dbg_regops_tmp_buf); + + nvgpu_remove_channel_support_linux(l); + + if (g->pmu.remove_support) + g->pmu.remove_support(&g->pmu); + + if (g->gr.remove_support) + g->gr.remove_support(&g->gr); + + if (g->mm.remove_ce_support) + g->mm.remove_ce_support(&g->mm); + + if (g->fifo.remove_support) + g->fifo.remove_support(&g->fifo); + + if (g->mm.remove_support) + g->mm.remove_support(&g->mm); + + if (g->sim) { + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + if (g->sim->remove_support) + g->sim->remove_support(g); + if (sim_linux->remove_support_linux) + sim_linux->remove_support_linux(g); + } + + /* free mappings to registers, etc */ + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + nvgpu_remove_usermode_support(g); + + nvgpu_free_enabled_flags(g); +} + +static int gk20a_init_support(struct platform_device *dev) +{ + int err = -ENOMEM; + struct gk20a *g = get_gk20a(&dev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g); + + l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM, + &l->reg_mem); + if (IS_ERR(l->regs)) { + nvgpu_err(g, "failed to remap gk20a registers"); + err = PTR_ERR(l->regs); + goto fail; + } + + l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM, + &l->bar1_mem); + if (IS_ERR(l->bar1)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(l->bar1); + goto fail; + } + + err = nvgpu_init_sim_support_linux(g, dev); + if (err) + goto fail; + err = nvgpu_init_sim_support(g); + if (err) + goto fail_sim; + + nvgpu_init_usermode_support(g); + return 0; + +fail_sim: + nvgpu_remove_sim_support_linux(g); +fail: + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + return err; +} + +static int gk20a_pm_railgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + struct gk20a *g = get_gk20a(dev); + + /* if platform is already railgated, then just return */ + if (platform->is_railgated && platform->is_railgated(dev)) + return ret; + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_gate_start = jiffies; + + if 
(g->pstats.railgating_cycle_count >= 1) + g->pstats.total_rail_ungate_time_ms = + g->pstats.total_rail_ungate_time_ms + + jiffies_to_msecs(g->pstats.last_rail_gate_start - + g->pstats.last_rail_ungate_complete); +#endif + + if (platform->railgate) + ret = platform->railgate(dev); + if (ret) { + nvgpu_err(g, "failed to railgate platform, err=%d", ret); + return ret; + } + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_gate_complete = jiffies; +#endif + ret = tegra_fuse_clock_disable(); + if (ret) + nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); + + return ret; +} + +static int gk20a_pm_unrailgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + struct gk20a *g = get_gk20a(dev); + + ret = tegra_fuse_clock_enable(); + if (ret) { + nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); + return ret; + } +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_ungate_start = jiffies; + if (g->pstats.railgating_cycle_count >= 1) + g->pstats.total_rail_gate_time_ms = + g->pstats.total_rail_gate_time_ms + + jiffies_to_msecs(g->pstats.last_rail_ungate_start - + g->pstats.last_rail_gate_complete); + + g->pstats.railgating_cycle_count++; +#endif + + trace_gk20a_pm_unrailgate(dev_name(dev)); + + if (platform->unrailgate) { + nvgpu_mutex_acquire(&platform->railgate_lock); + ret = platform->unrailgate(dev); + nvgpu_mutex_release(&platform->railgate_lock); + } + +#ifdef CONFIG_DEBUG_FS + g->pstats.last_rail_ungate_complete = jiffies; +#endif + + return ret; +} + +/* + * Remove association of the driver with OS interrupt handler + */ +void nvgpu_free_irq(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + devm_free_irq(dev, g->irq_stall, g); + if (g->irq_stall != g->irq_nonstall) + devm_free_irq(dev, g->irq_nonstall, g); +} + +/* + * Idle the GPU in preparation of shutdown/remove. + * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW + * state to prevent further activity on the driver SW side. 
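+ * (start_unload() sets NVGPU_DRIVER_IS_DYING, so later gk20a_busy() calls
+ * fail).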
+ * On driver removal quiesce() should be called after start_unload() + */ +int nvgpu_quiesce(struct gk20a *g) +{ + int err; + struct device *dev = dev_from_gk20a(g); + + if (g->power_on) { + err = gk20a_wait_for_idle(g); + if (err) { + nvgpu_err(g, "failed to idle GPU, err=%d", err); + return err; + } + + err = gk20a_fifo_disable_all_engine_activity(g, true); + if (err) { + nvgpu_err(g, + "failed to disable engine activity, err=%d", + err); + return err; + } + + err = gk20a_fifo_wait_engine_idle(g); + if (err) { + nvgpu_err(g, "failed to idle engines, err=%d", + err); + return err; + } + } + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_prepare_poweroff(dev); + else + err = gk20a_pm_prepare_poweroff(dev); + + if (err) + nvgpu_err(g, "failed to prepare for poweroff, err=%d", + err); + + return err; +} + +static void gk20a_pm_shutdown(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct gk20a *g = platform->g; + int err; + + nvgpu_info(g, "shutting down"); + + /* vgpu has nothing to clean up currently */ + if (gk20a_gpu_is_virtual(&pdev->dev)) + return; + + if (!g->power_on) + goto finish; + + gk20a_driver_start_unload(g); + + /* If GPU is already railgated, + * just prevent more requests, and return */ + if (platform->is_railgated && platform->is_railgated(&pdev->dev)) { + __pm_runtime_disable(&pdev->dev, false); + nvgpu_info(g, "already railgated, shut down complete"); + return; + } + + /* Prevent more requests by disabling Runtime PM */ + __pm_runtime_disable(&pdev->dev, false); + + err = nvgpu_quiesce(g); + if (err) + goto finish; + + err = gk20a_pm_railgate(&pdev->dev); + if (err) + nvgpu_err(g, "failed to railgate, err=%d", err); + +finish: + nvgpu_info(g, "shut down complete"); +} + +#ifdef CONFIG_PM +static int gk20a_pm_runtime_resume(struct device *dev) +{ + int err = 0; + + err = gk20a_pm_unrailgate(dev); + if (err) + goto fail; + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_finalize_poweron(dev); + else + err = gk20a_pm_finalize_poweron(dev); + if (err) + goto fail_poweron; + + return 0; + +fail_poweron: + gk20a_pm_railgate(dev); +fail: + return err; +} + +static int gk20a_pm_runtime_suspend(struct device *dev) +{ + int err = 0; + struct gk20a *g = get_gk20a(dev); + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_prepare_poweroff(dev); + else + err = gk20a_pm_prepare_poweroff(dev); + if (err) { + nvgpu_err(g, "failed to power off, err=%d", err); + goto fail; + } + + err = gk20a_pm_railgate(dev); + if (err) + goto fail; + + return 0; + +fail: + gk20a_pm_finalize_poweron(dev); + pm_runtime_mark_last_busy(dev); + return err; +} + +static int gk20a_pm_suspend(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + int ret = 0; + int idle_usage_count = 0; + + if (!g->power_on) { + if (!pm_runtime_enabled(dev)) + gk20a_pm_railgate(dev); + return 0; + } + + if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count) + return -EBUSY; + + ret = gk20a_pm_runtime_suspend(dev); + if (ret) + return ret; + + if (platform->suspend) + platform->suspend(dev); + + g->suspended = true; + + return 0; +} + +static int gk20a_pm_resume(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int ret = 0; + + if (!g->suspended) { + if (!pm_runtime_enabled(dev)) + gk20a_pm_unrailgate(dev); + return 0; + } + + ret = gk20a_pm_runtime_resume(dev); + + g->suspended = false; + + return ret; +} + +static const struct dev_pm_ops gk20a_pm_ops = { + .runtime_resume = 
gk20a_pm_runtime_resume, + .runtime_suspend = gk20a_pm_runtime_suspend, + .resume = gk20a_pm_resume, + .suspend = gk20a_pm_suspend, +}; +#endif + +static int gk20a_pm_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int err = 0; + + nvgpu_log_fn(g, " "); + + /* + * Initialise pm runtime. For railgate disable + * case, set autosuspend delay to negative which + * will suspend runtime pm + */ + if (g->railgate_delay && g->can_railgate) + pm_runtime_set_autosuspend_delay(dev, + g->railgate_delay); + else + pm_runtime_set_autosuspend_delay(dev, -1); + + pm_runtime_use_autosuspend(dev); + pm_runtime_enable(dev); + + return err; +} + +/* + * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING. + */ +void gk20a_driver_start_unload(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n"); + + down_write(&l->busy_lock); + __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); + /* GR SW ready needs to be invalidated at this time with the busy lock + * held to prevent a racing condition on the gr/mm code */ + g->gr.sw_ready = false; + g->sw_ready = false; + up_write(&l->busy_lock); + + if (g->is_virtual) + return; + + gk20a_wait_for_idle(g); + + nvgpu_wait_for_deferred_interrupts(g); + + if (l->nonstall_work_queue) { + cancel_work_sync(&l->nonstall_fn_work); + destroy_workqueue(l->nonstall_work_queue); + l->nonstall_work_queue = NULL; + } +} + +static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a) +{ + gk20a_get_platform(&pdev->dev)->g = gk20a; +} + +static int nvgpu_read_fuse_overrides(struct gk20a *g) +{ + struct device_node *np = nvgpu_get_node(g); + u32 *fuses; + int count, i; + + if (!np) /* may be pcie device */ + return 0; + + count = of_property_count_elems_of_size(np, "fuse-overrides", 8); + if (count <= 0) + return count; + + fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); + if (!fuses) + return -ENOMEM; + of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); + for (i = 0; i < count; i++) { + u32 fuse, value; + + fuse = fuses[2 * i]; + value = fuses[2 * i + 1]; + switch (fuse) { + case GM20B_FUSE_OPT_TPC_DISABLE: + g->tpc_fs_mask_user = ~value; + break; + case GP10B_FUSE_OPT_ECC_EN: + g->gr.fecs_feature_override_ecc_val = value; + break; + default: + nvgpu_err(g, "ignore unknown fuse override %08x", fuse); + break; + } + } + + nvgpu_kfree(g, fuses); + + return 0; +} + +static int gk20a_probe(struct platform_device *dev) +{ + struct nvgpu_os_linux *l = NULL; + struct gk20a *gk20a; + int err; + struct gk20a_platform *platform = NULL; + struct device_node *np; + + if (dev->dev.of_node) { + const struct of_device_id *match; + + match = of_match_device(tegra_gk20a_of_match, &dev->dev); + if (match) + platform = (struct gk20a_platform *)match->data; + } else + platform = (struct gk20a_platform *)dev->dev.platform_data; + + if (!platform) { + dev_err(&dev->dev, "no platform data\n"); + return -ENODATA; + } + + platform_set_drvdata(dev, platform); + + if (gk20a_gpu_is_virtual(&dev->dev)) + return vgpu_probe(dev); + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(&dev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + hash_init(l->ecc_sysfs_stats_htable); + + gk20a = &l->g; + + nvgpu_log_fn(gk20a, " "); + + nvgpu_init_gk20a(gk20a); + set_gk20a(dev, gk20a); + l->dev = &dev->dev; + gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK; + + nvgpu_kmem_init(gk20a); + + err = nvgpu_init_enabled_flags(gk20a); + if (err) + goto 
return_err; + + np = nvgpu_get_node(gk20a); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); + } + + if (nvgpu_platform_is_simulation(gk20a)) + __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); + + gk20a->irq_stall = platform_get_irq(dev, 0); + gk20a->irq_nonstall = platform_get_irq(dev, 1); + if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) { + err = -ENXIO; + goto return_err; + } + + err = devm_request_threaded_irq(&dev->dev, + gk20a->irq_stall, + gk20a_intr_isr_stall, + gk20a_intr_thread_stall, + 0, "gk20a_stall", gk20a); + if (err) { + dev_err(&dev->dev, + "failed to request stall intr irq @ %d\n", + gk20a->irq_stall); + goto return_err; + } + err = devm_request_irq(&dev->dev, + gk20a->irq_nonstall, + gk20a_intr_isr_nonstall, + 0, "gk20a_nonstall", gk20a); + if (err) { + dev_err(&dev->dev, + "failed to request non-stall intr irq @ %d\n", + gk20a->irq_nonstall); + goto return_err; + } + disable_irq(gk20a->irq_stall); + if (gk20a->irq_stall != gk20a->irq_nonstall) + disable_irq(gk20a->irq_nonstall); + + err = gk20a_init_support(dev); + if (err) + goto return_err; + + err = nvgpu_read_fuse_overrides(gk20a); + +#ifdef CONFIG_RESET_CONTROLLER + platform->reset_control = devm_reset_control_get(&dev->dev, NULL); + if (IS_ERR(platform->reset_control)) + platform->reset_control = NULL; +#endif + + err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class); + if (err) + goto return_err; + + err = gk20a_pm_init(&dev->dev); + if (err) { + dev_err(&dev->dev, "pm init failed"); + goto return_err; + } + + gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); + + return 0; + +return_err: + nvgpu_free_enabled_flags(gk20a); + + /* + * Last since the above allocs may use data structures in here. 
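+	 * NVGPU_KMEM_FINI_FORCE_CLEANUP below also frees anything still tracked.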
+ */ + nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); + + kfree(l); + + return err; +} + +int nvgpu_remove(struct device *dev, struct class *class) +{ + struct gk20a *g = get_gk20a(dev); +#ifdef CONFIG_NVGPU_SUPPORT_CDE + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); +#endif + struct gk20a_platform *platform = gk20a_get_platform(dev); + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_quiesce(g); + WARN(err, "gpu failed to idle during driver removal"); + + if (nvgpu_mem_is_valid(&g->syncpt_mem)) + nvgpu_dma_free(g, &g->syncpt_mem); + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + if (platform->has_cde) + gk20a_cde_destroy(l); +#endif + +#ifdef CONFIG_GK20A_CTXSW_TRACE + gk20a_ctxsw_trace_cleanup(g); +#endif + + gk20a_sched_ctrl_cleanup(g); + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_exit(dev); + + nvgpu_clk_arb_cleanup_arbiter(g); + + gk20a_user_deinit(dev, class); + + gk20a_debug_deinit(g); + + nvgpu_remove_sysfs(dev); + + if (platform->secure_buffer.destroy) + platform->secure_buffer.destroy(g, + &platform->secure_buffer); + + if (pm_runtime_enabled(dev)) + pm_runtime_disable(dev); + + if (platform->remove) + platform->remove(dev); + + nvgpu_log_fn(g, "removed"); + + return err; +} + +static int __exit gk20a_remove(struct platform_device *pdev) +{ + int err; + struct device *dev = &pdev->dev; + struct gk20a *g = get_gk20a(dev); + + if (gk20a_gpu_is_virtual(dev)) + return vgpu_remove(pdev); + + err = nvgpu_remove(dev, &nvgpu_class); + + set_gk20a(pdev, NULL); + gk20a_put(g); + + return err; +} + +static struct platform_driver gk20a_driver = { + .probe = gk20a_probe, + .remove = __exit_p(gk20a_remove), + .shutdown = gk20a_pm_shutdown, + .driver = { + .owner = THIS_MODULE, + .name = "gk20a", + .probe_type = PROBE_PREFER_ASYNCHRONOUS, +#ifdef CONFIG_OF + .of_match_table = tegra_gk20a_of_match, +#endif +#ifdef CONFIG_PM + .pm = &gk20a_pm_ops, +#endif + .suppress_bind_attrs = true, + } +}; + +struct class nvgpu_class = { + .owner = THIS_MODULE, + .name = CLASS_NAME, +}; + +static int __init gk20a_init(void) +{ + + int ret; + + ret = class_register(&nvgpu_class); + if (ret) + return ret; + + ret = nvgpu_pci_init(); + if (ret) + return ret; + + return platform_driver_register(&gk20a_driver); +} + +static void __exit gk20a_exit(void) +{ + nvgpu_pci_exit(); + platform_driver_unregister(&gk20a_driver); + class_unregister(&nvgpu_class); +} + +MODULE_LICENSE("GPL v2"); +module_init(gk20a_init); +module_exit(gk20a_exit); diff --git a/drivers/gpu/nvgpu/os/linux/module.h b/drivers/gpu/nvgpu/os/linux/module.h new file mode 100644 index 00000000..ab4bca03 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
+#define __NVGPU_COMMON_LINUX_MODULE_H__
+
+struct gk20a;
+struct device;
+struct nvgpu_os_linux;
+
+int gk20a_pm_finalize_poweron(struct device *dev);
+int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
+void gk20a_remove_support(struct gk20a *g);
+void gk20a_driver_start_unload(struct gk20a *g);
+int nvgpu_quiesce(struct gk20a *g);
+int nvgpu_remove(struct device *dev, struct class *class);
+void nvgpu_free_irq(struct gk20a *g);
+struct device_node *nvgpu_get_node(struct gk20a *g);
+void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
+		struct resource **out);
+extern struct class nvgpu_class;
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.c b/drivers/gpu/nvgpu/os/linux/module_usermode.c
new file mode 100644
index 00000000..ea01c1b2
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/module_usermode.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include
+
+#include
+
+#include "os_linux.h"
+
+/*
+ * Locks out the driver from accessing GPU registers. This prevents access to
+ * these registers after the GPU has been clock or power gated. This should help
+ * find annoying bugs where register reads and writes are silently dropped
+ * after the GPU has been turned off. On older chips these reads and writes can
+ * also lock the entire CPU up.
+ */
+void nvgpu_lockout_usermode_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = NULL;
+}
+
+/*
+ * Undoes nvgpu_lockout_usermode_registers().
+ */
+void nvgpu_restore_usermode_registers(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = l->usermode_regs_saved;
+}
+
+void nvgpu_remove_usermode_support(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	if (l->usermode_regs) {
+		l->usermode_regs = NULL;
+	}
+}
+
+void nvgpu_init_usermode_support(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	l->usermode_regs = l->regs + usermode_cfg0_r();
+	l->usermode_regs_saved = l->usermode_regs;
+}
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.h b/drivers/gpu/nvgpu/os/linux/module_usermode.h
new file mode 100644
index 00000000..b17053ca
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/module_usermode.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVGPU_MODULE_T19X_H__ +#define __NVGPU_MODULE_T19X_H__ + +struct gk20a; + +void nvgpu_init_usermode_support(struct gk20a *g); +void nvgpu_remove_usermode_support(struct gk20a *g); +void nvgpu_lockout_usermode_registers(struct gk20a *g); +void nvgpu_restore_usermode_registers(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c new file mode 100644 index 00000000..93925803 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c @@ -0,0 +1,613 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "os_linux.h" + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "platform_gk20a.h" + +static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + u64 ipa = sg_phys((struct scatterlist *)sgl); + + if (platform->phys_addr) + return platform->phys_addr(g, ipa); + + return ipa; +} + +int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) +{ + void *cpu_va; + pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? + PAGE_KERNEL : + pgprot_writecombine(PAGE_KERNEL); + + if (mem->aperture != APERTURE_SYSMEM) + return 0; + + /* + * WAR for bug 2040115: we already will always have a coherent vmap() + * for all sysmem buffers. The prot settings are left alone since + * eventually this should be deleted. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return 0; + + /* + * A CPU mapping is implicitly made for all SYSMEM DMA allocations that + * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make + * another CPU mapping. + */ + if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) + return 0; + + if (WARN_ON(mem->cpu_va)) { + nvgpu_warn(g, "nested"); + return -EBUSY; + } + + cpu_va = vmap(mem->priv.pages, + PAGE_ALIGN(mem->size) >> PAGE_SHIFT, + 0, prot); + + if (WARN_ON(!cpu_va)) + return -ENOMEM; + + mem->cpu_va = cpu_va; + return 0; +} + +void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) +{ + if (mem->aperture != APERTURE_SYSMEM) + return; + + /* + * WAR for bug 2040115: skip this since the map will be taken care of + * during the free in the DMA API. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return; + + /* + * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping + * already made by the DMA API. 
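+ * Only a mapping we made ourselves with vmap() in nvgpu_mem_begin() is
+ * torn down here.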
+ */ + if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) + return; + + vunmap(mem->cpu_va); + mem->cpu_va = NULL; +} + +static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, *dest_u32 = *arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + *dest_u32++ = gk20a_readl(g, r); + r += sizeof(u32); + } + + *arg = dest_u32; +} + +u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) +{ + u32 data = 0; + + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + + WARN_ON(!ptr); + data = ptr[w]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 value; + u32 *p = &value; + + nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), + sizeof(u32), pramin_access_batch_rd_n, &p); + + data = value; + + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } + + return data; +} + +u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) +{ + WARN_ON(offset & 3); + return nvgpu_mem_rd32(g, mem, offset / sizeof(u32)); +} + +void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, + u32 offset, void *dest, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *src = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); + memcpy(dest, src, size); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", + src, *dest, size); +#endif + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 *dest_u32 = dest; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_rd_n, &dest_u32); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, *src_u32 = *arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + writel_relaxed(*src_u32++, l->regs + r); + r += sizeof(u32); + } + + *arg = src_u32; +} + +void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) +{ + if (mem->aperture == APERTURE_SYSMEM) { + u32 *ptr = mem->cpu_va; + + WARN_ON(!ptr); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + ptr[w] = data; + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 value = data; + u32 *p = &value; + + nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), + sizeof(u32), pramin_access_batch_wr_n, &p); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) +{ + WARN_ON(offset & 3); + nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data); +} + +void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, + void *src, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... 
[%d bytes]", + dest, *src, size); +#endif + memcpy(dest, src, size); + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 *src_u32 = src; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_wr_n, &src_u32); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + u32 r = start, repeat = **arg; + + if (!l->regs) { + __gk20a_warn_on_no_regs(); + return; + } + + while (words--) { + writel_relaxed(repeat, l->regs + r); + r += sizeof(u32); + } +} + +void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, + u32 c, u32 size) +{ + WARN_ON(offset & 3); + WARN_ON(size & 3); + WARN_ON(c & ~0xff); + + c &= 0xff; + + if (mem->aperture == APERTURE_SYSMEM) { + u8 *dest = (u8 *)mem->cpu_va + offset; + + WARN_ON(!mem->cpu_va); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + if (size) + nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]", + dest, c, size); +#endif + memset(dest, c, size); + } else if (mem->aperture == APERTURE_VIDMEM) { + u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); + u32 *p = &repeat_value; + + nvgpu_pramin_access_batched(g, mem, offset, size, + pramin_access_batch_set, &p); + if (!mem->skip_wmb) + wmb(); + } else { + WARN_ON("Accessing unallocated nvgpu_mem"); + } +} + +/* + * Obtain a SYSMEM address from a Linux SGL. This should eventually go away + * and/or become private to this file once all bad usages of Linux SGLs are + * cleaned up in the driver. + */ +u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) +{ + if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || + !nvgpu_iommuable(g)) + return g->ops.mm.gpu_phys_addr(g, NULL, + __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); + + if (sg_dma_address(sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, NULL, + __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); + + if (sg_dma_address(sgl) == DMA_ERROR_CODE) + return 0; + + return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); +} + +/* + * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM + * allocation. + */ +static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem) +{ + return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl); +} + +/* + * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM + * allocation. + * + * Note: this API does not make sense to use for _VIDMEM_ buffers with greater + * than one scatterlist chunk. If there's more than one scatterlist chunk then + * the buffer will not be contiguous. As such the base address probably isn't + * very useful. This is true for SYSMEM as well, if there's no IOMMU. + * + * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's + * an IOMMU present and enabled for the GPU. + * + * %attrs can be NULL. If it is not NULL then it may be inspected to determine + * if the address needs to be modified before writing into a PTE. + */ +u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct nvgpu_page_alloc *alloc; + + if (mem->aperture == APERTURE_SYSMEM) + return nvgpu_mem_get_addr_sysmem(g, mem); + + /* + * Otherwise get the vidmem address. 
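The pramin_access_batch_* callbacks above are driven by nvgpu_pramin_access_batched() (defined outside this patch), which splits a byte range into PRAMIN-window-sized chunks and invokes the callback once per chunk with a register offset and a word count; the callback advances the caller's buffer pointer through **arg. A stand-alone sketch of that chunk-and-callback shape, with a hypothetical window size and a stub in place of gk20a_readl():

#include <stdint.h>
#include <stdio.h>

#define WINDOW_WORDS 4	/* hypothetical per-window limit */

typedef void (*batch_fn)(uint32_t start, uint32_t words, uint32_t **arg);

/* Mimics the rd_n callback: copy 'words' items out, advance *arg. */
static void batch_rd(uint32_t start, uint32_t words, uint32_t **arg)
{
	uint32_t *dst = *arg;

	while (words--)
		*dst++ = start++;	/* stand-in for gk20a_readl(g, r) */
	*arg = dst;
}

static void access_batched(uint32_t offset, uint32_t words,
			   batch_fn fn, uint32_t **arg)
{
	while (words) {
		uint32_t n = words < WINDOW_WORDS ? words : WINDOW_WORDS;

		fn(offset, n, arg);	/* one call per aperture window */
		offset += n;
		words -= n;
	}
}

int main(void)
{
	uint32_t buf[10], *p = buf;

	access_batched(100, 10, batch_rd, &p);
	printf("%u %u\n", buf[0], buf[9]);	/* 100 109 */
	return 0;
}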
+ */ + alloc = mem->vidmem_alloc; + + /* This API should not be used with > 1 chunks */ + WARN_ON(alloc->nr_chunks != 1); + + return alloc->base; +} + +/* + * This should only be used on contiguous buffers regardless of whether + * there's an IOMMU present/enabled. This applies to both SYSMEM and + * VIDMEM. + */ +u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) +{ + /* + * For a VIDMEM buf, this is identical to simply get_addr() so just fall + * back to that. + */ + if (mem->aperture == APERTURE_VIDMEM) + return nvgpu_mem_get_addr(g, mem); + + return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); +} + +/* + * Be careful how you use this! You are responsible for correctly freeing this + * memory. + */ +int nvgpu_mem_create_from_mem(struct gk20a *g, + struct nvgpu_mem *dest, struct nvgpu_mem *src, + int start_page, int nr_pages) +{ + int ret; + u64 start = start_page * PAGE_SIZE; + u64 size = nr_pages * PAGE_SIZE; + dma_addr_t new_iova; + + if (src->aperture != APERTURE_SYSMEM) + return -EINVAL; + + /* Some silly things a caller might do... */ + if (size > src->size) + return -EINVAL; + if ((start + size) > src->size) + return -EINVAL; + + dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; + dest->aperture = src->aperture; + dest->skip_wmb = src->skip_wmb; + dest->size = size; + + /* + * Re-use the CPU mapping only if the mapping was made by the DMA API. + * + * Bug 2040115: the DMA API wrapper makes the mapping that we should + * re-use. + */ + if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || + nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); + + dest->priv.pages = src->priv.pages + start_page; + dest->priv.flags = src->priv.flags; + + new_iova = sg_dma_address(src->priv.sgt->sgl) ? + sg_dma_address(src->priv.sgt->sgl) + start : 0; + + /* + * Make a new SG table that is based only on the subset of pages that + * is passed to us. This table gets freed by the dma free routines. + */ + if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) + ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt, + src->priv.pages + start_page, + new_iova, size); + else + ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va, + new_iova, size); + + return ret; +} + +int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, + struct page **pages, int nr_pages) +{ + struct sg_table *sgt; + struct page **our_pages = + nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); + + if (!our_pages) + return -ENOMEM; + + memcpy(our_pages, pages, sizeof(struct page *) * nr_pages); + + if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, + nr_pages * PAGE_SIZE)) { + nvgpu_kfree(g, our_pages); + return -ENOMEM; + } + + /* + * If we are making an SGT from physical pages we can be reasonably + * certain that this should bypass the SMMU - thus we set the DMA (aka + * IOVA) address to 0. This tells the GMMU mapping code to not make a + * mapping directed to the SMMU. 
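nvgpu_mem_create_from_mem() above builds a page-granular view onto an existing sysmem buffer: after validating the requested range, it aliases the parent's CPU mapping and page array at an offset rather than copying anything, which is why the caller is warned about freeing. A sketch of the view arithmetic and the two range checks, using a hypothetical PAGE_SIZE:

#include <errno.h>
#include <stdint.h>

#define PAGE_SIZE 4096u	/* illustrative; the kernel supplies this */

struct buf_view {
	uint8_t *cpu_va;	/* aliases the parent mapping, not a copy */
	uint64_t size;
};

/* Carve [start_page, start_page + nr_pages) out of a parent buffer. */
static int buf_make_view(struct buf_view *dst, uint8_t *parent_va,
			 uint64_t parent_size, uint64_t start_page,
			 uint64_t nr_pages)
{
	uint64_t start = start_page * PAGE_SIZE;
	uint64_t size = nr_pages * PAGE_SIZE;

	/* Both checks are needed: size on its own, and start + size. */
	if (size > parent_size || start + size > parent_size)
		return -EINVAL;

	dst->cpu_va = parent_va + start;
	dst->size = size;
	return 0;
}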
+ */ + sg_dma_address(sgt->sgl) = 0; + + dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; + dest->aperture = APERTURE_SYSMEM; + dest->skip_wmb = 0; + dest->size = PAGE_SIZE * nr_pages; + + dest->priv.flags = 0; + dest->priv.pages = our_pages; + dest->priv.sgt = sgt; + + return 0; +} + +#ifdef CONFIG_TEGRA_GK20A_NVHOST +int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, + u64 src_phys, int nr_pages) +{ + struct page **pages = + nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); + int i, ret = 0; + + if (!pages) + return -ENOMEM; + + for (i = 0; i < nr_pages; i++) + pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); + + ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); + nvgpu_kfree(g, pages); + + return ret; +} +#endif + +static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) +{ + return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl); +} + +static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) +{ + return (u64)__nvgpu_sgl_phys(g, sgl); +} + +static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl) +{ + return (u64)sg_dma_address((struct scatterlist *)sgl); +} + +static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl) +{ + return (u64)((struct scatterlist *)sgl)->length; +} + +static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, + struct nvgpu_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + if (sg_dma_address((struct scatterlist *)sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, attrs, + __nvgpu_sgl_phys(g, sgl)); + + if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) + return 0; + + return nvgpu_mem_iommu_translate(g, + sg_dma_address((struct scatterlist *)sgl)); +} + +static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, + struct nvgpu_sgt *sgt) +{ + if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG)) + return false; + return true; +} + +static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * Free this SGT. All we do is free the passed SGT. The actual Linux + * SGT/SGL needs to be freed separately. 
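The nvgpu_mem_linux_sgl_* wrappers above exist so OS-independent code can walk a scatter-gather list without knowing it is a Linux struct scatterlist: every operation is routed through the nvgpu_sgt_ops table and iteration is a next-pointer chase over an opaque handle. A sketch of the consuming side under that assumption:

#include <stddef.h>
#include <stdint.h>

struct sgl;	/* opaque to the consumer; Linux scatterlist underneath */

struct sgt_ops {
	struct sgl *(*next)(struct sgl *sgl);
	uint64_t (*phys)(struct sgl *sgl);
	uint64_t (*length)(struct sgl *sgl);
};

/* Total size of a scatter list, using only the ops table. */
static uint64_t sgt_total_length(const struct sgt_ops *ops, struct sgl *head)
{
	uint64_t total = 0;
	struct sgl *sgl;

	for (sgl = head; sgl != NULL; sgl = ops->next(sgl))
		total += ops->length(sgl);

	return total;
}

The payoff of the vtable is that the GMMU mapping code compiles with no Linux headers at all; the Linux, vidmem, and (eventually) other-OS backends each supply their own ops.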
+ */ + nvgpu_kfree(g, sgt); +} + +static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { + .sgl_next = nvgpu_mem_linux_sgl_next, + .sgl_phys = nvgpu_mem_linux_sgl_phys, + .sgl_dma = nvgpu_mem_linux_sgl_dma, + .sgl_length = nvgpu_mem_linux_sgl_length, + .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, + .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, + .sgt_free = nvgpu_mem_linux_sgl_free, +}; + +static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( + struct gk20a *g, + struct scatterlist *linux_sgl) +{ + struct nvgpu_page_alloc *vidmem_alloc; + + vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl); + if (!vidmem_alloc) + return NULL; + + return &vidmem_alloc->sgt; +} + +struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) +{ + struct nvgpu_sgt *nvgpu_sgt; + struct scatterlist *linux_sgl = sgt->sgl; + + if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl))) + return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); + + nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); + if (!nvgpu_sgt) + return NULL; + + nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); + + nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl; + nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; + + return nvgpu_sgt; +} + +struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem) +{ + return nvgpu_linux_sgt_create(g, mem->priv.sgt); +} diff --git a/drivers/gpu/nvgpu/os/linux/nvhost.c b/drivers/gpu/nvgpu/os/linux/nvhost.c new file mode 100644 index 00000000..6ab60248 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include "nvhost_priv.h" + +#include "gk20a/gk20a.h" +#include "os_linux.h" +#include "module.h" + +int nvgpu_get_nvhost_dev(struct gk20a *g) +{ + struct device_node *np = nvgpu_get_node(g); + struct platform_device *host1x_pdev = NULL; + const __be32 *host1x_ptr; + + host1x_ptr = of_get_property(np, "nvidia,host1x", NULL); + if (host1x_ptr) { + struct device_node *host1x_node = + of_find_node_by_phandle(be32_to_cpup(host1x_ptr)); + + host1x_pdev = of_find_device_by_node(host1x_node); + if (!host1x_pdev) { + nvgpu_warn(g, "host1x device not available"); + return -EPROBE_DEFER; + } + + } else { + if (g->has_syncpoints) { + nvgpu_warn(g, "host1x reference not found. 
assuming no syncpoints support"); + g->has_syncpoints = false; + } + return 0; + } + + g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev)); + if (!g->nvhost_dev) + return -ENOMEM; + + g->nvhost_dev->host1x_pdev = host1x_pdev; + + return 0; +} + +void nvgpu_free_nvhost_dev(struct gk20a *g) +{ + nvgpu_kfree(g, g->nvhost_dev); +} + +int nvgpu_nvhost_module_busy_ext( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + return nvhost_module_busy_ext(nvhost_dev->host1x_pdev); +} + +void nvgpu_nvhost_module_idle_ext( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + nvhost_module_idle_ext(nvhost_dev->host1x_pdev); +} + +void nvgpu_nvhost_debug_dump_device( + struct nvgpu_nvhost_dev *nvhost_dev) +{ + nvhost_debug_dump_device(nvhost_dev->host1x_pdev); +} + +const char *nvgpu_nvhost_syncpt_get_name( + struct nvgpu_nvhost_dev *nvhost_dev, int id) +{ + return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id); +} + +bool nvgpu_nvhost_syncpt_is_valid_pt_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id); +} + +int nvgpu_nvhost_syncpt_is_expired_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) +{ + return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev, + id, thresh); +} + +u32 nvgpu_nvhost_syncpt_incr_max_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs) +{ + return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs); +} + +int nvgpu_nvhost_intr_register_notifier( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, + void (*callback)(void *, int), void *private_data) +{ + return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev, + id, thresh, + callback, private_data); +} + +void nvgpu_nvhost_syncpt_set_min_eq_max_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id); +} + +void nvgpu_nvhost_syncpt_put_ref_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id); +} + +u32 nvgpu_nvhost_get_syncpt_host_managed( + struct nvgpu_nvhost_dev *nvhost_dev, + u32 param, const char *syncpt_name) +{ + return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev, + param, syncpt_name); +} + +u32 nvgpu_nvhost_get_syncpt_client_managed( + struct nvgpu_nvhost_dev *nvhost_dev, + const char *syncpt_name) +{ + return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev, + syncpt_name); +} + +int nvgpu_nvhost_syncpt_wait_timeout_ext( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, + u32 thresh, u32 timeout, u32 *value, struct timespec *ts) +{ + return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev, + id, thresh, timeout, value, ts); +} + +int nvgpu_nvhost_syncpt_read_ext_check( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val) +{ + return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); +} + +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); +} + +void nvgpu_nvhost_syncpt_set_safe_state( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + u32 val; + + /* + * Add large number of increments to current value + * so that all waiters on this syncpoint are released + * + * We don't expect any case where more than 0x10000 increments + * are pending + */ + val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); + val += 0x10000; + + nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); + nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, 
val); +} + +int nvgpu_nvhost_create_symlink(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + int err = 0; + + if (g->nvhost_dev && + (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { + err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj, + &dev->kobj, + dev_name(dev)); + } + + return err; +} + +void nvgpu_nvhost_remove_symlink(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (g->nvhost_dev && + (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) { + sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj, + dev_name(dev)); + } +} + +#ifdef CONFIG_SYNC +u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt) +{ + return nvhost_sync_pt_id(pt); +} + +u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt) +{ + return nvhost_sync_pt_thresh(pt); +} + +struct sync_fence *nvgpu_nvhost_sync_fdget(int fd) +{ + return nvhost_sync_fdget(fd); +} + +int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence) +{ + return nvhost_sync_num_pts(fence); +} + +struct sync_fence *nvgpu_nvhost_sync_create_fence( + struct nvgpu_nvhost_dev *nvhost_dev, + u32 id, u32 thresh, const char *name) +{ + struct nvhost_ctrl_sync_fence_info pt = { + .id = id, + .thresh = thresh, + }; + + return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name); +} +#endif /* CONFIG_SYNC */ + +#ifdef CONFIG_TEGRA_T19X_GRHOST +int nvgpu_nvhost_syncpt_unit_interface_get_aperture( + struct nvgpu_nvhost_dev *nvhost_dev, + u64 *base, size_t *size) +{ + return nvhost_syncpt_unit_interface_get_aperture( + nvhost_dev->host1x_pdev, (phys_addr_t *)base, size); +} + +u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id) +{ + return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id); +} + +int nvgpu_nvhost_syncpt_init(struct gk20a *g) +{ + int err = 0; + + if (!g->has_syncpoints) + return -ENOSYS; + + err = nvgpu_get_nvhost_dev(g); + if (err) { + nvgpu_err(g, "host1x device not available"); + g->has_syncpoints = false; + return -ENOSYS; + } + + err = nvgpu_nvhost_syncpt_unit_interface_get_aperture( + g->nvhost_dev, + &g->syncpt_unit_base, + &g->syncpt_unit_size); + if (err) { + nvgpu_err(g, "Failed to get syncpt interface"); + g->has_syncpoints = false; + return -ENOSYS; + } + + g->syncpt_size = + nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); + nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", + g->syncpt_unit_base, g->syncpt_unit_size, + g->syncpt_size); + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/os/linux/nvhost_priv.h b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h new file mode 100644 index 00000000..c03390a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
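nvgpu_nvhost_syncpt_set_safe_state() above releases every waiter by jumping the syncpoint value far past any plausible pending threshold (the driver assumes fewer than 0x10000 outstanding increments) and pinning min == max so the syncpoint goes quiescent. This works because syncpoint expiry is a wrapping-safe comparison; a sketch of that test and the jump:

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe: true once value has reached or passed thresh. */
static int syncpt_expired(uint32_t value, uint32_t thresh)
{
	return (int32_t)(value - thresh) >= 0;
}

int main(void)
{
	uint32_t min = 0xfffffff0u;	/* current syncpoint value */
	uint32_t thresh = min + 5;	/* a pending wait, past the wrap */

	printf("%d\n", syncpt_expired(min, thresh));	/* 0: still waiting */
	min += 0x10000;			/* the safe-state jump */
	printf("%d\n", syncpt_expired(min, thresh));	/* 1: released */
	return 0;
}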
+ */
+
+#ifndef __NVGPU_NVHOST_PRIV_H__
+#define __NVGPU_NVHOST_PRIV_H__
+
+struct nvgpu_nvhost_dev {
+	struct platform_device *host1x_pdev;
+};
+
+#endif /* __NVGPU_NVHOST_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/nvlink.c b/drivers/gpu/nvgpu/os/linux/nvlink.c
new file mode 100644
index 00000000..c93514c0
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/nvlink.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include 
+#include 
+#include 
+#include "module.h"
+
+#ifdef CONFIG_TEGRA_NVLINK
+int nvgpu_nvlink_read_dt_props(struct gk20a *g)
+{
+	struct device_node *np;
+	struct nvlink_device *ndev = g->nvlink.priv;
+	u32 local_dev_id;
+	u32 local_link_id;
+	u32 remote_dev_id;
+	u32 remote_link_id;
+	bool is_master;
+
+	/* Parse DT */
+	np = nvgpu_get_node(g);
+	if (!np)
+		goto fail;
+
+	np = of_get_child_by_name(np, "nvidia,nvlink");
+	if (!np)
+		goto fail;
+
+	np = of_get_child_by_name(np, "endpoint");
+	if (!np)
+		goto fail;
+
+	/* Parse DT structure to detect endpoint topology */
+	of_property_read_u32(np, "local_dev_id", &local_dev_id);
+	of_property_read_u32(np, "local_link_id", &local_link_id);
+	of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
+	of_property_read_u32(np, "remote_link_id", &remote_link_id);
+	is_master = of_property_read_bool(np, "is_master");
+
+	/* Check that we are in dGPU mode */
+	if (local_dev_id != NVLINK_ENDPT_GV100) {
+		nvgpu_err(g, "Local nvlink device is not dGPU");
+		return -EINVAL;
+	}
+
+	ndev->is_master = is_master;
+	ndev->device_id = local_dev_id;
+	ndev->link.link_id = local_link_id;
+	ndev->link.remote_dev_info.device_id = remote_dev_id;
+	ndev->link.remote_dev_info.link_id = remote_link_id;
+
+	return 0;
+
+fail:
+	nvgpu_info(g, "nvlink endpoint not found or invalid in DT");
+	return -ENODEV;
+}
+#endif /* CONFIG_TEGRA_NVLINK */
+
+void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
+{
+	/* MSS_NVLINK_1_BASE */
+	void __iomem *soc1 = ioremap(0x01f20010, 4096);
+	/* MSS_NVLINK_2_BASE */
+	void __iomem *soc2 = ioremap(0x01f40010, 4096);
+	/* MSS_NVLINK_3_BASE */
+	void __iomem *soc3 = ioremap(0x01f60010, 4096);
+	/* MSS_NVLINK_4_BASE */
+	void __iomem *soc4 = ioremap(0x01f80010, 4096);
+	u32 val;
+
+	nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
+
+	val = readl_relaxed(soc1);
+	writel_relaxed(val, soc1);
+	val = readl_relaxed(soc1 + 4);
+	writel_relaxed(val, soc1 + 4);
+
+	val = readl_relaxed(soc2);
+	writel_relaxed(val, soc2);
+	val = readl_relaxed(soc2 + 4);
+	writel_relaxed(val, soc2 + 4);
+
+	val = readl_relaxed(soc3);
+	writel_relaxed(val, soc3);
+	val = readl_relaxed(soc3 + 4);
+	writel_relaxed(val, soc3 + 4);
+
+	val = readl_relaxed(soc4);
+	writel_relaxed(val, soc4);
+	val = readl_relaxed(soc4 + 4);
+	writel_relaxed(val, soc4 + 4);
+}
diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android.c b/drivers/gpu/nvgpu/os/linux/os_fence_android.c
new file mode 100644
index 00000000..9be8c6c0
--- /dev/null
+++ 
b/drivers/gpu/nvgpu/os/linux/os_fence_android.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include + +#include "gk20a/gk20a.h" + +#include "../drivers/staging/android/sync.h" + +inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) +{ + struct sync_fence *fence = (struct sync_fence *)s->priv; + return fence; +} + +static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) +{ + fence_out->priv = NULL; + fence_out->g = NULL; + fence_out->ops = NULL; +} + +void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out, + struct gk20a *g, const struct nvgpu_os_fence_ops *fops, + struct sync_fence *fence) +{ + fence_out->g = g; + fence_out->ops = fops; + fence_out->priv = (void *)fence; +} + +void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s) +{ + struct sync_fence *fence = nvgpu_get_sync_fence(s); + + sync_fence_put(fence); + + nvgpu_os_fence_clear(s); +} + +void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd) +{ + struct sync_fence *fence = nvgpu_get_sync_fence(s); + + sync_fence_get(fence); + sync_fence_install(fence, fd); +} + +int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + int err = -ENOSYS; + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd); +#endif + + if (err) + err = nvgpu_os_fence_sema_fdget(fence_out, c, fd); + + if (err) + nvgpu_err(c->g, "error obtaining fence from fd %d", fd); + + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c new file mode 100644 index 00000000..25832417 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
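nvgpu_os_fence_fdget() above resolves a user fd by trying the syncpoint backend first (when CONFIG_TEGRA_GK20A_NVHOST is built in; -ENOSYS stands in for "not compiled in") and falling back to the semaphore backend on any failure. The same try-chain shape in stand-alone C, with hypothetical backends in place of the real fdget calls:

#include <errno.h>
#include <stdio.h>

struct fence { int backend; };

static int backend_a_get(struct fence *f, int fd)
{
	if (fd % 2 == 0)	/* pretend backend A only knows odd fds */
		return -EINVAL;
	f->backend = 'A';
	return 0;
}

static int backend_b_get(struct fence *f, int fd)
{
	f->backend = 'B';	/* fallback accepts anything */
	return 0;
}

static int fence_from_fd(struct fence *f, int fd)
{
	int err = -ENOSYS;	/* value seen when backend A is absent */

	err = backend_a_get(f, fd);
	if (err)
		err = backend_b_get(f, fd);
	return err;
}

int main(void)
{
	struct fence f;

	fence_from_fd(&f, 3);
	printf("%c\n", f.backend);	/* A */
	fence_from_fd(&f, 4);
	printf("%c\n", f.backend);	/* B */
	return 0;
}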
+ */ + +#include + +#include +#include +#include +#include + +#include "gk20a/channel_sync_gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "sync_sema_android.h" + +#include "../drivers/staging/android/sync.h" + +int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, + struct priv_cmd_entry *wait_cmd, + struct channel_gk20a *c, + int max_wait_cmds) +{ + int err; + int wait_cmd_size; + int num_wait_cmds; + int i; + struct nvgpu_semaphore *sema; + struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); + + wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); + + num_wait_cmds = sync_fence->num_fences; + if (num_wait_cmds == 0) + return 0; + + if (max_wait_cmds && num_wait_cmds > max_wait_cmds) + return -EINVAL; + + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, + wait_cmd); + if (err) { + nvgpu_err(c->g, "not enough priv cmd buffer space"); + return err; + } + + for (i = 0; i < num_wait_cmds; i++) { + struct fence *f = sync_fence->cbs[i].sync_pt; + struct sync_pt *pt = sync_pt_from_fence(f); + + sema = gk20a_sync_pt_sema(pt); + gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd, + wait_cmd_size, i); + } + + return 0; +} + +static const struct nvgpu_os_fence_ops sema_ops = { + .program_waits = nvgpu_os_fence_sema_wait_gen_cmd, + .drop_ref = nvgpu_os_fence_android_drop_ref, + .install_fence = nvgpu_os_fence_android_install_fd, +}; + +int nvgpu_os_fence_sema_create( + struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, + struct nvgpu_semaphore *sema) +{ + struct sync_fence *fence; + + fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x", + nvgpu_semaphore_gpu_ro_va(sema)); + + if (!fence) { + nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x", + (u32)nvgpu_semaphore_gpu_ro_va(sema)); + + return -ENOMEM; + } + + nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); + + return 0; +} + +int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + struct sync_fence *fence = gk20a_sync_fence_fdget(fd); + + if (!fence) + return -EINVAL; + + nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence); + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c new file mode 100644 index 00000000..d7a72fcd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
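The semaphore wait generator above allocates one private command buffer for the whole sync_fence (num_fences waits of wait_cmd_size words each, in a single gk20a_channel_alloc_priv_cmdbuf() call) and then fills it slot by slot, the loop index selecting each wait's offset. A sketch of that fixed-stride packing; the method-header constant and field layout here are invented purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WAIT_CMD_WORDS 4	/* hypothetical size of one semaphore wait */

/* Emit one wait into slot 'i' of a buffer sized num * WAIT_CMD_WORDS. */
static void emit_wait(uint32_t *buf, int i, uint64_t sema_va, uint32_t payload)
{
	uint32_t *cmd = buf + (size_t)i * WAIT_CMD_WORDS;

	cmd[0] = 0x20010004;		/* stand-in for the method header */
	cmd[1] = (uint32_t)(sema_va >> 32);
	cmd[2] = (uint32_t)sema_va;
	cmd[3] = payload;		/* value the semaphore must reach */
}

int main(void)
{
	uint32_t buf[3 * WAIT_CMD_WORDS];
	int i;

	memset(buf, 0, sizeof(buf));
	for (i = 0; i < 3; i++)
		emit_wait(buf, i, 0x100000 + i * 0x10, 1);

	printf("0x%x\n", buf[WAIT_CMD_WORDS + 2]);	/* 0x100010 */
	return 0;
}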
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/channel_sync_gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "../drivers/staging/android/sync.h" + +int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s, + struct priv_cmd_entry *wait_cmd, + struct channel_gk20a *c, + int max_wait_cmds) +{ + int err; + int wait_cmd_size; + int num_wait_cmds; + int i; + u32 wait_id; + struct sync_pt *pt; + + struct sync_fence *sync_fence = (struct sync_fence *)s->priv; + + if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) + return -EINVAL; + + /* validate syncpt ids */ + for (i = 0; i < sync_fence->num_fences; i++) { + pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt); + wait_id = nvgpu_nvhost_sync_pt_id(pt); + if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext( + c->g->nvhost_dev, wait_id)) { + return -EINVAL; + } + } + + num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence); + if (num_wait_cmds == 0) + return 0; + + wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, wait_cmd); + if (err) { + nvgpu_err(c->g, + "not enough priv cmd buffer space"); + return err; + } + + for (i = 0; i < sync_fence->num_fences; i++) { + struct fence *f = sync_fence->cbs[i].sync_pt; + struct sync_pt *pt = sync_pt_from_fence(f); + u32 wait_id = nvgpu_nvhost_sync_pt_id(pt); + u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt); + + err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value, + wait_cmd, wait_cmd_size, i, true); + } + + WARN_ON(i != num_wait_cmds); + + return 0; +} + +static const struct nvgpu_os_fence_ops syncpt_ops = { + .program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd, + .drop_ref = nvgpu_os_fence_android_drop_ref, + .install_fence = nvgpu_os_fence_android_install_fd, +}; + +int nvgpu_os_fence_syncpt_create( + struct nvgpu_os_fence *fence_out, struct channel_gk20a *c, + struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_create_fence( + nvhost_dev, id, thresh, "fence"); + + if (!fence) { + nvgpu_err(c->g, "error constructing fence %s", "fence"); + return -ENOMEM; + } + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} + +int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, + struct channel_gk20a *c, int fd) +{ + struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); + + if (!fence) + return -ENOMEM; + + nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); + + return 0; +} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h new file mode 100644 index 00000000..4dcce322 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef NVGPU_OS_LINUX_H +#define NVGPU_OS_LINUX_H + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "cde.h" +#include "sched.h" + +struct nvgpu_os_linux_ops { + struct { + void (*get_program_numbers)(struct gk20a *g, + u32 block_height_log2, + u32 shader_parameter, + int *hprog, int *vprog); + bool (*need_scatter_buffer)(struct gk20a *g); + int (*populate_scatter_buffer)(struct gk20a *g, + struct sg_table *sgt, + size_t surface_size, + void *scatter_buffer_ptr, + size_t scatter_buffer_size); + } cde; +}; + +struct nvgpu_os_linux { + struct gk20a g; + struct device *dev; + + struct { + struct cdev cdev; + struct device *node; + } channel; + + struct { + struct cdev cdev; + struct device *node; + } ctrl; + + struct { + struct cdev cdev; + struct device *node; + } as_dev; + + struct { + struct cdev cdev; + struct device *node; + } dbg; + + struct { + struct cdev cdev; + struct device *node; + } prof; + + struct { + struct cdev cdev; + struct device *node; + } tsg; + + struct { + struct cdev cdev; + struct device *node; + } ctxsw; + + struct { + struct cdev cdev; + struct device *node; + } sched; + + dev_t cdev_region; + + struct devfreq *devfreq; + + struct device_dma_parameters dma_parms; + + atomic_t hw_irq_stall_count; + atomic_t hw_irq_nonstall_count; + + struct nvgpu_cond sw_irq_stall_last_handled_wq; + atomic_t sw_irq_stall_last_handled; + + atomic_t nonstall_ops; + + struct nvgpu_cond sw_irq_nonstall_last_handled_wq; + atomic_t sw_irq_nonstall_last_handled; + + struct work_struct nonstall_fn_work; + struct workqueue_struct *nonstall_work_queue; + + struct resource *reg_mem; + void __iomem *regs; + void __iomem *regs_saved; + + struct resource *bar1_mem; + void __iomem *bar1; + void __iomem *bar1_saved; + + void __iomem *usermode_regs; + void __iomem *usermode_regs_saved; + + struct nvgpu_os_linux_ops ops; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; + struct dentry *debugfs_alias; + + struct dentry *debugfs_ltc_enabled; + struct dentry *debugfs_timeouts_enabled; + struct dentry *debugfs_gr_idle_timeout_default; + struct dentry *debugfs_disable_bigpage; + struct dentry *debugfs_gr_default_attrib_cb_size; + + struct dentry *debugfs_timeslice_low_priority_us; + struct dentry *debugfs_timeslice_medium_priority_us; + struct dentry *debugfs_timeslice_high_priority_us; + struct dentry *debugfs_runlist_interleave; + struct dentry *debugfs_allocators; + struct dentry *debugfs_xve; + struct dentry *debugfs_kmem; + struct dentry *debugfs_hal; + + struct dentry *debugfs_force_preemption_cilp; + struct dentry *debugfs_force_preemption_gfxp; + struct dentry *debugfs_dump_ctxsw_stats; +#endif + DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); + + struct gk20a_cde_app cde_app; + + struct rw_semaphore busy_lock; + + struct gk20a_sched_ctrl sched_ctrl; + + bool init_done; +}; + +static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) +{ + return container_of(g, struct nvgpu_os_linux, g); +} + +static inline struct device *dev_from_gk20a(struct gk20a *g) +{ + return nvgpu_os_linux_from_gk20a(g)->dev; +} + +#define INTERFACE_NAME "nvhost%s-gpu" + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/os_sched.c b/drivers/gpu/nvgpu/os/linux/os_sched.c new file mode 100644 index 00000000..586b35eb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_sched.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
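struct nvgpu_os_linux above embeds struct gk20a by value, so nvgpu_os_linux_from_gk20a() can recover the Linux wrapper from a bare gk20a pointer with container_of(), i.e. by subtracting the member's offset from the member's address. Only the inner pointer ever crosses the OS-independent boundary. The same pattern in portable C:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct core { int id; };

struct os_wrap {
	int os_state;
	struct core g;	/* embedded by value, as in nvgpu_os_linux */
};

int main(void)
{
	struct os_wrap w = { .os_state = 7, .g = { .id = 42 } };
	struct core *g = &w.g;

	/* Recover the wrapper from the embedded member. */
	struct os_wrap *back = container_of(g, struct os_wrap, g);

	printf("%d %d\n", back->os_state, g->id);	/* 7 42 */
	return 0;
}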
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +int nvgpu_current_tid(struct gk20a *g) +{ + return current->pid; +} + +int nvgpu_current_pid(struct gk20a *g) +{ + return current->tgid; +} diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c new file mode 100644 index 00000000..1011b441 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.c @@ -0,0 +1,861 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "clk/clk.h" +#include "clk/clk_mclk.h" +#include "module.h" +#include "intr.h" +#include "sysfs.h" +#include "os_linux.h" +#include "platform_gk20a.h" +#include + +#include "pci.h" +#include "pci_usermode.h" + +#include "os_linux.h" +#include "driver_common.h" + +#define PCI_INTERFACE_NAME "card-%s%%s" + +static int nvgpu_pci_tegra_probe(struct device *dev) +{ + return 0; +} + +static int nvgpu_pci_tegra_remove(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + if (g->ops.gr.remove_gr_sysfs) + g->ops.gr.remove_gr_sysfs(g); + + return 0; +} + +static bool nvgpu_pci_tegra_is_railgated(struct device *pdev) +{ + return false; +} + +static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) +{ + long ret = (long)rate; + + if (rate == UINT_MAX) + ret = BOOT_GPC2CLK_MHZ * 1000000UL; + + return ret; +} + +static struct gk20a_platform nvgpu_pci_device[] = { + { /* DEVICE=0x1c35 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86063000, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c36 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = 
nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86062d00, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c37 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86063000, + .hardcode_sw_threshold = true, + .ina3221_dcb_index = 0, + .ina3221_i2c_address = 0x84, + .ina3221_i2c_port = 0x2, + }, + { /* DEVICE=0x1c75 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = true, + .enable_elpg = true, + .enable_elcg = false, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x86065300, + .hardcode_sw_threshold = false, + .ina3221_dcb_index = 1, + .ina3221_i2c_address = 0x80, + .ina3221_i2c_port = 0x1, + }, + { /* DEVICE=PG503 SKU 201 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88001e00, + .hardcode_sw_threshold = false, + .run_preos = true, + }, + { /* DEVICE=PG503 SKU 200 ES */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + 
.can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88001e00, + .hardcode_sw_threshold = false, + .run_preos = true, + }, + { + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x88000126, + .hardcode_sw_threshold = false, + .run_preos = true, + .has_syncpoints = true, + }, + { /* SKU250 */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_mscg = false, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x1, + .hardcode_sw_threshold = false, + .run_preos = true, + .has_syncpoints = true, + }, + { /* SKU 0x1e3f */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + /* + * WAR: PCIE X1 is very slow, set to very high value till nvlink is up + */ + .ch_wdt_timeout_ms = 30000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x1, + .hardcode_sw_threshold = false, + .unified_memory = false, + }, + { /* 0x1eba */ + /* ptimer src frequency in hz */ + .ptimer_src_freq = 31250000, + + .probe = nvgpu_pci_tegra_probe, + .remove = nvgpu_pci_tegra_remove, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = false, + .can_elpg_init = false, + .enable_elpg = false, + .enable_elcg = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_mscg = false, + .can_slcg = false, + .can_blcg 
= false, + .can_elcg = false, + + .disable_aspm = true, + + /* power management callbacks */ + .is_railgated = nvgpu_pci_tegra_is_railgated, + .clk_round_rate = nvgpu_pci_clk_round_rate, + + .ch_wdt_timeout_ms = 7000, + + .honors_aperture = true, + .dma_mask = DMA_BIT_MASK(40), + .vbios_min_version = 0x90040109, + .hardcode_sw_threshold = false, + .has_syncpoints = true, + }, +}; + +static struct pci_device_id nvgpu_pci_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 0, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 1, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 2, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 3, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 4, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 5, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 6, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 7, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 8, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba), + .class = PCI_BASE_CLASS_DISPLAY << 16, + .class_mask = 0xff << 16, + .driver_data = 9, + }, + {} +}; + +static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + irqreturn_t ret_stall; + irqreturn_t ret_nonstall; + + ret_stall = nvgpu_intr_stall(g); + ret_nonstall = nvgpu_intr_nonstall(g); + +#if defined(CONFIG_PCI_MSI) + /* Send MSI EOI */ + if (g->ops.xve.rearm_msi && g->msi_enabled) + g->ops.xve.rearm_msi(g); +#endif + + return (ret_stall == IRQ_NONE) ? 
ret_nonstall : IRQ_WAKE_THREAD; +} + +static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id) +{ + struct gk20a *g = dev_id; + + return nvgpu_intr_thread_stall(g); +} + +static int nvgpu_pci_init_support(struct pci_dev *pdev) +{ + int err = 0; + struct gk20a *g = get_gk20a(&pdev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + l->regs = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (IS_ERR(l->regs)) { + nvgpu_err(g, "failed to remap gk20a registers"); + err = PTR_ERR(l->regs); + goto fail; + } + + l->bar1 = ioremap(pci_resource_start(pdev, 1), + pci_resource_len(pdev, 1)); + if (IS_ERR(l->bar1)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(l->bar1); + goto fail; + } + + err = nvgpu_init_sim_support_linux_pci(g); + if (err) + goto fail; + err = nvgpu_init_sim_support_pci(g); + if (err) + goto fail_sim; + + nvgpu_pci_init_usermode_support(l); + + return 0; + + fail_sim: + nvgpu_remove_sim_support_linux_pci(g); + fail: + if (l->regs) { + iounmap(l->regs); + l->regs = NULL; + } + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } + + return err; +} + +static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = S_IRUGO | S_IWUGO; + return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); +} + +static struct class nvgpu_pci_class = { + .owner = THIS_MODULE, + .name = "nvidia-pci-gpu", + .devnode = nvgpu_pci_devnode, +}; + +#ifdef CONFIG_PM +static int nvgpu_pci_pm_runtime_resume(struct device *dev) +{ + return gk20a_pm_finalize_poweron(dev); +} + +static int nvgpu_pci_pm_runtime_suspend(struct device *dev) +{ + return 0; +} + +static const struct dev_pm_ops nvgpu_pci_pm_ops = { + .runtime_resume = nvgpu_pci_pm_runtime_resume, + .runtime_suspend = nvgpu_pci_pm_runtime_suspend, + .resume = nvgpu_pci_pm_runtime_resume, + .suspend = nvgpu_pci_pm_runtime_suspend, +}; +#endif + +static int nvgpu_pci_pm_init(struct device *dev) +{ +#ifdef CONFIG_PM + struct gk20a *g = get_gk20a(dev); + + if (!g->can_railgate) { + pm_runtime_disable(dev); + } else { + if (g->railgate_delay) + pm_runtime_set_autosuspend_delay(dev, + g->railgate_delay); + + /* + * Runtime PM for PCI devices is disabled by default, + * so we need to enable it first + */ + pm_runtime_use_autosuspend(dev); + pm_runtime_put_noidle(dev); + pm_runtime_allow(dev); + } +#endif + return 0; +} + +static int nvgpu_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pent) +{ + struct gk20a_platform *platform = NULL; + struct nvgpu_os_linux *l; + struct gk20a *g; + int err; + char nodefmt[64]; + struct device_node *np; + + /* make sure driver_data is a sane index */ + if (pent->driver_data >= sizeof(nvgpu_pci_device) / + sizeof(nvgpu_pci_device[0])) { + return -EINVAL; + } + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(&pdev->dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + + hash_init(l->ecc_sysfs_stats_htable); + + g = &l->g; + nvgpu_init_gk20a(g); + + nvgpu_kmem_init(g); + + /* Allocate memory to hold platform data*/ + platform = (struct gk20a_platform *)nvgpu_kzalloc( g, + sizeof(struct gk20a_platform)); + if (!platform) { + dev_err(&pdev->dev, "couldn't allocate platform data"); + err = -ENOMEM; + goto err_free_l; + } + + /* copy detected device data to allocated platform space*/ + memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data], + sizeof(struct gk20a_platform)); + + pci_set_drvdata(pdev, platform); + + err = nvgpu_init_enabled_flags(g); + if (err) + goto 
err_free_platform; + + platform->g = g; + l->dev = &pdev->dev; + + np = nvgpu_get_node(g); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); + } + + err = pci_enable_device(pdev); + if (err) + goto err_free_platform; + pci_set_master(pdev); + + g->pci_vendor_id = pdev->vendor; + g->pci_device_id = pdev->device; + g->pci_subsystem_vendor_id = pdev->subsystem_vendor; + g->pci_subsystem_device_id = pdev->subsystem_device; + g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub + g->pci_revision = pdev->revision; + + g->ina3221_dcb_index = platform->ina3221_dcb_index; + g->ina3221_i2c_address = platform->ina3221_i2c_address; + g->ina3221_i2c_port = platform->ina3221_i2c_port; + g->hardcode_sw_threshold = platform->hardcode_sw_threshold; + +#if defined(CONFIG_PCI_MSI) + err = pci_enable_msi(pdev); + if (err) { + nvgpu_err(g, + "MSI could not be enabled, falling back to legacy"); + g->msi_enabled = false; + } else + g->msi_enabled = true; +#endif + + g->irq_stall = pdev->irq; + g->irq_nonstall = pdev->irq; + if (g->irq_stall < 0) { + err = -ENXIO; + goto err_disable_msi; + } + + err = devm_request_threaded_irq(&pdev->dev, + g->irq_stall, + nvgpu_pci_isr, + nvgpu_pci_intr_thread, +#if defined(CONFIG_PCI_MSI) + g->msi_enabled ? 0 : +#endif + IRQF_SHARED, "nvgpu", g); + if (err) { + nvgpu_err(g, + "failed to request irq @ %d", g->irq_stall); + goto err_disable_msi; + } + disable_irq(g->irq_stall); + + err = nvgpu_pci_init_support(pdev); + if (err) + goto err_free_irq; + + if (strchr(dev_name(&pdev->dev), '%')) { + nvgpu_err(g, "illegal character in device name"); + err = -EINVAL; + goto err_free_irq; + } + + snprintf(nodefmt, sizeof(nodefmt), + PCI_INTERFACE_NAME, dev_name(&pdev->dev)); + + err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class); + if (err) + goto err_free_irq; + + err = nvgpu_pci_pm_init(&pdev->dev); + if (err) { + nvgpu_err(g, "pm init failed"); + goto err_free_irq; + } + + err = nvgpu_nvlink_probe(g); + /* + * ENODEV is a legal error which means there is no NVLINK + * any other error is fatal + */ + if (err) { + if (err != -ENODEV) { + nvgpu_err(g, "fatal error probing nvlink, bailing out"); + goto err_free_irq; + } + /* Enable Semaphore SHIM on nvlink only for now. 
*/ + __nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false); + g->has_syncpoints = false; + } else { + err = nvgpu_nvhost_syncpt_init(g); + if (err) { + if (err != -ENOSYS) { + nvgpu_err(g, "syncpt init failed"); + goto err_free_irq; + } + } + } + + g->mm.has_physical_mode = false; + + return 0; + +err_free_irq: + nvgpu_free_irq(g); +err_disable_msi: +#if defined(CONFIG_PCI_MSI) + if (g->msi_enabled) + pci_disable_msi(pdev); +#endif +err_free_platform: + nvgpu_kfree(g, platform); +err_free_l: + kfree(l); + return err; +} + +static void nvgpu_pci_remove(struct pci_dev *pdev) +{ + struct gk20a *g = get_gk20a(&pdev->dev); + struct device *dev = dev_from_gk20a(g); + int err; + + /* no support yet for unbind if DGPU is in VGPU mode */ + if (gk20a_gpu_is_virtual(dev)) + return; + + nvgpu_nvlink_remove(g); + + gk20a_driver_start_unload(g); + err = nvgpu_quiesce(g); + /* TODO: handle failure to idle */ + WARN(err, "gpu failed to idle during driver removal"); + + nvgpu_free_irq(g); + + nvgpu_remove(dev, &nvgpu_pci_class); + +#if defined(CONFIG_PCI_MSI) + if (g->msi_enabled) + pci_disable_msi(pdev); + else { + /* IRQ does not need to be enabled in MSI as the line is not + * shared + */ + enable_irq(g->irq_stall); + } +#endif + + /* free allocated platform data space */ + nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); + + gk20a_get_platform(&pdev->dev)->g = NULL; + gk20a_put(g); +} + +static struct pci_driver nvgpu_pci_driver = { + .name = "nvgpu", + .id_table = nvgpu_pci_table, + .probe = nvgpu_pci_probe, + .remove = nvgpu_pci_remove, +#ifdef CONFIG_PM + .driver.pm = &nvgpu_pci_pm_ops, +#endif +}; + +int __init nvgpu_pci_init(void) +{ + int ret; + + ret = class_register(&nvgpu_pci_class); + if (ret) + return ret; + + return pci_register_driver(&nvgpu_pci_driver); +} + +void __exit nvgpu_pci_exit(void) +{ + pci_unregister_driver(&nvgpu_pci_driver); + class_unregister(&nvgpu_pci_class); +} diff --git a/drivers/gpu/nvgpu/os/linux/pci.h b/drivers/gpu/nvgpu/os/linux/pci.h new file mode 100644 index 00000000..cc6b77b1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef NVGPU_PCI_H +#define NVGPU_PCI_H + +#ifdef CONFIG_GK20A_PCI +int nvgpu_pci_init(void); +void nvgpu_pci_exit(void); +#else +static inline int nvgpu_pci_init(void) { return 0; } +static inline void nvgpu_pci_exit(void) {} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.c b/drivers/gpu/nvgpu/os/linux/pci_usermode.c new file mode 100644 index 00000000..270b834b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#include + +#include "os_linux.h" + +void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) +{ + l->usermode_regs = l->regs + usermode_cfg0_r(); + l->usermode_regs_saved = l->usermode_regs; +} diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.h b/drivers/gpu/nvgpu/os/linux/pci_usermode.h new file mode 100644 index 00000000..25a08d28 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __NVGPU_PCI_USERMODE_H__ +#define __NVGPU_PCI_USERMODE_H__ + +struct nvgpu_os_linux; + +void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c new file mode 100644 index 00000000..2a6ace37 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include "os_linux.h"
+
+#include "gk20a/gk20a.h"
+
+#include "platform_gk20a.h"
+#include "platform_gk20a_tegra.h"
+#include "platform_gp10b.h"
+#include "platform_gp10b_tegra.h"
+#include "platform_ecc_sysfs.h"
+
+/*
+ * 32-bit FNV-1 hash (multiply, then XOR) of the stat name; 0x811c9dc5 and
+ * 0x1000193 are the standard FNV offset basis and prime.
+ */
+static u32 gen_ecc_hash_key(char *str)
+{
+	int i = 0;
+	u32 hash_key = 0x811c9dc5;
+
+	while (str[i]) {
+		hash_key *= 0x1000193;
+		hash_key ^= (u32)(str[i]);
+		i++;
+	}
+
+	return hash_key;
+}
+
+static ssize_t ecc_stat_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	const char *ecc_stat_full_name = attr->attr.name;
+	const char *ecc_stat_base_name;
+	unsigned int hw_unit;
+	unsigned int subunit;
+	struct gk20a_ecc_stat *ecc_stat;
+	u32 hash_key;
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+
+	/* Strip the unit prefix to recover the base name used as hash key */
+	if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
+						&subunit) == 2) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
+		hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
+	} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
+	} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
+		ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
+	} else {
+		return snprintf(buf,
+				PAGE_SIZE,
+				"Error: Invalid ECC stat name!\n");
+	}
+
+	hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
+
+	hash_for_each_possible(l->ecc_sysfs_stats_htable,
+				ecc_stat,
+				hash_node,
+				hash_key) {
+		if (hw_unit >= ecc_stat->count)
+			continue;
+		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					ecc_stat->counters[hw_unit]);
+	}
+
+	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
+}
+
+int nvgpu_gr_ecc_stat_create(struct device *dev,
+		int is_l2, char *ecc_stat_name,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	char *ltc_unit_name = "ltc";
+	char *gr_unit_name = "gpc0_tpc";
+	char *lts_unit_name = "lts";
+	int num_hw_units = 0;
+	int num_subunits = 0;
+
+	if (is_l2 == 1) {
+		num_hw_units = g->ltc_count;
+	} else if (is_l2 == 2) {
+		num_hw_units = g->ltc_count;
+		num_subunits = g->gr.slices_per_ltc;
+	} else {
+		num_hw_units = g->gr.tpc_count;
+	}
+
+	return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
+				is_l2 ? ltc_unit_name : gr_unit_name,
+				num_subunits ? lts_unit_name : NULL,
+				ecc_stat_name,
+				ecc_stat);
+}
+
+int nvgpu_ecc_stat_create(struct device *dev,
+		int num_hw_units, int num_subunits,
+		char *ecc_unit_name, char *ecc_subunit_name,
+		char *ecc_stat_name,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	int error = 0;
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	int hw_unit = 0;
+	int subunit = 0;
+	int element = 0;
+	u32 hash_key = 0;
+	struct device_attribute *dev_attr_array;
+
+	int num_elements = num_subunits ? num_subunits * num_hw_units :
+					num_hw_units;
+
+	/* Allocate arrays */
+	dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
+				num_elements);
+	ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
+	ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
+	if (!dev_attr_array || !ecc_stat->counters || !ecc_stat->names)
+		goto fail_alloc;
+
+	for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
+		ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
+					ECC_STAT_NAME_MAX_SIZE);
+		if (!ecc_stat->names[hw_unit])
+			goto fail_alloc;
+	}
+	ecc_stat->count = num_elements;
+
+	if (num_subunits) {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+			for (subunit = 0; subunit < num_subunits; subunit++) {
+				element = hw_unit * num_subunits + subunit;
+
+				snprintf(ecc_stat->names[element],
+					ECC_STAT_NAME_MAX_SIZE,
+					"%s%d_%s%d_%s",
+					ecc_unit_name,
+					hw_unit,
+					ecc_subunit_name,
+					subunit,
+					ecc_stat_name);
+
+				sysfs_attr_init(&dev_attr_array[element].attr);
+				dev_attr_array[element].attr.name =
+					ecc_stat->names[element];
+				dev_attr_array[element].attr.mode =
+					VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+				dev_attr_array[element].show = ecc_stat_show;
+				dev_attr_array[element].store = NULL;
+
+				/* Create sysfs file */
+				error |= device_create_file(dev,
+						&dev_attr_array[element]);
+			}
+		}
+	} else {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+
+			/* Fill in struct device_attribute members */
+			snprintf(ecc_stat->names[hw_unit],
+				ECC_STAT_NAME_MAX_SIZE,
+				"%s%d_%s",
+				ecc_unit_name,
+				hw_unit,
+				ecc_stat_name);
+
+			sysfs_attr_init(&dev_attr_array[hw_unit].attr);
+			dev_attr_array[hw_unit].attr.name =
+				ecc_stat->names[hw_unit];
+			dev_attr_array[hw_unit].attr.mode =
+				VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+			dev_attr_array[hw_unit].show = ecc_stat_show;
+			dev_attr_array[hw_unit].store = NULL;
+
+			/* Create sysfs file */
+			error |= device_create_file(dev,
+					&dev_attr_array[hw_unit]);
+		}
+	}
+
+	/* Add hash table entry */
+	hash_key = gen_ecc_hash_key(ecc_stat_name);
+	hash_add(l->ecc_sysfs_stats_htable,
+		&ecc_stat->hash_node,
+		hash_key);
+
+	ecc_stat->attr_array = dev_attr_array;
+
+	return error;
+
+fail_alloc:
+	/* Free whatever was allocated so far and reset the stat pointers */
+	if (ecc_stat->names) {
+		for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
+			nvgpu_kfree(g, ecc_stat->names[hw_unit]);
+	}
+	nvgpu_kfree(g, ecc_stat->names);
+	nvgpu_kfree(g, ecc_stat->counters);
+	nvgpu_kfree(g, dev_attr_array);
+	ecc_stat->names = NULL;
+	ecc_stat->counters = NULL;
+	return -ENOMEM;
+}
+
+void nvgpu_gr_ecc_stat_remove(struct device *dev,
+		int is_l2, struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int num_hw_units = 0;
+	int num_subunits = 0;
+
+	if (is_l2 == 1) {
+		num_hw_units = g->ltc_count;
+	} else if (is_l2 == 2) {
+		num_hw_units = g->ltc_count;
+		num_subunits = g->gr.slices_per_ltc;
+	} else {
+		num_hw_units = g->gr.tpc_count;
+	}
+
+	nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
+}
+
+void nvgpu_ecc_stat_remove(struct device *dev,
+		int num_hw_units, int num_subunits,
+		struct gk20a_ecc_stat *ecc_stat)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct device_attribute *dev_attr_array = ecc_stat->attr_array;
+	int hw_unit = 0;
+	int subunit = 0;
+	int element = 0;
+	int num_elements = num_subunits ?
num_subunits * num_hw_units : + num_hw_units; + + /* Remove sysfs files */ + if (num_subunits) { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { + for (subunit = 0; subunit < num_subunits; subunit++) { + element = hw_unit * num_subunits + subunit; + + device_remove_file(dev, + &dev_attr_array[element]); + } + } + } else { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) + device_remove_file(dev, &dev_attr_array[hw_unit]); + } + + /* Remove hash table entry */ + hash_del(&ecc_stat->hash_node); + + /* Free arrays */ + nvgpu_kfree(g, ecc_stat->counters); + + for (hw_unit = 0; hw_unit < num_elements; hw_unit++) + nvgpu_kfree(g, ecc_stat->names[hw_unit]); + + nvgpu_kfree(g, ecc_stat->names); + nvgpu_kfree(g, dev_attr_array); +} diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h new file mode 100644 index 00000000..d29f7bd3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _NVGPU_PLATFORM_SYSFS_H_ +#define _NVGPU_PLATFORM_SYSFS_H_ + +#include "gp10b/gr_gp10b.h" + +#define ECC_STAT_NAME_MAX_SIZE 100 + +int nvgpu_gr_ecc_stat_create(struct device *dev, + int is_l2, char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +int nvgpu_ecc_stat_create(struct device *dev, + int num_hw_units, int num_subunits, + char *ecc_unit_name, char *ecc_subunit_name, + char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +void nvgpu_gr_ecc_stat_remove(struct device *dev, + int is_l2, struct gk20a_ecc_stat *ecc_stat); +void nvgpu_ecc_stat_remove(struct device *dev, + int num_hw_units, int num_subunits, + struct gk20a_ecc_stat *ecc_stat); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h new file mode 100644 index 00000000..9a99b7fe --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h @@ -0,0 +1,317 @@ +/* + * GK20A Platform (SoC) Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+
+#ifndef _GK20A_PLATFORM_H_
+#define _GK20A_PLATFORM_H_
+
+#include
+
+#include
+
+#include "gk20a/gk20a.h"
+
+#define GK20A_CLKS_MAX 4
+
+struct gk20a;
+struct channel_gk20a;
+struct gr_ctx_buffer_desc;
+struct gk20a_scale_profile;
+
+struct secure_page_buffer {
+	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
+	size_t size;
+	dma_addr_t phys;
+	size_t used;
+};
+
+struct gk20a_platform {
+	/* Populated by the gk20a driver before probing the platform. */
+	struct gk20a *g;
+
+	/* Should be populated at probe. */
+	bool can_railgate_init;
+
+	/* Should be populated at probe. */
+	bool can_elpg_init;
+
+	/* Should be populated at probe. */
+	bool has_syncpoints;
+
+	/* Channel limit after which to start aggressive sync destroy */
+	unsigned int aggressive_sync_destroy_thresh;
+
+	/* Flag to set sync destroy aggressiveness */
+	bool aggressive_sync_destroy;
+
+	/* Set if ASPM should be disabled on boot; only makes sense for PCI */
+	bool disable_aspm;
+
+	/* Set if the platform can unify the small/large address spaces. */
+	bool unify_address_spaces;
+
+	/* Clock configuration is stored here. Platform probe is responsible
+	 * for filling this data. */
+	struct clk *clk[GK20A_CLKS_MAX];
+	int num_clks;
+	int maxmin_clk_id;
+
+#ifdef CONFIG_RESET_CONTROLLER
+	/* Reset control for device */
+	struct reset_control *reset_control;
+#endif
+
+	/* Delay before rail gating */
+	int railgate_delay_init;
+
+	/* Init value for slowdown factor */
+	u8 ldiv_slowdown_factor_init;
+
+	/* Second Level Clock Gating: true = enable, false = disable */
+	bool enable_slcg;
+
+	/* Block Level Clock Gating: true = enable, false = disable */
+	bool enable_blcg;
+
+	/* Engine Level Clock Gating: true = enable, false = disable */
+	bool enable_elcg;
+
+	/* Should be populated at probe. */
+	bool can_slcg;
+
+	/* Should be populated at probe. */
+	bool can_blcg;
+
+	/* Should be populated at probe. */
+	bool can_elcg;
+
+	/* Engine Level Power Gating: true = enable, false = disable */
+	bool enable_elpg;
+
+	/* Adaptive ELPG: true = enable, false = disable */
+	bool enable_aelpg;
+
+	/* PMU Perfmon: true = enable, false = disable */
+	bool enable_perfmon;
+
+	/* Memory System Clock Gating: true = enable, false = disable */
+	bool enable_mscg;
+
+	/* Timeout for per-channel watchdog (in ms) */
+	u32 ch_wdt_timeout_ms;
+
+	/* Disable big page support */
+	bool disable_bigpage;
+
+	/*
+	 * The gk20a_do_idle() API can take the GPU either into rail gate or
+	 * CAR reset. This flag can be used to force the CAR reset case
+	 * instead of rail gate.
+	 */
+	bool force_reset_in_do_idle;
+
+	/* Guest/VM id, needed for IPA to PA translation */
+	int vmid;
+
+	/* Initialize the platform interface of the gk20a driver.
+	 *
+	 * The platform implementation of this function must
+	 * - set the power and clocks of the gk20a device to a known
+	 *   state, and
+	 * - populate the gk20a_platform structure (a pointer to the
+	 *   structure can be obtained by calling gk20a_get_platform).
+	 *
+	 * After this function is finished, the driver will initialise
+	 * pm runtime and genpd based on the platform configuration.
+	 */
+	int (*probe)(struct device *dev);
+
+	/* Second stage initialisation - called once all power management
+	 * initialisations are done.
+	 */
+	int (*late_probe)(struct device *dev);
+
+	/* Remove device after power management has been done.
+	 */
+	int (*remove)(struct device *dev);
+
+	/* Poweron platform dependencies */
+	int (*busy)(struct device *dev);
+
+	/* Powerdown platform dependencies */
+	void (*idle)(struct device *dev);
+
+	/* Preallocated VPR buffer for kernel */
+	size_t secure_buffer_size;
+	struct secure_page_buffer secure_buffer;
+
+	/* Device is going to be suspended */
+	int (*suspend)(struct device *);
+
+	/* Called to turn off the device */
+	int (*railgate)(struct device *dev);
+
+	/* Called to turn on the device */
+	int (*unrailgate)(struct device *dev);
+	struct nvgpu_mutex railgate_lock;
+
+	/* Called to check state of device */
+	bool (*is_railgated)(struct device *dev);
+
+	/* Get supported frequency list */
+	int (*get_clk_freqs)(struct device *pdev,
+				unsigned long **freqs, int *num_freqs);
+
+	/* clk related support functions */
+	long (*clk_round_rate)(struct device *dev,
+				unsigned long rate);
+
+	/* Called to register GPCPLL with common clk framework */
+	int (*clk_register)(struct gk20a *g);
+
+	/* Platform specific scale init quirks */
+	void (*initscale)(struct device *dev);
+
+	/* Postscale callback is called after frequency change */
+	void (*postscale)(struct device *dev,
+			  unsigned long freq);
+
+	/* Prescale callback is called before frequency change */
+	void (*prescale)(struct device *dev);
+
+	/* Devfreq governor name. If scaling is enabled, we request
+	 * this governor to be used in scaling */
+	const char *devfreq_governor;
+
+	/* Quality of service notifier callback. If this is set, the scaling
+	 * routines will register a callback to QoS. Each time we receive
+	 * a new value, this callback gets called. */
+	int (*qos_notify)(struct notifier_block *nb,
+			  unsigned long n, void *p);
+
+	/* Called as part of debug dump. If the gpu gets hung, this function
+	 * is responsible for delivering all necessary debug data of other
+	 * hw units which may interact with the gpu without direct supervision
+	 * of the CPU.
+	 */
+	void (*dump_platform_dependencies)(struct device *dev);
+
+	/* Defined when SMMU stage-2 is enabled and we need to use physical
+	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
+	 * configuration, when the dGPU is in pass-through mode.
+	 */
+	u64 (*phys_addr)(struct gk20a *g, u64 ipa);
+
+	/* Callbacks to assert/deassert GPU reset */
+	int (*reset_assert)(struct device *dev);
+	int (*reset_deassert)(struct device *dev);
+	struct clk *clk_reset;
+	struct dvfs_rail *gpu_rail;
+
+	bool virtual_dev;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+	void *vgpu_priv;
+#endif
+	/* Source frequency for ptimer in Hz */
+	u32 ptimer_src_freq;
+
+#ifdef CONFIG_NVGPU_SUPPORT_CDE
+	bool has_cde;
+#endif
+
+	/* SoC name for finding firmware files */
+	const char *soc_name;
+
+	/* false if vidmem aperture actually points to sysmem */
+	bool honors_aperture;
+	/* unified or split memory with separate vidmem? */
+	bool unified_memory;
+
+	/*
+	 * DMA mask for Linux (both coherent and non-coherent). If not set,
+	 * defaults to 0x3ffffffff (i.e. a 34-bit mask).
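+	 * For example, the gm20b platform uses DMA_BIT_MASK(34) and the
+	 * gp10b platform DMA_BIT_MASK(36).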
+ */ + u64 dma_mask; + + /* minimum supported VBIOS version */ + u32 vbios_min_version; + + /* true if we run preos microcode on this board */ + bool run_preos; + + /* true if we need to program sw threshold for + * power limits + */ + bool hardcode_sw_threshold; + + /* i2c device index, port and address for INA3221 */ + u32 ina3221_dcb_index; + u32 ina3221_i2c_address; + u32 ina3221_i2c_port; + + /* stream id to use */ + u32 ltc_streamid; + + /* scaling rate */ + unsigned long cached_rate; +}; + +static inline struct gk20a_platform *gk20a_get_platform( + struct device *dev) +{ + return (struct gk20a_platform *)dev_get_drvdata(dev); +} + +#ifdef CONFIG_TEGRA_GK20A +extern struct gk20a_platform gm20b_tegra_platform; +extern struct gk20a_platform gp10b_tegra_platform; +extern struct gk20a_platform gv11b_tegra_platform; +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION +extern struct gk20a_platform vgpu_tegra_platform; +extern struct gk20a_platform gv11b_vgpu_tegra_platform; +#endif +#endif + +int gk20a_tegra_busy(struct device *dev); +void gk20a_tegra_idle(struct device *dev); +void gk20a_tegra_debug_dump(struct device *pdev); + +static inline struct gk20a *get_gk20a(struct device *dev) +{ + return gk20a_get_platform(dev)->g; +} +static inline struct gk20a *gk20a_from_dev(struct device *dev) +{ + if (!dev) + return NULL; + + return ((struct gk20a_platform *)dev_get_drvdata(dev))->g; +} +static inline bool gk20a_gpu_is_virtual(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + + return platform->virtual_dev; +} + +static inline int support_gk20a_pmu(struct device *dev) +{ + if (IS_ENABLED(CONFIG_GK20A_PMU)) { + /* gPMU is not supported for vgpu */ + return !gk20a_gpu_is_virtual(dev); + } + + return 0; +} + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c new file mode 100644 index 00000000..af55e5b6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c @@ -0,0 +1,957 @@ +/* + * GK20A Tegra Platform Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_TEGRA_DVFS) +#include +#endif +#include +#include +#include +#if defined(CONFIG_COMMON_CLK) +#include +#endif +#ifdef CONFIG_TEGRA_BWMGR +#include +#endif + +#include +#include + +#include +#include +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gm20b/clk_gm20b.h" + +#include "scale.h" +#include "platform_gk20a.h" +#include "clk.h" +#include "os_linux.h" + +#include "../../../arch/arm/mach-tegra/iomap.h" +#include + +#define TEGRA_GK20A_BW_PER_FREQ 32 +#define TEGRA_GM20B_BW_PER_FREQ 64 +#define TEGRA_DDR3_BW_PER_FREQ 16 +#define TEGRA_DDR4_BW_PER_FREQ 16 +#define MC_CLIENT_GPU 34 +#define PMC_GPU_RG_CNTRL_0 0x2d4 + +#ifdef CONFIG_COMMON_CLK +#define GPU_RAIL_NAME "vdd-gpu" +#else +#define GPU_RAIL_NAME "vdd_gpu" +#endif + +extern struct device tegra_vpr_dev; + +#ifdef CONFIG_TEGRA_BWMGR +struct gk20a_emc_params { + unsigned long bw_ratio; + unsigned long freq_last_set; + struct tegra_bwmgr_client *bwmgr_cl; +}; +#else +struct gk20a_emc_params { + unsigned long bw_ratio; + unsigned long freq_last_set; +}; +#endif + +#define MHZ_TO_HZ(x) ((x) * 1000000) +#define HZ_TO_MHZ(x) ((x) / 1000000) + +static void gk20a_tegra_secure_page_destroy(struct gk20a *g, + struct secure_page_buffer *secure_buffer) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); + dma_free_attrs(&tegra_vpr_dev, secure_buffer->size, + (void *)(uintptr_t)secure_buffer->phys, + secure_buffer->phys, __DMA_ATTR(attrs)); + + secure_buffer->destroy = NULL; +} + +static int gk20a_tegra_secure_alloc(struct gk20a *g, + struct gr_ctx_buffer_desc *desc, + size_t size) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct secure_page_buffer *secure_buffer = &platform->secure_buffer; + dma_addr_t phys; + struct sg_table *sgt; + struct page *page; + int err = 0; + size_t aligned_size = PAGE_ALIGN(size); + + if (nvgpu_mem_is_valid(&desc->mem)) + return 0; + + /* We ran out of preallocated memory */ + if (secure_buffer->used + aligned_size > secure_buffer->size) { + nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used", + size, secure_buffer->used, secure_buffer->size); + return -ENOMEM; + } + + phys = secure_buffer->phys + secure_buffer->used; + + sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt)); + if (!sgt) { + nvgpu_err(platform->g, "failed to allocate memory"); + return -ENOMEM; + } + err = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (err) { + nvgpu_err(platform->g, "failed to allocate sg_table"); + goto fail_sgt; + } + page = phys_to_page(phys); + sg_set_page(sgt->sgl, page, size, 0); + /* This bypasses SMMU for VPR during gmmu_map. 
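+	 * A zero DMA address below tells nvgpu's GMMU mapping code to use
+	 * the page's physical address directly rather than an SMMU IOVA.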
*/ + sg_dma_address(sgt->sgl) = 0; + + desc->destroy = NULL; + + desc->mem.priv.sgt = sgt; + desc->mem.size = size; + desc->mem.aperture = APERTURE_SYSMEM; + + secure_buffer->used += aligned_size; + + return err; + +fail_sgt: + nvgpu_kfree(platform->g, sgt); + return err; +} + +/* + * gk20a_tegra_get_emc_rate() + * + * This function returns the minimum emc clock based on gpu frequency + */ + +static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, + struct gk20a_emc_params *emc_params) +{ + unsigned long gpu_freq, gpu_fmax_at_vmin; + unsigned long emc_rate, emc_scale; + + gpu_freq = clk_get_rate(g->clk.tegra_clk); + gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( + clk_get_parent(g->clk.tegra_clk)); + + /* When scaling emc, account for the gpu load when the + * gpu frequency is less than or equal to fmax@vmin. */ + if (gpu_freq <= gpu_fmax_at_vmin) + emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); + else + emc_scale = g->emc3d_ratio; + + emc_rate = + (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; + + return MHZ_TO_HZ(emc_rate); +} + +/* + * gk20a_tegra_prescale(profile, freq) + * + * This function informs EDP about changed constraints. + */ + +static void gk20a_tegra_prescale(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + u32 avg = 0; + + nvgpu_pmu_load_norm(g, &avg); + tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); +} + +/* + * gk20a_tegra_calibrate_emc() + * + */ + +static void gk20a_tegra_calibrate_emc(struct device *dev, + struct gk20a_emc_params *emc_params) +{ + enum tegra_chipid cid = tegra_get_chip_id(); + long gpu_bw, emc_bw; + + /* store gpu bw based on soc */ + switch (cid) { + case TEGRA210: + gpu_bw = TEGRA_GM20B_BW_PER_FREQ; + break; + case TEGRA124: + case TEGRA132: + gpu_bw = TEGRA_GK20A_BW_PER_FREQ; + break; + default: + gpu_bw = 0; + break; + } + + /* TODO detect DDR type. + * Okay for now since DDR3 and DDR4 have the same BW ratio */ + emc_bw = TEGRA_DDR3_BW_PER_FREQ; + + /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq + * NOTE the ratio must come out as an integer */ + emc_params->bw_ratio = (gpu_bw / emc_bw); +} + +#ifdef CONFIG_TEGRA_BWMGR +#ifdef CONFIG_TEGRA_DVFS +static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) +{ + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *params; + unsigned long rate; + + if (!profile || !profile->private_data) + return; + + params = (struct gk20a_emc_params *)profile->private_data; + rate = (enb) ? 
params->freq_last_set : 0; + tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); +} +#endif + +static void gm20b_tegra_postscale(struct device *dev, unsigned long freq) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + unsigned long emc_rate; + + if (!profile || !profile->private_data) + return; + + emc_params = profile->private_data; + emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params); + + if (emc_rate > tegra_bwmgr_get_max_emc_rate()) + emc_rate = tegra_bwmgr_get_max_emc_rate(); + + emc_params->freq_last_set = emc_rate; + if (platform->is_railgated && platform->is_railgated(dev)) + return; + + tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate, + TEGRA_BWMGR_SET_EMC_FLOOR); + +} + +#endif + +#if defined(CONFIG_TEGRA_DVFS) +/* + * gk20a_tegra_is_railgated() + * + * Check status of gk20a power rail + */ + +static bool gk20a_tegra_is_railgated(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + bool ret = false; + + if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); + + return ret; +} + +/* + * gm20b_tegra_railgate() + * + * Gate (disable) gm20b power rail + */ + +static int gm20b_tegra_railgate(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret = 0; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) || + !tegra_dvfs_is_rail_up(platform->gpu_rail)) + return 0; + + tegra_mc_flush(MC_CLIENT_GPU); + + udelay(10); + + /* enable clamp */ + tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0); + tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); + + udelay(10); + + platform->reset_assert(dev); + + udelay(10); + + /* + * GPCPLL is already disabled before entering this function; reference + * clocks are enabled until now - disable them just before rail gating + */ + clk_disable_unprepare(platform->clk_reset); + clk_disable_unprepare(platform->clk[0]); + clk_disable_unprepare(platform->clk[1]); + if (platform->clk[3]) + clk_disable_unprepare(platform->clk[3]); + + udelay(10); + + tegra_soctherm_gpu_tsens_invalidate(1); + + if (tegra_dvfs_is_rail_up(platform->gpu_rail)) { + ret = tegra_dvfs_rail_power_down(platform->gpu_rail); + if (ret) + goto err_power_off; + } else + pr_info("No GPU regulator?\n"); + +#ifdef CONFIG_TEGRA_BWMGR + gm20b_bwmgr_set_rate(platform, false); +#endif + + return 0; + +err_power_off: + nvgpu_err(platform->g, "Could not railgate GPU"); + return ret; +} + + +/* + * gm20b_tegra_unrailgate() + * + * Ungate (enable) gm20b power rail + */ + +static int gm20b_tegra_unrailgate(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + int ret = 0; + bool first = false; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + return 0; + + ret = tegra_dvfs_rail_power_up(platform->gpu_rail); + if (ret) + return ret; + +#ifdef CONFIG_TEGRA_BWMGR + gm20b_bwmgr_set_rate(platform, true); +#endif + + tegra_soctherm_gpu_tsens_invalidate(0); + + if (!platform->clk_reset) { + platform->clk_reset = clk_get(dev, "gpu_gate"); + if (IS_ERR(platform->clk_reset)) { + nvgpu_err(g, "fail to get gpu reset clk"); + goto err_clk_on; + } + } + + if (!first) { + ret = clk_prepare_enable(platform->clk_reset); + if (ret) { + nvgpu_err(g, "could not turn on gpu_gate"); + goto err_clk_on; + } + + ret = clk_prepare_enable(platform->clk[0]); + if (ret) 
{ + nvgpu_err(g, "could not turn on gpu pll"); + goto err_clk_on; + } + ret = clk_prepare_enable(platform->clk[1]); + if (ret) { + nvgpu_err(g, "could not turn on pwr clock"); + goto err_clk_on; + } + + if (platform->clk[3]) { + ret = clk_prepare_enable(platform->clk[3]); + if (ret) { + nvgpu_err(g, "could not turn on fuse clock"); + goto err_clk_on; + } + } + } + + udelay(10); + + platform->reset_assert(dev); + + udelay(10); + + tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); + tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); + + udelay(10); + + clk_disable(platform->clk_reset); + platform->reset_deassert(dev); + clk_enable(platform->clk_reset); + + /* Flush MC after boot/railgate/SC7 */ + tegra_mc_flush(MC_CLIENT_GPU); + + udelay(10); + + tegra_mc_flush_done(MC_CLIENT_GPU); + + udelay(10); + + return 0; + +err_clk_on: + tegra_dvfs_rail_power_down(platform->gpu_rail); + + return ret; +} +#endif + + +static struct { + char *name; + unsigned long default_rate; +} tegra_gk20a_clocks[] = { + {"gpu_ref", UINT_MAX}, + {"pll_p_out5", 204000000}, + {"emc", UINT_MAX}, + {"fuse", UINT_MAX}, +}; + + + +/* + * gk20a_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gk20a platform data. + */ + +static int gk20a_tegra_get_clocks(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + char devname[16]; + unsigned int i; + int ret = 0; + + BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks)); + + snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev)); + + platform->num_clks = 0; + for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) { + long rate = tegra_gk20a_clocks[i].default_rate; + struct clk *c; + + c = clk_get_sys(devname, tegra_gk20a_clocks[i].name); + if (IS_ERR(c)) { + ret = PTR_ERR(c); + goto err_get_clock; + } + rate = clk_round_rate(c, rate); + clk_set_rate(c, rate); + platform->clk[i] = c; + if (i == 0) + platform->cached_rate = rate; + } + platform->num_clks = i; + + return 0; + +err_get_clock: + + while (i--) + clk_put(platform->clk[i]); + return ret; +} + +#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) +static int gm20b_tegra_reset_assert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (!platform->reset_control) { + WARN(1, "Reset control not initialized\n"); + return -ENOSYS; + } + + return reset_control_assert(platform->reset_control); +} + +static int gm20b_tegra_reset_deassert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + if (!platform->reset_control) { + WARN(1, "Reset control not initialized\n"); + return -ENOSYS; + } + + return reset_control_deassert(platform->reset_control); +} +#endif + +static void gk20a_tegra_scale_init(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + struct gk20a *g = platform->g; + + if (!profile) + return; + + if (profile->private_data) + return; + + emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); + if (!emc_params) + return; + + emc_params->freq_last_set = -1; + gk20a_tegra_calibrate_emc(dev, emc_params); + +#ifdef CONFIG_TEGRA_BWMGR + emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); + if (!emc_params->bwmgr_cl) { + nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); + return; + } +#endif + + profile->private_data = emc_params; +} + +static void gk20a_tegra_scale_exit(struct 
device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a_emc_params *emc_params; + + if (!profile) + return; + + emc_params = profile->private_data; +#ifdef CONFIG_TEGRA_BWMGR + tegra_bwmgr_unregister(emc_params->bwmgr_cl); +#endif + + nvgpu_kfree(platform->g, profile->private_data); +} + +void gk20a_tegra_debug_dump(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + nvgpu_nvhost_debug_dump_device(g->nvhost_dev); +#endif +} + +int gk20a_tegra_busy(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + return nvgpu_nvhost_module_busy_ext(g->nvhost_dev); +#endif + return 0; +} + +void gk20a_tegra_idle(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + if (g->nvhost_dev) + nvgpu_nvhost_module_idle_ext(g->nvhost_dev); +#endif +} + +int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform) +{ + struct gk20a *g = platform->g; + struct secure_page_buffer *secure_buffer = &platform->secure_buffer; + DEFINE_DMA_ATTRS(attrs); + dma_addr_t iova; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + return 0; + + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs)); + (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova, + GFP_KERNEL, __DMA_ATTR(attrs)); + /* Some platforms disable VPR. In that case VPR allocations always + * fail. Just disable VPR usage in nvgpu in that case. */ + if (dma_mapping_error(&tegra_vpr_dev, iova)) + return 0; + + secure_buffer->size = platform->secure_buffer_size; + secure_buffer->phys = iova; + secure_buffer->destroy = gk20a_tegra_secure_page_destroy; + + g->ops.secure_alloc = gk20a_tegra_secure_alloc; + __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true); + + return 0; +} + +#ifdef CONFIG_COMMON_CLK +static struct clk *gk20a_clk_get(struct gk20a *g) +{ + if (!g->clk.tegra_clk) { + struct clk *clk; + char clk_dev_id[32]; + struct device *dev = dev_from_gk20a(g); + + snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); + + clk = clk_get_sys(clk_dev_id, "gpu"); + if (IS_ERR(clk)) { + nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", + clk_dev_id); + return NULL; + } + g->clk.tegra_clk = clk; + } + + return g->clk.tegra_clk; +} + +static int gm20b_clk_prepare_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_clk_prepare(clk); +} + +static void gm20b_clk_unprepare_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + gm20b_clk_unprepare(clk); +} + +static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_clk_is_prepared(clk); +} + +static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_recalc_rate(clk, parent_rate); +} + +static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return gm20b_gpcclk_set_rate(clk, rate, parent_rate); +} + +static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct clk_gk20a *clk = to_clk_gk20a(hw); + return 
gm20b_round_rate(clk, rate, parent_rate); +} + +static const struct clk_ops gm20b_clk_ops = { + .prepare = gm20b_clk_prepare_ops, + .unprepare = gm20b_clk_unprepare_ops, + .is_prepared = gm20b_clk_is_prepared_ops, + .recalc_rate = gm20b_recalc_rate_ops, + .set_rate = gm20b_gpcclk_set_rate_ops, + .round_rate = gm20b_round_rate_ops, +}; + +static int gm20b_register_gpcclk(struct gk20a *g) +{ + const char *parent_name = "pllg_ref"; + struct clk_gk20a *clk = &g->clk; + struct clk_init_data init; + struct clk *c; + int err = 0; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return -ENOSYS; + + err = gm20b_init_clk_setup_sw(g); + if (err) + return err; + + init.name = "gpcclk"; + init.ops = &gm20b_clk_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + init.flags = 0; + + /* Data in .init is copied by clk_register(), so stack variable OK */ + clk->hw.init = &init; + c = clk_register(dev_from_gk20a(g), &clk->hw); + if (IS_ERR(c)) { + nvgpu_err(g, "Failed to register GPCPLL clock"); + return -EINVAL; + } + + clk->g = g; + clk_register_clkdev(c, "gpcclk", "gpcclk"); + + return err; +} +#endif /* CONFIG_COMMON_CLK */ + +static int gk20a_tegra_probe(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct device_node *np = dev->of_node; + bool joint_xpu_rail = false; + int ret; + struct gk20a *g = platform->g; + +#ifdef CONFIG_COMMON_CLK + /* DVFS is not guaranteed to be initialized at the time of probe on + * kernels with Common Clock Framework enabled. + */ + if (!platform->gpu_rail) { + platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME); + if (!platform->gpu_rail) { + nvgpu_log_info(g, "deferring probe no gpu_rail"); + return -EPROBE_DEFER; + } + } + + if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) { + nvgpu_log_info(g, "deferring probe gpu_rail not ready"); + return -EPROBE_DEFER; + } +#endif + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + ret = nvgpu_get_nvhost_dev(platform->g); + if (ret) + return ret; +#endif + +#ifdef CONFIG_OF + joint_xpu_rail = of_property_read_bool(of_chosen, + "nvidia,tegra-joint_xpu_rail"); +#endif + + if (joint_xpu_rail) { + nvgpu_log_info(g, "XPU rails are joint\n"); + platform->g->can_railgate = false; + } + + platform->g->clk.gpc_pll.id = GK20A_GPC_PLL; + if (tegra_get_chip_id() == TEGRA210) { + /* WAR for bug 1547668: Disable railgating and scaling + irrespective of platform data if the rework was not made. 
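+	   Presence of the rework is signalled by the /gpu-dvfs-rework
+	   device tree node checked just below.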
*/ + np = of_find_node_by_path("/gpu-dvfs-rework"); + if (!(np && of_device_is_available(np))) { + platform->devfreq_governor = ""; + dev_warn(dev, "board does not support scaling"); + } + platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1; + if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p) + platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1; + } + + if (tegra_get_chip_id() == TEGRA132) + platform->soc_name = "tegra13x"; + + gk20a_tegra_get_clocks(dev); + nvgpu_linux_init_clk_support(platform->g); + ret = gk20a_tegra_init_secure_alloc(platform); + if (ret) + return ret; + + if (platform->clk_register) { + ret = platform->clk_register(platform->g); + if (ret) + return ret; + } + + return 0; +} + +static int gk20a_tegra_late_probe(struct device *dev) +{ + return 0; +} + +static int gk20a_tegra_remove(struct device *dev) +{ + /* deinitialise tegra specific scaling quirks */ + gk20a_tegra_scale_exit(dev); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_free_nvhost_dev(get_gk20a(dev)); +#endif + + return 0; +} + +static int gk20a_tegra_suspend(struct device *dev) +{ + tegra_edp_notify_gpu_load(0, 0); + return 0; +} + +#if defined(CONFIG_COMMON_CLK) +static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return rate; + + return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); +} + +static int gk20a_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + + /* make sure the clock is available */ + if (!gk20a_clk_get(g)) + return -ENOSYS; + + return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), + freqs, num_freqs); +} +#endif + +struct gk20a_platform gm20b_tegra_platform = { + .has_syncpoints = true, + .aggressive_sync_destroy_thresh = 64, + + /* power management configuration */ + .railgate_delay_init = 500, + .can_railgate_init = true, + .can_elpg_init = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_elcg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_elpg = true, + .enable_aelpg = true, + .enable_perfmon = true, + .ptimer_src_freq = 19200000, + + .force_reset_in_do_idle = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gk20a_tegra_probe, + .late_probe = gk20a_tegra_late_probe, + .remove = gk20a_tegra_remove, + /* power management callbacks */ + .suspend = gk20a_tegra_suspend, + +#if defined(CONFIG_TEGRA_DVFS) + .railgate = gm20b_tegra_railgate, + .unrailgate = gm20b_tegra_unrailgate, + .is_railgated = gk20a_tegra_is_railgated, +#endif + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + +#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) + .reset_assert = gm20b_tegra_reset_assert, + .reset_deassert = gm20b_tegra_reset_deassert, +#else + .reset_assert = gk20a_tegra_reset_assert, + .reset_deassert = gk20a_tegra_reset_deassert, +#endif + +#if defined(CONFIG_COMMON_CLK) + .clk_round_rate = gk20a_round_clk_rate, + .get_clk_freqs = gk20a_clk_get_freqs, +#endif + +#ifdef CONFIG_COMMON_CLK + .clk_register = gm20b_register_gpcclk, +#endif + + /* frequency scaling configuration */ + .initscale = gk20a_tegra_scale_init, + .prescale = gk20a_tegra_prescale, +#ifdef CONFIG_TEGRA_BWMGR + .postscale = gm20b_tegra_postscale, +#endif + .devfreq_governor = "nvhost_podgov", + .qos_notify = gk20a_scale_qos_notify, + + 
.dump_platform_dependencies = gk20a_tegra_debug_dump, + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + .has_cde = true, +#endif + + .soc_name = "tegra21x", + + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(34), + + .secure_buffer_size = 335872, +}; diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h new file mode 100644 index 00000000..f7d50406 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h @@ -0,0 +1,23 @@ +/* + * GK20A Platform (SoC) Interface + * + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_ +#define _NVGPU_PLATFORM_GK20A_TEGRA_H_ + +struct gk20a_platform; + +int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h new file mode 100644 index 00000000..d256d126 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h @@ -0,0 +1,39 @@ +/* + * GP10B Platform (SoC) Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _GP10B_PLATFORM_H_ +#define _GP10B_PLATFORM_H_ + +struct device; + +int gp10b_tegra_get_clocks(struct device *dev); +int gp10b_tegra_reset_assert(struct device *dev); +int gp10b_tegra_reset_deassert(struct device *dev); +void gp10b_tegra_scale_init(struct device *dev); +long gp10b_round_clk_rate(struct device *dev, unsigned long rate); +int gp10b_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs); +void gp10b_tegra_prescale(struct device *dev); +void gp10b_tegra_postscale(struct device *pdev, unsigned long freq); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c new file mode 100644 index 00000000..5cb82687 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -0,0 +1,607 @@ +/* + * GP10B Tegra Platform Interface + * + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "os_linux.h" + +#include "clk.h" + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" +#include "platform_ecc_sysfs.h" +#include "platform_gk20a_tegra.h" +#include "platform_gp10b.h" +#include "platform_gp10b_tegra.h" +#include "scale.h" + +/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ +#define GP10B_FREQ_SELECT_STEP 8 +/* Max number of freq supported in h/w */ +#define GP10B_MAX_SUPPORTED_FREQS 120 +static unsigned long +gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; + +#define TEGRA_GP10B_BW_PER_FREQ 64 +#define TEGRA_DDR4_BW_PER_FREQ 16 + +#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) + +#define GPCCLK_INIT_RATE 1000000000 + +static struct { + char *name; + unsigned long default_rate; +} tegra_gp10b_clocks[] = { + {"gpu", GPCCLK_INIT_RATE}, + {"gpu_sys", 204000000} }; + +/* + * gp10b_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gp10b platform data. 
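+ * The clocks come from the tegra_gp10b_clocks table above: "gpu" at
+ * GPCCLK_INIT_RATE and "gpu_sys" at 204 MHz.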
+ */
+
+int gp10b_tegra_get_clocks(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	unsigned int i;
+
+	platform->num_clks = 0;
+	for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
+		long rate = tegra_gp10b_clocks[i].default_rate;
+		struct clk *c;
+
+		c = clk_get(dev, tegra_gp10b_clocks[i].name);
+		if (IS_ERR(c)) {
+			nvgpu_err(platform->g, "cannot get clock %s",
+					tegra_gp10b_clocks[i].name);
+		} else {
+			clk_set_rate(c, rate);
+			platform->clk[i] = c;
+			if (i == 0)
+				platform->cached_rate = rate;
+		}
+	}
+	platform->num_clks = i;
+
+	if (platform->clk[0]) {
+		i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
+					tegra_gp10b_clocks[0].name);
+		if (i > 0)
+			platform->maxmin_clk_id = i;
+	}
+
+	return 0;
+}
+
+void gp10b_tegra_scale_init(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+	struct tegra_bwmgr_client *bwmgr_handle;
+
+	if (!profile)
+		return;
+
+	if ((struct tegra_bwmgr_client *)profile->private_data)
+		return;
+
+	bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
+	if (!bwmgr_handle)
+		return;
+
+	profile->private_data = (void *)bwmgr_handle;
+}
+
+static void gp10b_tegra_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	if (profile)
+		tegra_bwmgr_unregister(
+			(struct tegra_bwmgr_client *)profile->private_data);
+}
+
+static int gp10b_tegra_probe(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret;
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	ret = nvgpu_get_nvhost_dev(platform->g);
+	if (ret)
+		return ret;
+#endif
+
+	ret = gk20a_tegra_init_secure_alloc(platform);
+	if (ret)
+		return ret;
+
+	platform->disable_bigpage = !device_is_iommuable(dev);
+
+	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.ctx_vars.force_preemption_cilp = false;
+
+	gp10b_tegra_get_clocks(dev);
+	nvgpu_linux_init_clk_support(platform->g);
+
+	return 0;
+}
+
+static int gp10b_tegra_late_probe(struct device *dev)
+{
+	return 0;
+}
+
+static int gp10b_tegra_remove(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	if (g->ops.gr.remove_gr_sysfs)
+		g->ops.gr.remove_gr_sysfs(g);
+
+	/* deinitialise tegra specific scaling quirks */
+	gp10b_tegra_scale_exit(dev);
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	nvgpu_free_nvhost_dev(get_gk20a(dev));
+#endif
+
+	return 0;
+}
+
+static bool gp10b_tegra_is_railgated(struct device *dev)
+{
+	bool ret = false;
+
+	if (tegra_bpmp_running())
+		ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);
+
+	return ret;
+}
+
+static int gp10b_tegra_railgate(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* remove emc frequency floor */
+	if (profile)
+		tegra_bwmgr_set_emc(
+			(struct tegra_bwmgr_client *)profile->private_data,
+			0, TEGRA_BWMGR_SET_EMC_FLOOR);
+
+	if (tegra_bpmp_running() &&
+	    tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
+		int i;
+
+		for (i = 0; i < platform->num_clks; i++) {
+			if (platform->clk[i])
+				clk_disable_unprepare(platform->clk[i]);
+		}
+		tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
+	}
+	return 0;
+}
+
+static int gp10b_tegra_unrailgate(struct device *dev)
+{
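+	/*
+	 * Power-up is the reverse of gp10b_tegra_railgate(): unpowergate
+	 * the partition, re-enable the clocks, then restore the EMC
+	 * frequency floor (initially at the maximum rate).
+	 */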
+ int ret = 0; + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + if (tegra_bpmp_running()) { + int i; + ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_prepare_enable(platform->clk[i]); + } + } + + /* to start with set emc frequency floor to max rate*/ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + tegra_bwmgr_get_max_emc_rate(), + TEGRA_BWMGR_SET_EMC_FLOOR); + return ret; +} + +static int gp10b_tegra_suspend(struct device *dev) +{ + return 0; +} + +int gp10b_tegra_reset_assert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_assert(platform->reset_control); + + return ret; +} + +int gp10b_tegra_reset_deassert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_deassert(platform->reset_control); + + return ret; +} + +void gp10b_tegra_prescale(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + u32 avg = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_load_norm(g, &avg); + + nvgpu_log_fn(g, "done"); +} + +void gp10b_tegra_postscale(struct device *pdev, + unsigned long freq) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a *g = get_gk20a(pdev); + unsigned long emc_rate; + + nvgpu_log_fn(g, " "); + if (profile && !platform->is_railgated(pdev)) { + unsigned long emc_scale; + + if (freq <= gp10b_freq_table[0]) + emc_scale = 0; + else + emc_scale = g->emc3d_ratio; + + emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; + + if (emc_rate > tegra_bwmgr_get_max_emc_rate()) + emc_rate = tegra_bwmgr_get_max_emc_rate(); + + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); + } + nvgpu_log_fn(g, "done"); +} + +long gp10b_round_clk_rate(struct device *dev, unsigned long rate) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + unsigned long *freq_table = profile->devfreq_profile.freq_table; + int max_states = profile->devfreq_profile.max_state; + int i; + + for (i = 0; i < max_states; ++i) + if (freq_table[i] >= rate) + return freq_table[i]; + + return freq_table[max_states - 1]; +} + +int gp10b_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + unsigned long max_rate; + unsigned long new_rate = 0, prev_rate = 0; + int i = 0, freq_counter = 0; + + max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); + + /* + * Walk the h/w frequency table and only select + * GP10B_FREQ_SELECT_STEP'th frequencies and + * add MAX freq to last + */ + for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { + prev_rate = new_rate; + new_rate = clk_round_rate(platform->clk[0], prev_rate + 1); + + if (i % GP10B_FREQ_SELECT_STEP == 0 || + new_rate == max_rate) { + gp10b_freq_table[freq_counter++] = new_rate; + + if (new_rate == max_rate) + break; + } + } + + WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS); + + /* Fill freq table */ + *freqs = gp10b_freq_table; + *num_freqs = freq_counter; + + nvgpu_log_info(g, "min rate: %ld max rate: %ld 
num_of_freq %d\n", + gp10b_freq_table[0], max_rate, *num_freqs); + + return 0; +} + +struct gk20a_platform gp10b_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .railgate_delay_init = 500, + + /* ldiv slowdown factor */ + .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, + + /* power management configuration */ + .can_railgate_init = true, + .enable_elpg = true, + .can_elpg_init = true, + .enable_blcg = true, + .enable_slcg = true, + .enable_elcg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_aelpg = true, + .enable_perfmon = true, + + /* ptimer src frequency in hz*/ + .ptimer_src_freq = 31250000, + + .ch_wdt_timeout_ms = 5000, + + .probe = gp10b_tegra_probe, + .late_probe = gp10b_tegra_late_probe, + .remove = gp10b_tegra_remove, + + /* power management callbacks */ + .suspend = gp10b_tegra_suspend, + .railgate = gp10b_tegra_railgate, + .unrailgate = gp10b_tegra_unrailgate, + .is_railgated = gp10b_tegra_is_railgated, + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + + .dump_platform_dependencies = gk20a_tegra_debug_dump, + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + .has_cde = true, +#endif + + .clk_round_rate = gp10b_round_clk_rate, + .get_clk_freqs = gp10b_clk_get_freqs, + + /* frequency scaling configuration */ + .initscale = gp10b_tegra_scale_init, + .prescale = gp10b_tegra_prescale, + .postscale = gp10b_tegra_postscale, + .devfreq_governor = "nvhost_podgov", + + .qos_notify = gk20a_scale_qos_notify, + + .reset_assert = gp10b_tegra_reset_assert, + .reset_deassert = gp10b_tegra_reset_deassert, + + .force_reset_in_do_idle = false, + + .soc_name = "tegra18x", + + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(36), + + .ltc_streamid = TEGRA_SID_GPUB, + + .secure_buffer_size = 401408, +}; + +void gr_gp10b_create_sysfs(struct gk20a *g) +{ + int error = 0; + struct device *dev = dev_from_gk20a(g); + + /* This stat creation function is called on GR init. GR can get + initialized multiple times but we only need to create the ECC + stats once. Therefore, add the following check to avoid + creating duplicate stat sysfs nodes. 
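+	   A non-NULL counters array is used as the "already created"
+	   marker here.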
*/ + if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) + return; + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_lrf_ecc_single_err_count", + &g->ecc.gr.sm_lrf_single_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_lrf_ecc_double_err_count", + &g->ecc.gr.sm_lrf_double_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_sec_count", + &g->ecc.gr.sm_shm_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_sed_count", + &g->ecc.gr.sm_shm_sed_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_shm_ecc_ded_count", + &g->ecc.gr.sm_shm_ded_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_sec_pipe0_count", + &g->ecc.gr.tex_total_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_ded_pipe0_count", + &g->ecc.gr.tex_total_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_sec_pipe0_count", + &g->ecc.gr.tex_unique_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_ded_pipe0_count", + &g->ecc.gr.tex_unique_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_sec_pipe1_count", + &g->ecc.gr.tex_total_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_total_ded_pipe1_count", + &g->ecc.gr.tex_total_ded_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_sec_pipe1_count", + &g->ecc.gr.tex_unique_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "tex_ecc_unique_ded_pipe1_count", + &g->ecc.gr.tex_unique_ded_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 2, + "ecc_sec_count", + &g->ecc.ltc.l2_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 2, + "ecc_ded_count", + &g->ecc.ltc.l2_ded_count); + + if (error) + dev_err(dev, "Failed to create sysfs attributes!\n"); +} + +void gr_gp10b_remove_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (!g->ecc.gr.sm_lrf_single_err_count.counters) + return; + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_lrf_single_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_lrf_double_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_sed_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_shm_ded_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_total_ded_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.tex_unique_ded_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, + 2, + &g->ecc.ltc.l2_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, + 2, + &g->ecc.ltc.l2_ded_count); +} diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h new file mode 100644 index 00000000..6de90275 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef _PLATFORM_GP10B_TEGRA_H_
+#define _PLATFORM_GP10B_TEGRA_H_
+
+#include "gp10b/gr_gp10b.h"
+#include "platform_ecc_sysfs.h"
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
new file mode 100644
index 00000000..d62e7932
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -0,0 +1,588 @@
+/*
+ * GV11B Tegra Platform Interface
+ *
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "platform_gk20a.h"
+#include "clk.h"
+#include "scale.h"
+
+#include "platform_gp10b.h"
+#include "platform_gp10b_tegra.h"
+#include "platform_ecc_sysfs.h"
+
+#include "os_linux.h"
+#include "platform_gk20a_tegra.h"
+#include "gv11b/gr_gv11b.h"
+
+static void gv11b_tegra_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	if (profile)
+		tegra_bwmgr_unregister(
+			(struct tegra_bwmgr_client *)profile->private_data);
+}
+
+static int gv11b_tegra_probe(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int err;
+
+	err = nvgpu_nvhost_syncpt_init(platform->g);
+	if (err) {
+		if (err != -ENOSYS)
+			return err;
+	}
+
+	err = gk20a_tegra_init_secure_alloc(platform);
+	if (err)
+		return err;
+
+	platform->disable_bigpage = !device_is_iommuable(dev);
+
+	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
+		= false;
+
+	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
+	platform->g->gr.ctx_vars.force_preemption_cilp = false;
+
+	gp10b_tegra_get_clocks(dev);
+	nvgpu_linux_init_clk_support(platform->g);
+
+	return 0;
+}
+
+static int gv11b_tegra_late_probe(struct device *dev)
+{
+	return 0;
+}
+
+static int gv11b_tegra_remove(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	if (g->ops.gr.remove_gr_sysfs)
+		g->ops.gr.remove_gr_sysfs(g);
+
+	gv11b_tegra_scale_exit(dev);
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	nvgpu_free_nvhost_dev(get_gk20a(dev));
+#endif
+
+	return 0;
+}
+
+static bool gv11b_tegra_is_railgated(struct device *dev)
+{
+	bool ret = false;
+#ifdef
TEGRA194_POWER_DOMAIN_GPU + struct gk20a *g = get_gk20a(dev); + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU); + + nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no"); + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } +#endif + return ret; +} + +static int gv11b_tegra_railgate(struct device *dev) +{ +#ifdef TEGRA194_POWER_DOMAIN_GPU + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a *g = get_gk20a(dev); + int i; + + /* remove emc frequency floor */ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + 0, TEGRA_BWMGR_SET_EMC_FLOOR); + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) { + nvgpu_log(g, gpu_dbg_info, "powergate is not powered"); + return 0; + } + nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare"); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_disable_unprepare(platform->clk[i]); + } + nvgpu_log(g, gpu_dbg_info, "powergate_partition"); + tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU); + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } +#endif + return 0; +} + +static int gv11b_tegra_unrailgate(struct device *dev) +{ + int ret = 0; +#ifdef TEGRA194_POWER_DOMAIN_GPU + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + int i; + + if (tegra_bpmp_running()) { + nvgpu_log(g, gpu_dbg_info, "bpmp running"); + ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU); + if (ret) { + nvgpu_log(g, gpu_dbg_info, + "unpowergate partition failed"); + return ret; + } + nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable"); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_prepare_enable(platform->clk[i]); + } + } else { + nvgpu_log(g, gpu_dbg_info, "bpmp not running"); + } + + /* to start with set emc frequency floor to max rate*/ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + tegra_bwmgr_get_max_emc_rate(), + TEGRA_BWMGR_SET_EMC_FLOOR); +#endif + return ret; +} + +static int gv11b_tegra_suspend(struct device *dev) +{ + return 0; +} + +struct gk20a_platform gv11b_tegra_platform = { + .has_syncpoints = true, + + /* ptimer src frequency in hz*/ + .ptimer_src_freq = 31250000, + + .ch_wdt_timeout_ms = 5000, + + .probe = gv11b_tegra_probe, + .late_probe = gv11b_tegra_late_probe, + .remove = gv11b_tegra_remove, + .railgate_delay_init = 500, + .can_railgate_init = true, + + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_slcg = true, + .enable_blcg = true, + .enable_elcg = true, + .enable_perfmon = true, + + /* power management configuration */ + .enable_elpg = true, + .can_elpg_init = true, + .enable_aelpg = true, + + /* power management callbacks */ + .suspend = gv11b_tegra_suspend, + .railgate = gv11b_tegra_railgate, + .unrailgate = gv11b_tegra_unrailgate, + .is_railgated = gv11b_tegra_is_railgated, + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + + .clk_round_rate = gp10b_round_clk_rate, + .get_clk_freqs = gp10b_clk_get_freqs, + + /* frequency scaling configuration */ + .initscale = gp10b_tegra_scale_init, + .prescale = gp10b_tegra_prescale, + .postscale = gp10b_tegra_postscale, + .devfreq_governor = 
"nvhost_podgov", + + .qos_notify = gk20a_scale_qos_notify, + + .dump_platform_dependencies = gk20a_tegra_debug_dump, + + .soc_name = "tegra19x", + + .honors_aperture = true, + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(36), + + .reset_assert = gp10b_tegra_reset_assert, + .reset_deassert = gp10b_tegra_reset_deassert, + + .secure_buffer_size = 667648, +}; + +void gr_gv11b_create_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + int error = 0; + + /* This stat creation function is called on GR init. GR can get + initialized multiple times but we only need to create the ECC + stats once. Therefore, add the following check to avoid + creating duplicate stat sysfs nodes. */ + if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL) + return; + + gr_gp10b_create_sysfs(g); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_tag_ecc_corrected_err_count", + &g->ecc.gr.sm_l1_tag_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_tag_ecc_uncorrected_err_count", + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_cbu_ecc_corrected_err_count", + &g->ecc.gr.sm_cbu_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_cbu_ecc_uncorrected_err_count", + &g->ecc.gr.sm_cbu_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_data_ecc_corrected_err_count", + &g->ecc.gr.sm_l1_data_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_l1_data_ecc_uncorrected_err_count", + &g->ecc.gr.sm_l1_data_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_icache_ecc_corrected_err_count", + &g->ecc.gr.sm_icache_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "sm_icache_ecc_uncorrected_err_count", + &g->ecc.gr.sm_icache_uncorrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "gcc_l15_ecc_corrected_err_count", + &g->ecc.gr.gcc_l15_corrected_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, + 0, + "gcc_l15_ecc_uncorrected_err_count", + &g->ecc.gr.gcc_l15_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->ltc_count, + 0, + "ltc", + NULL, + "l2_cache_uncorrected_err_count", + &g->ecc.ltc.l2_cache_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->ltc_count, + 0, + "ltc", + NULL, + "l2_cache_corrected_err_count", + &g->ecc.ltc.l2_cache_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "gpc", + NULL, + "fecs_ecc_uncorrected_err_count", + &g->ecc.gr.fecs_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "gpc", + NULL, + "fecs_ecc_corrected_err_count", + &g->ecc.gr.fecs_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "gpccs_ecc_uncorrected_err_count", + &g->ecc.gr.gpccs_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "gpccs_ecc_corrected_err_count", + &g->ecc.gr.gpccs_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "mmu_l1tlb_ecc_uncorrected_err_count", + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + g->gr.gpc_count, + 0, + "gpc", + NULL, + "mmu_l1tlb_ecc_corrected_err_count", + &g->ecc.gr.mmu_l1tlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_l2tlb_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); + + error 
|= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_l2tlb_ecc_corrected_err_count", + &g->ecc.fb.mmu_l2tlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_hubtlb_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_hubtlb_ecc_corrected_err_count", + &g->ecc.fb.mmu_hubtlb_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_fillunit_ecc_uncorrected_err_count", + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "mmu_fillunit_ecc_corrected_err_count", + &g->ecc.fb.mmu_fillunit_corrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "pmu_ecc_uncorrected_err_count", + &g->ecc.pmu.pmu_uncorrected_err_count); + + error |= nvgpu_ecc_stat_create(dev, + 1, + 0, + "eng", + NULL, + "pmu_ecc_corrected_err_count", + &g->ecc.pmu.pmu_corrected_err_count); + + if (error) + dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); +} + +void gr_gv11b_remove_sysfs(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + + if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters) + return; + gr_gp10b_remove_sysfs(g); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_tag_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_cbu_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_cbu_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_data_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_l1_data_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_icache_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.sm_icache_uncorrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.gcc_l15_corrected_err_count); + + nvgpu_gr_ecc_stat_remove(dev, + 0, + &g->ecc.gr.gcc_l15_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->ltc_count, + 0, + &g->ecc.ltc.l2_cache_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->ltc_count, + 0, + &g->ecc.ltc.l2_cache_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.gr.fecs_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.gr.fecs_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.gpccs_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.gpccs_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + g->gr.gpc_count, + 0, + &g->ecc.gr.mmu_l1tlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_l2tlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_hubtlb_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.fb.mmu_fillunit_corrected_err_count); + + nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.pmu.pmu_uncorrected_err_count); + + 
nvgpu_ecc_stat_remove(dev, + 1, + 0, + &g->ecc.pmu.pmu_corrected_err_count); +} diff --git a/drivers/gpu/nvgpu/os/linux/rwsem.c b/drivers/gpu/nvgpu/os/linux/rwsem.c new file mode 100644 index 00000000..297ddf11 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/rwsem.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem) +{ + init_rwsem(&rwsem->rwsem); +} + +void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem) +{ + up_read(&rwsem->rwsem); +} + +void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem) +{ + down_read(&rwsem->rwsem); +} + +void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem) +{ + up_write(&rwsem->rwsem); +} + +void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem) +{ + down_write(&rwsem->rwsem); +} diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c new file mode 100644 index 00000000..84ac1cfd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -0,0 +1,428 @@ +/* + * gk20a clock scaling profile + * + * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +#include +#include + +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "scale.h" +#include "os_linux.h" + +/* + * gk20a_scale_qos_notify() + * + * This function is called when the minimum QoS requirement for the device + * has changed. The function calls postscaling callback if it is defined. 
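+ *
+ * Two variants follow: with CONFIG_COMMON_CLK the notifier re-reads
+ * the PM QoS min/max frequency bounds and kicks devfreq to re-evaluate
+ * its target; without it, the notifier rounds the QoS minimum to a
+ * supported rate and invokes the platform postscale callback directly.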
+ */ + +#if defined(CONFIG_COMMON_CLK) +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a *g = get_gk20a(profile->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct devfreq *devfreq = l->devfreq; + + if (!devfreq) + return NOTIFY_OK; + + mutex_lock(&devfreq->lock); + /* check for pm_qos min and max frequency requirement */ + profile->qos_min_freq = + (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; + profile->qos_max_freq = + (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL; + + if (profile->qos_min_freq > profile->qos_max_freq) { + nvgpu_err(g, + "QoS: setting invalid limit, min_freq=%lu max_freq=%lu", + profile->qos_min_freq, profile->qos_max_freq); + profile->qos_min_freq = profile->qos_max_freq; + } + + update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + + return NOTIFY_OK; +} +#else +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a_platform *platform = dev_get_drvdata(profile->dev); + struct gk20a *g = get_gk20a(profile->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + unsigned long freq; + + if (!platform->postscale) + return NOTIFY_OK; + + /* get the frequency requirement. if devfreq is enabled, check if it + * has higher demand than qos */ + freq = platform->clk_round_rate(profile->dev, + (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS)); + if (l->devfreq) + freq = max(l->devfreq->previous_freq, freq); + + /* Update gpu load because we may scale the emc target + * if the gpu load changed. */ + nvgpu_pmu_load_update(g); + platform->postscale(profile->dev, freq); + + return NOTIFY_OK; +} +#endif + +/* + * gk20a_scale_make_freq_table(profile) + * + * This function initialises the frequency table for the given device profile + */ + +static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) +{ + struct gk20a_platform *platform = dev_get_drvdata(profile->dev); + int num_freqs, err; + unsigned long *freqs; + + if (platform->get_clk_freqs) { + /* get gpu frequency table */ + err = platform->get_clk_freqs(profile->dev, &freqs, + &num_freqs); + if (err) + return -ENOSYS; + } else + return -ENOSYS; + + profile->devfreq_profile.freq_table = (unsigned long *)freqs; + profile->devfreq_profile.max_state = num_freqs; + + return 0; +} + +/* + * gk20a_scale_target(dev, *freq, flags) + * + * This function scales the clock + */ + +static int gk20a_scale_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_scale_profile *profile = g->scale_profile; + struct devfreq *devfreq = l->devfreq; + unsigned long local_freq = *freq; + unsigned long rounded_rate; + unsigned long min_freq = 0, max_freq = 0; + + /* + * Calculate floor and cap frequency values + * + * Policy : + * We have two APIs to clip the frequency + * 1. devfreq + * 2. 
pm_qos
+	 *
+	 * To calculate floor (min) freq, we select MAX of floor frequencies
+	 * requested from both APIs
+	 * To get cap (max) freq, we select MIN of max frequencies
+	 *
+	 * In case we have conflict (min_freq > max_freq) after above
+	 * steps, we ensure that max_freq wins over min_freq
+	 */
+	min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
+	max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
+
+	if (min_freq > max_freq)
+		min_freq = max_freq;
+
+	/* Clip requested frequency */
+	if (local_freq < min_freq)
+		local_freq = min_freq;
+
+	if (local_freq > max_freq)
+		local_freq = max_freq;
+
+	/* set the final frequency */
+	rounded_rate = platform->clk_round_rate(dev, local_freq);
+
+	/* Check for duplicate request */
+	if (rounded_rate == g->last_freq)
+		return 0;
+
+	if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
+		*freq = rounded_rate;
+	else {
+		g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
+		*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
+	}
+
+	g->last_freq = *freq;
+
+	/* postscale will only scale emc (dram clock) if evaluating
+	 * gk20a_tegra_get_emc_rate() produces a new or different emc
+	 * target because the load and/or gpufreq has changed */
+	if (platform->postscale)
+		platform->postscale(dev, rounded_rate);
+
+	return 0;
+}
+
+/*
+ * update_load_estimate_gpmu(dev)
+ *
+ * Update load estimate using gpmu. The gpmu busy value is normalised
+ * over the time elapsed since the previous query.
+ */
+
+static void update_load_estimate_gpmu(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct gk20a_scale_profile *profile = g->scale_profile;
+	unsigned long dt;
+	u32 busy_time;
+	ktime_t t;
+
+	t = ktime_get();
+	dt = ktime_us_delta(t, profile->last_event_time);
+
+	profile->dev_stat.total_time = dt;
+	profile->last_event_time = t;
+	nvgpu_pmu_load_norm(g, &busy_time);
+	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
+}
+
+/*
+ * gk20a_scale_suspend(dev)
+ *
+ * This function informs devfreq of suspend
+ */
+
+void gk20a_scale_suspend(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct devfreq *devfreq = l->devfreq;
+
+	if (!devfreq)
+		return;
+
+	devfreq_suspend_device(devfreq);
+}
+
+/*
+ * gk20a_scale_resume(dev)
+ *
+ * This function informs devfreq of resume
+ */
+
+void gk20a_scale_resume(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct devfreq *devfreq = l->devfreq;
+
+	if (!devfreq)
+		return;
+
+	g->last_freq = 0;
+	devfreq_resume_device(devfreq);
+}
+
+/*
+ * gk20a_scale_get_dev_status(dev, *stat)
+ *
+ * This function queries the current device status.
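+ * It refreshes the PMU load counters, runs the platform prescale hook
+ * if one is set, reports the current GPCCLK rate and the busy/total
+ * time accumulated since the previous query, then clears the local
+ * accumulators.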
+ */ + +static int gk20a_scale_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + struct gk20a_platform *platform = dev_get_drvdata(dev); + + /* update the software shadow */ + nvgpu_pmu_load_update(g); + + /* inform edp about new constraint */ + if (platform->prescale) + platform->prescale(dev); + + /* Make sure there are correct values for the current frequency */ + profile->dev_stat.current_frequency = + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); + + /* Update load estimate */ + update_load_estimate_gpmu(dev); + + /* Copy the contents of the current device status */ + *stat = profile->dev_stat; + + /* Finally, clear out the local values */ + profile->dev_stat.total_time = 0; + profile->dev_stat.busy_time = 0; + + return 0; +} + +/* + * get_cur_freq(struct device *dev, unsigned long *freq) + * + * This function gets the current GPU clock rate. + */ + +static int get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct gk20a *g = get_gk20a(dev); + *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); + return 0; +} + + +/* + * gk20a_scale_init(dev) + */ + +void gk20a_scale_init(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = platform->g; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_scale_profile *profile; + int err; + + if (g->scale_profile) + return; + + if (!platform->devfreq_governor && !platform->qos_notify) + return; + + profile = nvgpu_kzalloc(g, sizeof(*profile)); + + profile->dev = dev; + profile->dev_stat.busy = false; + + /* Create frequency table */ + err = gk20a_scale_make_freq_table(profile); + if (err || !profile->devfreq_profile.max_state) + goto err_get_freqs; + + profile->qos_min_freq = 0; + profile->qos_max_freq = UINT_MAX; + + /* Store device profile so we can access it if devfreq governor + * init needs that */ + g->scale_profile = profile; + + if (platform->devfreq_governor) { + struct devfreq *devfreq; + + profile->devfreq_profile.initial_freq = + profile->devfreq_profile.freq_table[0]; + profile->devfreq_profile.target = gk20a_scale_target; + profile->devfreq_profile.get_dev_status = + gk20a_scale_get_dev_status; + profile->devfreq_profile.get_cur_freq = get_cur_freq; + profile->devfreq_profile.polling_ms = 25; + + devfreq = devfreq_add_device(dev, + &profile->devfreq_profile, + platform->devfreq_governor, NULL); + + if (IS_ERR(devfreq)) + devfreq = NULL; + + l->devfreq = devfreq; + } + + /* Should we register QoS callback for this device? 
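+	 * If so, the same notifier block is registered for both the min
+	 * and max GPU frequency bound notifications.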
*/
+	if (platform->qos_notify) {
+		profile->qos_notify_block.notifier_call =
+					platform->qos_notify;
+
+		pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+					&profile->qos_notify_block);
+		pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+					&profile->qos_notify_block);
+	}
+
+	return;
+
+err_get_freqs:
+	nvgpu_kfree(g, profile);
+}
+
+void gk20a_scale_exit(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a *g = platform->g;
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	int err;
+
+	if (platform->qos_notify) {
+		pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&g->scale_profile->qos_notify_block);
+		pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
+				&g->scale_profile->qos_notify_block);
+	}
+
+	if (platform->devfreq_governor) {
+		err = devfreq_remove_device(l->devfreq);
+		l->devfreq = NULL;
+	}
+
+	nvgpu_kfree(g, g->scale_profile);
+	g->scale_profile = NULL;
+}
+
+/*
+ * gk20a_scale_hw_init(dev)
+ *
+ * Initialize hardware portion of the device
+ */
+
+void gk20a_scale_hw_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a_scale_profile *profile = platform->g->scale_profile;
+
+	/* make sure that scaling has been initialised */
+	if (!profile)
+		return;
+
+	profile->dev_stat.total_time = 0;
+	profile->last_event_time = ktime_get();
+}
diff --git a/drivers/gpu/nvgpu/os/linux/scale.h b/drivers/gpu/nvgpu/os/linux/scale.h
new file mode 100644
index 00000000..c1e6fe86
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/scale.h
@@ -0,0 +1,66 @@
+/*
+ * gk20a clock scaling profile
+ *
+ * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */ + +#ifndef GK20A_SCALE_H +#define GK20A_SCALE_H + +#include + +struct clk; + +struct gk20a_scale_profile { + struct device *dev; + ktime_t last_event_time; + struct devfreq_dev_profile devfreq_profile; + struct devfreq_dev_status dev_stat; + struct notifier_block qos_notify_block; + unsigned long qos_min_freq; + unsigned long qos_max_freq; + void *private_data; +}; + +/* Initialization and de-initialization for module */ +void gk20a_scale_init(struct device *); +void gk20a_scale_exit(struct device *); +void gk20a_scale_hw_init(struct device *dev); + +#if defined(CONFIG_GK20A_DEVFREQ) +/* + * call when performing submit to notify scaling mechanism that the module is + * in use + */ +void gk20a_scale_notify_busy(struct device *); +void gk20a_scale_notify_idle(struct device *); + +void gk20a_scale_suspend(struct device *); +void gk20a_scale_resume(struct device *); +int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p); +#else +static inline void gk20a_scale_notify_busy(struct device *dev) {} +static inline void gk20a_scale_notify_idle(struct device *dev) {} +static inline void gk20a_scale_suspend(struct device *dev) {} +static inline void gk20a_scale_resume(struct device *dev) {} +static inline int gk20a_scale_qos_notify(struct notifier_block *nb, + unsigned long n, void *p) +{ + return -ENOSYS; +} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c new file mode 100644 index 00000000..2ad5aabf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "sched.h" +#include "os_linux.h" +#include "ioctl_tsg.h" + +#include +#include + +ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, + size_t size, loff_t *off) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + struct nvgpu_sched_event_arg event = { 0 }; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, + "filp=%p buf=%p size=%zu", filp, buf, size); + + if (size < sizeof(event)) + return -EINVAL; + size = sizeof(event); + + nvgpu_mutex_acquire(&sched->status_lock); + while (!sched->status) { + nvgpu_mutex_release(&sched->status_lock); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq, + sched->status, 0); + if (err) + return err; + nvgpu_mutex_acquire(&sched->status_lock); + } + + event.reserved = 0; + event.status = sched->status; + + if (copy_to_user(buf, &event, size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + + sched->status = 0; + + nvgpu_mutex_release(&sched->status_lock); + + return size; +} + +unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + unsigned int mask = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->status_lock); + poll_wait(filp, &sched->readout_wq.wq, wait); + if (sched->status) + mask |= POLLIN | POLLRDNORM; + nvgpu_mutex_release(&sched->status_lock); + + return mask; +} + +static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_args *arg) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", + arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + nvgpu_mutex_acquire(&sched->status_lock); + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + sched->active_tsg_bitmap, sched->bitmap_size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_args *arg) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", + arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + nvgpu_mutex_acquire(&sched->status_lock); + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + sched->recent_tsg_bitmap, sched->bitmap_size)) { + nvgpu_mutex_release(&sched->status_lock); + return -EFAULT; + } + + memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_get_tsgs_by_pid_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u64 *bitmap; + unsigned int tsgid; + /* pid at user level corresponds to kernel tgid */ + pid_t tgid = (pid_t)arg->pid; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", + (pid_t)arg->pid, arg->size, arg->buffer); + + if ((arg->size < sched->bitmap_size) 
|| (!arg->buffer)) { + arg->size = sched->bitmap_size; + return -ENOSPC; + } + + bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size); + if (!bitmap) + return -ENOMEM; + + nvgpu_mutex_acquire(&sched->status_lock); + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + if (tsg->tgid == tgid) + NVGPU_SCHED_SET(tsgid, bitmap); + } + } + nvgpu_mutex_release(&sched->status_lock); + + if (copy_to_user((void __user *)(uintptr_t)arg->buffer, + bitmap, sched->bitmap_size)) + err = -EFAULT; + + nvgpu_kfree(sched->g, bitmap); + + return err; +} + +static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_get_params_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ + arg->runlist_interleave = tsg->interleave_level; + arg->timeslice = tsg->timeslice_us; + + arg->graphics_preempt_mode = + tsg->gr_ctx.graphics_preempt_mode; + arg->compute_preempt_mode = + tsg->gr_ctx.compute_preempt_mode; + + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return 0; +} + +static int gk20a_sched_dev_ioctl_tsg_set_timeslice( + struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_timeslice_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + err = gk20a_busy(g); + if (err) + goto done; + + err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); + + gk20a_idle(g); + +done: + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return err; +} + +static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( + struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_runlist_interleave_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + err = gk20a_busy(g); + if (err) + goto done; + + err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); + + gk20a_idle(g); + +done: + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return err; +} + +static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = true; + nvgpu_mutex_release(&sched->control_lock); + return 0; +} + +static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = false; + nvgpu_mutex_release(&sched->control_lock); + 
return 0; +} + +static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_api_version_args *args) +{ + struct gk20a *g = sched->g; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); + + args->version = NVGPU_SCHED_API_VERSION; + return 0; +} + +static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + tsg = &f->tsg[tsgid]; + if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + nvgpu_mutex_acquire(&sched->status_lock); + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + nvgpu_warn(g, "tsgid=%d already referenced", tsgid); + /* unlock status_lock as nvgpu_ioctl_tsg_release locks it */ + nvgpu_mutex_release(&sched->status_lock); + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + return -ENXIO; + } + + /* keep reference on TSG, will be released on + * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close + */ + NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); + nvgpu_mutex_release(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + nvgpu_speculation_barrier(); + + nvgpu_mutex_acquire(&sched->status_lock); + if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + nvgpu_mutex_release(&sched->status_lock); + nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid); + return -ENXIO; + } + NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); + nvgpu_mutex_release(&sched->status_lock); + + tsg = &f->tsg[tsgid]; + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + + return 0; +} + +int gk20a_sched_dev_open(struct inode *inode, struct file *filp) +{ + struct nvgpu_os_linux *l = container_of(inode->i_cdev, + struct nvgpu_os_linux, sched.cdev); + struct gk20a *g; + struct gk20a_sched_ctrl *sched; + int err = 0; + + g = gk20a_get(&l->g); + if (!g) + return -ENODEV; + sched = &l->sched_ctrl; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g); + + if (!sched->sw_ready) { + err = gk20a_busy(g); + if (err) + goto free_ref; + + gk20a_idle(g); + } + + if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) { + err = -EBUSY; + goto free_ref; + } + + memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, + sched->bitmap_size); + memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); + + filp->private_data = sched; + nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched); + +free_ref: + if (err) + gk20a_put(g); + return err; +} + +long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; + int err = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); + + if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || + (_IOC_NR(cmd) == 0) || + (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || + (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) + return -EINVAL; + + memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & 
_IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + switch (cmd) { + case NVGPU_SCHED_IOCTL_GET_TSGS: + err = gk20a_sched_dev_ioctl_get_tsgs(sched, + (struct nvgpu_sched_get_tsgs_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: + err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched, + (struct nvgpu_sched_get_tsgs_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: + err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched, + (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: + err = gk20a_sched_dev_ioctl_get_params(sched, + (struct nvgpu_sched_tsg_get_params_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: + err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched, + (struct nvgpu_sched_tsg_timeslice_args *)buf); + break; + case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: + err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched, + (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); + break; + case NVGPU_SCHED_IOCTL_LOCK_CONTROL: + err = gk20a_sched_dev_ioctl_lock_control(sched); + break; + case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: + err = gk20a_sched_dev_ioctl_unlock_control(sched); + break; + case NVGPU_SCHED_IOCTL_GET_API_VERSION: + err = gk20a_sched_dev_ioctl_get_api_version(sched, + (struct nvgpu_sched_api_version_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_TSG: + err = gk20a_sched_dev_ioctl_get_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; + case NVGPU_SCHED_IOCTL_PUT_TSG: + err = gk20a_sched_dev_ioctl_put_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; + default: + nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); + err = -ENOTTY; + } + + /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on + * purpose with NULL buffer and/or zero size to discover TSG bitmap + * size. We need to update user arguments in this case too, even + * if we return an error. 
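+ * For example, a GET_TSGS call that fails with -ENOSPC still copies
+ * the required bitmap size back to user space, so the caller can
+ * retry with a large enough buffer.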
+ */ + if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { + if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) + err = -EFAULT; + } + + return err; +} + +int gk20a_sched_dev_release(struct inode *inode, struct file *filp) +{ + struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + unsigned int tsgid; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); + + /* release any reference to TSGs */ + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); + } + } + + /* unlock control */ + nvgpu_mutex_acquire(&sched->control_lock); + sched->control_locked = false; + nvgpu_mutex_release(&sched->control_lock); + + nvgpu_mutex_release(&sched->busy_lock); + gk20a_put(g); + return 0; +} + +void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + int err; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + if (!sched->sw_ready) { + err = gk20a_busy(g); + if (err) { + WARN_ON(err); + return; + } + + gk20a_idle(g); + } + + nvgpu_mutex_acquire(&sched->status_lock); + NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap); + NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap); + sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN; + nvgpu_mutex_release(&sched->status_lock); + nvgpu_cond_signal_interruptible(&sched->readout_wq); +} + +void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); + + nvgpu_mutex_acquire(&sched->status_lock); + NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap); + + /* clear recent_tsg_bitmap as well: if app manager did not + * notice that TSG was previously added, no need to notify it + * if the TSG has been released in the meantime. If the + * TSG gets reallocated, app manager will be notified as usual. 
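+ * (gk20a_sched_ctrl_tsg_added() sets the bit in both bitmaps again
+ * and signals the wait queue when that happens.)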
+ */ + NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap); + + /* do not set event_pending, we only want to notify app manager + * when TSGs are added, so that it can apply sched params + */ + nvgpu_mutex_release(&sched->status_lock); +} + +int gk20a_sched_ctrl_init(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + struct fifo_gk20a *f = &g->fifo; + int err; + + if (sched->sw_ready) + return 0; + + sched->g = g; + sched->bitmap_size = roundup(f->num_channels, 64) / 8; + sched->status = 0; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu", + g, sched, sched->bitmap_size); + + sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->active_tsg_bitmap) + return -ENOMEM; + + sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->recent_tsg_bitmap) { + err = -ENOMEM; + goto free_active; + } + + sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size); + if (!sched->ref_tsg_bitmap) { + err = -ENOMEM; + goto free_recent; + } + + nvgpu_cond_init(&sched->readout_wq); + + err = nvgpu_mutex_init(&sched->status_lock); + if (err) + goto free_ref; + + err = nvgpu_mutex_init(&sched->control_lock); + if (err) + goto free_status_lock; + + err = nvgpu_mutex_init(&sched->busy_lock); + if (err) + goto free_control_lock; + + sched->sw_ready = true; + + return 0; + +free_control_lock: + nvgpu_mutex_destroy(&sched->control_lock); +free_status_lock: + nvgpu_mutex_destroy(&sched->status_lock); +free_ref: + nvgpu_kfree(g, sched->ref_tsg_bitmap); +free_recent: + nvgpu_kfree(g, sched->recent_tsg_bitmap); +free_active: + nvgpu_kfree(g, sched->active_tsg_bitmap); + + return err; +} + +void gk20a_sched_ctrl_cleanup(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct gk20a_sched_ctrl *sched = &l->sched_ctrl; + + nvgpu_kfree(g, sched->active_tsg_bitmap); + nvgpu_kfree(g, sched->recent_tsg_bitmap); + nvgpu_kfree(g, sched->ref_tsg_bitmap); + sched->active_tsg_bitmap = NULL; + sched->recent_tsg_bitmap = NULL; + sched->ref_tsg_bitmap = NULL; + + nvgpu_mutex_destroy(&sched->status_lock); + nvgpu_mutex_destroy(&sched->control_lock); + nvgpu_mutex_destroy(&sched->busy_lock); + + sched->sw_ready = false; +} diff --git a/drivers/gpu/nvgpu/os/linux/sched.h b/drivers/gpu/nvgpu/os/linux/sched.h new file mode 100644 index 00000000..a699bbea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __NVGPU_SCHED_H +#define __NVGPU_SCHED_H + +struct gk20a; +struct gpu_ops; +struct tsg_gk20a; +struct poll_table_struct; + +struct gk20a_sched_ctrl { + struct gk20a *g; + + struct nvgpu_mutex control_lock; + bool control_locked; + bool sw_ready; + struct nvgpu_mutex status_lock; + struct nvgpu_mutex busy_lock; + + u64 status; + + size_t bitmap_size; + u64 *active_tsg_bitmap; + u64 *recent_tsg_bitmap; + u64 *ref_tsg_bitmap; + + struct nvgpu_cond readout_wq; +}; + +int gk20a_sched_dev_release(struct inode *inode, struct file *filp); +int gk20a_sched_dev_open(struct inode *inode, struct file *filp); +long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long); +ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *); +unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *); + +void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *); +void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *); +int gk20a_sched_ctrl_init(struct gk20a *); + +void gk20a_sched_ctrl_cleanup(struct gk20a *g); + +#endif /* __NVGPU_SCHED_H */ diff --git a/drivers/gpu/nvgpu/os/linux/sim.c b/drivers/gpu/nvgpu/os/linux/sim.c new file mode 100644 index 00000000..8e964f39 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "gk20a/gk20a.h" +#include "platform_gk20a.h" +#include "os_linux.h" +#include "module.h" + +void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) +{ + struct sim_nvgpu_linux *sim_linux = + container_of(sim, struct sim_nvgpu_linux, sim); + + writel(v, sim_linux->regs + r); +} + +u32 sim_readl(struct sim_nvgpu *sim, u32 r) +{ + struct sim_nvgpu_linux *sim_linux = + container_of(sim, struct sim_nvgpu_linux, sim); + + return readl(sim_linux->regs + r); +} + +void nvgpu_remove_sim_support_linux(struct gk20a *g) +{ + struct sim_nvgpu_linux *sim_linux; + + if (!g->sim) + return; + + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + if (sim_linux->regs) { + sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); + iounmap(sim_linux->regs); + sim_linux->regs = NULL; + } + nvgpu_kfree(g, sim_linux); + g->sim = NULL; +} + +int nvgpu_init_sim_support_linux(struct gk20a *g, + struct platform_device *dev) +{ + struct sim_nvgpu_linux *sim_linux; + int err = -ENOMEM; + + if (!nvgpu_platform_is_simulation(g)) + return 0; + + sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); + if (!sim_linux) + return err; + g->sim = &sim_linux->sim; + g->sim->g = g; + sim_linux->regs = nvgpu_ioremap_resource(dev, + GK20A_SIM_IORESOURCE_MEM, + &sim_linux->reg_mem); + if (IS_ERR(sim_linux->regs)) { + nvgpu_err(g, "failed to remap gk20a sim regs"); + err = PTR_ERR(sim_linux->regs); + goto fail; + } + sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; + return 0; + +fail: + nvgpu_remove_sim_support_linux(g); + return err; +} diff --git a/drivers/gpu/nvgpu/os/linux/sim_pci.c b/drivers/gpu/nvgpu/os/linux/sim_pci.c new file mode 100644 index 00000000..d37767b7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim_pci.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "gk20a/gk20a.h" +#include "os_linux.h" +#include "module.h" + +static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) +{ + u32 cfg; + bool is_simulation = false; + + cfg = nvgpu_readl(g, sim_base + sim_config_r()); + if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) + is_simulation = true; + + return is_simulation; +} + +void nvgpu_remove_sim_support_linux_pci(struct gk20a *g) +{ + struct sim_nvgpu_linux *sim_linux; + bool is_simulation; + + is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); + + if (!is_simulation) { + return; + } + + if (!g->sim) { + nvgpu_warn(g, "sim_gk20a not allocated"); + return; + } + sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim); + + if (sim_linux->regs) { + sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v()); + sim_linux->regs = NULL; + } + nvgpu_kfree(g, sim_linux); + g->sim = NULL; +} + +int nvgpu_init_sim_support_linux_pci(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct sim_nvgpu_linux *sim_linux; + int err = -ENOMEM; + bool is_simulation; + + is_simulation = _nvgpu_pci_is_simulation(g, sim_r()); + __nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation); + + if (!is_simulation) + return 0; + + sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); + if (!sim_linux) + return err; + g->sim = &sim_linux->sim; + g->sim->g = g; + sim_linux->regs = l->regs + sim_r(); + sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci; + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/soc.c b/drivers/gpu/nvgpu/os/linux/soc.c new file mode 100644 index 00000000..1b27d6f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/soc.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#ifdef CONFIG_TEGRA_HV_MANAGER +#include +#endif + +#include +#include "os_linux.h" +#include "platform_gk20a.h" + +bool nvgpu_platform_is_silicon(struct gk20a *g) +{ + return tegra_platform_is_silicon(); +} + +bool nvgpu_platform_is_simulation(struct gk20a *g) +{ + return tegra_platform_is_vdk(); +} + +bool nvgpu_platform_is_fpga(struct gk20a *g) +{ + return tegra_platform_is_fpga(); +} + +bool nvgpu_is_hypervisor_mode(struct gk20a *g) +{ + return is_tegra_hypervisor_mode(); +} + +bool nvgpu_is_bpmp_running(struct gk20a *g) +{ + return tegra_bpmp_running(); +} + +bool nvgpu_is_soc_t194_a01(struct gk20a *g) +{ + return ((tegra_get_chip_id() == TEGRA194 && + tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? + true : false); +} + +#ifdef CONFIG_TEGRA_HV_MANAGER +/* When nvlink is enabled on dGPU, we need to use physical memory addresses. + * There is no SMMU translation. However, the device initially enumerates as a + * PCIe device. As such, when allocation memory for this PCIe device, the DMA + * framework ends up allocating memory using SMMU (if enabled in device tree). 
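+ * The buffers handed back by the DMA framework are thus IOVAs that
+ * only the PCIe path can resolve.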
+ * As a result, when we switch to nvlink, we need to use underlying physical + * addresses, even if memory mappings exist in SMMU. + * In addition, when stage-2 SMMU translation is enabled (for instance when HV + * is enabled), the addresses we get from dma_alloc are IPAs. We need to + * convert them to PA. + */ +static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa) +{ + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct hyp_ipa_pa_info info; + int err; + u64 pa = 0ULL; + + err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa); + if (err < 0) { + /* WAR for bug 2096877 + * hyp_read_ipa_pa_info only looks up RAM mappings. + * assume one to one IPA:PA mapping for syncpt aperture + */ + u64 start = g->syncpt_unit_base; + u64 end = g->syncpt_unit_base + g->syncpt_unit_size; + if ((ipa >= start) && (ipa < end)) { + pa = ipa; + nvgpu_log(g, gpu_dbg_map_v, + "ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n", + ipa, platform->vmid, pa); + } else { + nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d", + ipa, platform->vmid, err); + } + } else { + pa = info.base + info.offset; + nvgpu_log(g, gpu_dbg_map_v, + "ipa=%llx vmid=%d -> pa=%llx " + "base=%llx offset=%llx size=%llx\n", + ipa, platform->vmid, pa, info.base, + info.offset, info.size); + } + return pa; +} +#endif + +int nvgpu_init_soc_vars(struct gk20a *g) +{ +#ifdef CONFIG_TEGRA_HV_MANAGER + struct device *dev = dev_from_gk20a(g); + struct gk20a_platform *platform = gk20a_get_platform(dev); + int err; + + if (nvgpu_is_hypervisor_mode(g)) { + err = hyp_read_gid(&platform->vmid); + if (err) { + nvgpu_err(g, "failed to read vmid"); + return err; + } + platform->phys_addr = nvgpu_tegra_hv_ipa_pa; + } +#endif + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.c b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c new file mode 100644 index 00000000..4dd10e6e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c @@ -0,0 +1,419 @@ +/* + * Semaphore Sync Framework Integration + * + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "../linux/channel.h" + +#include "../drivers/staging/android/sync.h" + +#include "sync_sema_android.h" + +static const struct sync_timeline_ops gk20a_sync_timeline_ops; + +struct gk20a_sync_timeline { + struct sync_timeline obj; + u32 max; + u32 min; +}; + +/** + * The sync framework dups pts when merging fences. We share a single + * refcounted gk20a_sync_pt for each duped pt. + */ +struct gk20a_sync_pt { + struct gk20a *g; + struct nvgpu_ref refcount; + u32 thresh; + struct nvgpu_semaphore *sema; + struct gk20a_sync_timeline *obj; + + /* + * Use a spin lock here since it will have better performance + * than a mutex - there should be very little contention on this + * lock. 
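+ *
+ * The only code taking this lock in this file is
+ * gk20a_sync_pt_has_signaled() and gk20a_sync_pt_sema(); both hold it
+ * only for a few non-sleeping operations, so the spinlock is safe as
+ * well as cheap here.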
+ */
+	struct nvgpu_spinlock lock;
+};
+
+struct gk20a_sync_pt_inst {
+	struct sync_pt pt;
+	struct gk20a_sync_pt *shared;
+};
+
+/**
+ * Compares sync pt values a and b, both of which will trigger either before
+ * or after ref (i.e. a and b trigger before ref, or a and b trigger after
+ * ref). Supplying ref allows us to handle wrapping correctly.
+ *
+ * Returns -1 if a < b (a triggers before b)
+ *          0 if a = b (a and b trigger at the same time)
+ *          1 if a > b (b triggers before a)
+ */
+static int __gk20a_sync_pt_compare_ref(
+	u32 ref,
+	u32 a,
+	u32 b)
+{
+	/*
+	 * We normalize both a and b by subtracting ref from them.
+	 * Denote the normalized values by a_n and b_n. Note that because
+	 * of wrapping, a_n and/or b_n may be negative.
+	 *
+	 * The normalized values a_n and b_n satisfy:
+	 * - a positive value triggers before a negative value
+	 * - a smaller positive value triggers before a greater positive value
+	 * - a smaller negative value (greater in absolute value) triggers
+	 *   before a greater negative value (smaller in absolute value).
+	 *
+	 * Thus we can just stick to unsigned arithmetic and compare
+	 * (u32)a_n to (u32)b_n.
+	 *
+	 * Just to reiterate the possible cases:
+	 *
+	 *	1A) ...ref..a....b....
+	 *	1B) ...ref..b....a....
+	 *	2A) ...b....ref..a....		b_n < 0
+	 *	2B) ...a....ref..b....		a_n > 0
+	 *	3A) ...a....b....ref..		a_n < 0, b_n < 0
+	 *	3B) ...b....a....ref..		a_n < 0, b_n < 0
+	 */
+	u32 a_n = a - ref;
+	u32 b_n = b - ref;
+	if (a_n < b_n)
+		return -1;
+	else if (a_n > b_n)
+		return 1;
+	else
+		return 0;
+}
+
+static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
+{
+	struct gk20a_sync_pt_inst *pti =
+		container_of(pt, struct gk20a_sync_pt_inst, pt);
+	return pti->shared;
+}
+static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
+{
+	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
+		return NULL;
+	return (struct gk20a_sync_timeline *)obj;
+}
+
+static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
+{
+	struct gk20a_sync_pt *pt =
+		container_of(ref, struct gk20a_sync_pt, refcount);
+	struct gk20a *g = pt->g;
+
+	if (pt->sema)
+		nvgpu_semaphore_put(pt->sema);
+	nvgpu_kfree(g, pt);
+}
+
+static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
+	struct gk20a *g,
+	struct gk20a_sync_timeline *obj,
+	struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt *shared;
+
+	shared = nvgpu_kzalloc(g, sizeof(*shared));
+	if (!shared)
+		return NULL;
+
+	nvgpu_ref_init(&shared->refcount);
+	shared->g = g;
+	shared->obj = obj;
+	shared->sema = sema;
+	shared->thresh = ++obj->max; /* sync framework has a lock */
+
+	nvgpu_spinlock_init(&shared->lock);
+
+	nvgpu_semaphore_get(sema);
+
+	return shared;
+}
+
+static struct sync_pt *gk20a_sync_pt_create_inst(
+	struct gk20a *g,
+	struct gk20a_sync_timeline *obj,
+	struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt_inst *pti;
+
+	pti = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&obj->obj, sizeof(*pti));
+	if (!pti)
+		return NULL;
+
+	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
+	if (!pti->shared) {
+		sync_pt_free(&pti->pt);
+		return NULL;
+	}
+	return &pti->pt;
+}
+
+static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	if (pt)
+		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
+}
+
+static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt_inst *pti;
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+
+	pti = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&pt->obj->obj, sizeof(*pti));
+	if (!pti)
+		return NULL;
+	pti->shared = pt;
+	nvgpu_ref_get(&pt->refcount);
+	return &pti->pt;
+}
+
+/*
+ * This function must be able to run on the same sync_pt concurrently. This
+ * requires a lock to protect access to the sync_pt's internal data structures
+ * which are modified as a side effect of calling this function.
+ */
+static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	struct gk20a_sync_timeline *obj = pt->obj;
+	bool signaled = true;
+
+	nvgpu_spinlock_acquire(&pt->lock);
+	if (!pt->sema)
+		goto done;
+
+	/* Acquired == not released yet == active == not signaled. */
+	signaled = !nvgpu_semaphore_is_acquired(pt->sema);
+
+	if (signaled) {
+		/* Update min if necessary. */
+		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
+				obj->min) == 1)
+			obj->min = pt->thresh;
+
+		/* Release the semaphore to the pool. */
+		nvgpu_semaphore_put(pt->sema);
+		pt->sema = NULL;
+	}
+done:
+	nvgpu_spinlock_release(&pt->lock);
+
+	return signaled;
+}
+
+static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	bool a_expired;
+	bool b_expired;
+	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
+	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
+
+	if (WARN_ON(pt_a->obj != pt_b->obj))
+		return 0;
+
+	/* Early out */
+	if (a == b)
+		return 0;
+
+	a_expired = gk20a_sync_pt_has_signaled(a);
+	b_expired = gk20a_sync_pt_has_signaled(b);
+	if (a_expired && !b_expired) {
+		/* Easy, a was earlier */
+		return -1;
+	} else if (!a_expired && b_expired) {
+		/* Easy, b was earlier */
+		return 1;
+	}
+
+	/* Both a and b are expired (trigger before min) or not
+	 * expired (trigger after min), so we can use min
+	 * as a reference value for __gk20a_sync_pt_compare_ref.
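+	 *
+	 * Worked example of the wrap handling: with min = 0xfffffff0, the
+	 * thresholds 0xfffffff8 and 0x00000008 normalize to 0x8 and 0x18
+	 * respectively, so 0xfffffff8 correctly orders first even though
+	 * it is the larger raw u32.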
+ */ + return __gk20a_sync_pt_compare_ref(pt_a->obj->min, + pt_a->thresh, pt_b->thresh); +} + +static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj) +{ + return obj->min; +} + +static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline, + char *str, int size) +{ + struct gk20a_sync_timeline *obj = + (struct gk20a_sync_timeline *)timeline; + snprintf(str, size, "%d", gk20a_sync_timeline_current(obj)); +} + +static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, + char *str, int size) +{ + struct nvgpu_semaphore *s = pt->sema; + + snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", + s->location.pool->page_idx, + nvgpu_semaphore_get_value(s), + nvgpu_semaphore_read(s)); +} + +static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, + int size) +{ + struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); + + if (pt->sema) { + gk20a_sync_pt_value_str_for_sema(pt, str, size); + return; + } + + snprintf(str, size, "%d", pt->thresh); +} + +static const struct sync_timeline_ops gk20a_sync_timeline_ops = { + .driver_name = "nvgpu_semaphore", + .dup = gk20a_sync_pt_dup_inst, + .has_signaled = gk20a_sync_pt_has_signaled, + .compare = gk20a_sync_pt_compare, + .free_pt = gk20a_sync_pt_free_inst, + .timeline_value_str = gk20a_sync_timeline_value_str, + .pt_value_str = gk20a_sync_pt_value_str, +}; + +/* Public API */ + +struct sync_fence *gk20a_sync_fence_fdget(int fd) +{ + struct sync_fence *fence = sync_fence_fdget(fd); + int i; + + if (!fence) + return NULL; + + for (i = 0; i < fence->num_fences; i++) { + struct fence *pt = fence->cbs[i].sync_pt; + struct sync_pt *spt = sync_pt_from_fence(pt); + struct sync_timeline *t; + + if (spt == NULL) { + sync_fence_put(fence); + return NULL; + } + + t = sync_pt_parent(spt); + if (t->ops != &gk20a_sync_timeline_ops) { + sync_fence_put(fence); + return NULL; + } + } + + return fence; +} + +struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) +{ + struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); + struct nvgpu_semaphore *sema; + + nvgpu_spinlock_acquire(&pt->lock); + sema = pt->sema; + if (sema) + nvgpu_semaphore_get(sema); + nvgpu_spinlock_release(&pt->lock); + + return sema; +} + +void gk20a_sync_timeline_signal(struct sync_timeline *timeline) +{ + sync_timeline_signal(timeline, 0); +} + +void gk20a_sync_timeline_destroy(struct sync_timeline *timeline) +{ + sync_timeline_destroy(timeline); +} + +struct sync_timeline *gk20a_sync_timeline_create( + const char *name) +{ + struct gk20a_sync_timeline *obj; + + obj = (struct gk20a_sync_timeline *) + sync_timeline_create(&gk20a_sync_timeline_ops, + sizeof(struct gk20a_sync_timeline), + name); + if (!obj) + return NULL; + obj->max = 0; + obj->min = 0; + return &obj->obj; +} + +struct sync_fence *gk20a_sync_fence_create( + struct channel_gk20a *c, + struct nvgpu_semaphore *sema, + const char *fmt, ...) 
+{ + char name[30]; + va_list args; + struct sync_pt *pt; + struct sync_fence *fence; + struct gk20a *g = c->g; + + struct nvgpu_channel_linux *os_channel_priv = c->os_priv; + struct nvgpu_os_fence_framework *fence_framework = NULL; + struct gk20a_sync_timeline *timeline = NULL; + + fence_framework = &os_channel_priv->fence_framework; + + timeline = to_gk20a_timeline(fence_framework->timeline); + + pt = gk20a_sync_pt_create_inst(g, timeline, sema); + if (pt == NULL) + return NULL; + + va_start(args, fmt); + vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + + fence = sync_fence_create(name, pt); + if (fence == NULL) { + sync_pt_free(pt); + return NULL; + } + return fence; +} diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.h b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h new file mode 100644 index 00000000..4fca7bed --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h @@ -0,0 +1,51 @@ +/* + * Semaphore Sync Framework Integration + * + * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _GK20A_SYNC_H_ +#define _GK20A_SYNC_H_ + +struct sync_timeline; +struct sync_fence; +struct sync_pt; +struct nvgpu_semaphore; +struct fence; + +#ifdef CONFIG_SYNC +struct sync_timeline *gk20a_sync_timeline_create(const char *name); +void gk20a_sync_timeline_destroy(struct sync_timeline *); +void gk20a_sync_timeline_signal(struct sync_timeline *); +struct sync_fence *gk20a_sync_fence_create( + struct channel_gk20a *c, + struct nvgpu_semaphore *, + const char *fmt, ...); +struct sync_fence *gk20a_sync_fence_fdget(int fd); +struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt); +#else +static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {} +static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {} +static inline struct sync_fence *gk20a_sync_fence_fdget(int fd) +{ + return NULL; +} +static inline struct sync_timeline *gk20a_sync_timeline_create( + const char *name) { + return NULL; +} +#endif + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c new file mode 100644 index 00000000..e5995bb8 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -0,0 +1,1205 @@ +/* + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include + +#include "sysfs.h" +#include "platform_gk20a.h" +#include "gk20a/pmu_gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "gv11b/gr_gv11b.h" + +#define PTIMER_FP_FACTOR 1000000 + +#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) + +static ssize_t elcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + if (val) { + g->elcg_enabled = true; + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); + } else { + g->elcg_enabled = false; + gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); + } + + gk20a_idle(g); + + nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); + +static ssize_t blcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->blcg_enabled = true; + else + g->blcg_enabled = false; + + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.clock_gating.blcg_bus_load_gating_prod) + g->ops.clock_gating.blcg_bus_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ce_load_gating_prod) + g->ops.clock_gating.blcg_ce_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) + g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_fb_load_gating_prod) + g->ops.clock_gating.blcg_fb_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_fifo_load_gating_prod) + g->ops.clock_gating.blcg_fifo_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_gr_load_gating_prod) + g->ops.clock_gating.blcg_gr_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_ltc_load_gating_prod) + g->ops.clock_gating.blcg_ltc_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_pmu_load_gating_prod) + g->ops.clock_gating.blcg_pmu_load_gating_prod(g, + g->blcg_enabled); + if (g->ops.clock_gating.blcg_xbar_load_gating_prod) + g->ops.clock_gating.blcg_xbar_load_gating_prod(g, + g->blcg_enabled); + gk20a_idle(g); + + nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t blcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); +} + + +static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); + +static ssize_t slcg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) + g->slcg_enabled = true; + else + g->slcg_enabled = false; + + /* + * TODO: slcg_therm_load_gating is not enabled anywhere during + * init. Therefore, it would be incongruous to add it here. 
Once + * it is added to init, we should add it here too. + */ + err = gk20a_busy(g); + if (err) + return err; + + if (g->ops.clock_gating.slcg_bus_load_gating_prod) + g->ops.clock_gating.slcg_bus_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ce2_load_gating_prod) + g->ops.clock_gating.slcg_ce2_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) + g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) + g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_fb_load_gating_prod) + g->ops.clock_gating.slcg_fb_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_fifo_load_gating_prod) + g->ops.clock_gating.slcg_fifo_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_gr_load_gating_prod) + g->ops.clock_gating.slcg_gr_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_ltc_load_gating_prod) + g->ops.clock_gating.slcg_ltc_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_perf_load_gating_prod) + g->ops.clock_gating.slcg_perf_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_priring_load_gating_prod) + g->ops.clock_gating.slcg_priring_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_pmu_load_gating_prod) + g->ops.clock_gating.slcg_pmu_load_gating_prod(g, + g->slcg_enabled); + if (g->ops.clock_gating.slcg_xbar_load_gating_prod) + g->ops.clock_gating.slcg_xbar_load_gating_prod(g, + g->slcg_enabled); + gk20a_idle(g); + + nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t slcg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); + +static ssize_t ptimer_scale_factor_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + u32 scaling_factor_fp; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / + ((u32)(src_freq_hz) / + (u32)(PTIMER_FP_FACTOR)); + res = snprintf(buf, + PAGE_SIZE, + "%u.%u\n", + scaling_factor_fp / PTIMER_FP_FACTOR, + scaling_factor_fp % PTIMER_FP_FACTOR); + + return res; + +} + +static DEVICE_ATTR(ptimer_scale_factor, + S_IRUGO, + ptimer_scale_factor_show, + NULL); + +static ssize_t ptimer_ref_freq_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); + + return res; + +} + +static DEVICE_ATTR(ptimer_ref_freq, + S_IRUGO, + ptimer_ref_freq_show, + NULL); + +static ssize_t ptimer_src_freq_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + u32 src_freq_hz = platform->ptimer_src_freq; + ssize_t res; + + if (!src_freq_hz) { + nvgpu_err(g, "reference clk_m rate is not set correctly"); + return -EINVAL; + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); + + return res; + +} + +static DEVICE_ATTR(ptimer_src_freq, + S_IRUGO, + ptimer_src_freq_show, + NULL); + + +#if defined(CONFIG_PM) +static ssize_t railgate_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long railgate_enable = 0; + /* dev is guaranteed to be valid here. Ok to de-reference */ + struct gk20a *g = get_gk20a(dev); + int err; + + if (kstrtoul(buf, 10, &railgate_enable) < 0) + return -EINVAL; + + if (railgate_enable && !g->can_railgate) { + g->can_railgate = true; + pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); + } else if (railgate_enable == 0 && g->can_railgate) { + g->can_railgate = false; + pm_runtime_set_autosuspend_delay(dev, -1); + } + /* wake-up system to make rail-gating setting effective */ + err = gk20a_busy(g); + if (err) + return err; + gk20a_idle(g); + + nvgpu_info(g, "railgate is %s.", g->can_railgate ? + "enabled" : "disabled"); + + return count; +} + +static ssize_t railgate_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 
1 : 0); +} + +static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read, + railgate_enable_store); +#endif + +static ssize_t railgate_delay_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int railgate_delay = 0, ret = 0; + struct gk20a *g = get_gk20a(dev); + int err; + + if (!g->can_railgate) { + nvgpu_info(g, "does not support power-gating"); + return count; + } + + ret = sscanf(buf, "%d", &railgate_delay); + if (ret == 1 && railgate_delay >= 0) { + g->railgate_delay = railgate_delay; + pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); + } else + nvgpu_err(g, "Invalid powergate delay"); + + /* wake-up system to make rail-gating delay effective immediately */ + err = gk20a_busy(g); + if (err) + return err; + gk20a_idle(g); + + return count; +} +static ssize_t railgate_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); +} +static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, + railgate_delay_store); + +static ssize_t is_railgated_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + bool is_railgated = 0; + + if (platform->is_railgated) + is_railgated = platform->is_railgated(dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); +} +static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); + +static ssize_t counters_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + u32 busy_cycles, total_cycles; + ssize_t res; + + nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); + + res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); + + return res; +} +static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); + +static ssize_t counters_show_reset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t res = counters_show(dev, attr, buf); + struct gk20a *g = get_gk20a(dev); + + nvgpu_pmu_reset_load_counters(g); + + return res; +} +static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); + +static ssize_t gk20a_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + u32 busy_time; + ssize_t res; + int err; + + if (!g->power_on) { + busy_time = 0; + } else { + err = gk20a_busy(g); + if (err) + return err; + + nvgpu_pmu_load_update(g); + nvgpu_pmu_load_norm(g, &busy_time); + gk20a_idle(g); + } + + res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); + + return res; +} +static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); + +static ssize_t elpg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->power_on) { + g->elpg_enabled = val ? true : false; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + /* + * Since elpg is refcounted, we should not unnecessarily call + * enable/disable if it is already so. 
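+	 * Concretely: echoing "1" twice in a row must produce a single
+	 * nvgpu_pmu_pg_global_enable(g, true) call, which is what the
+	 * val/g->elpg_enabled cross-check below enforces.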
+ */ + if (val && !g->elpg_enabled) { + g->elpg_enabled = true; + nvgpu_pmu_pg_global_enable(g, true); + + } else if (!val && g->elpg_enabled) { + if (g->ops.pmu.pmu_pg_engines_feature_list && + g->ops.pmu.pmu_pg_engines_feature_list(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS) != + NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) { + nvgpu_pmu_pg_global_enable(g, false); + g->elpg_enabled = false; + } else { + g->elpg_enabled = false; + nvgpu_pmu_pg_global_enable(g, false); + } + } + gk20a_idle(g); + } + nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t elpg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store); + +static ssize_t ldiv_slowdown_factor_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) { + nvgpu_err(g, "parse error for input SLOWDOWN factor\n"); + return -EINVAL; + } + + if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) { + nvgpu_err(g, "Invalid SLOWDOWN factor\n"); + return -EINVAL; + } + + if (val == g->ldiv_slowdown_factor) + return count; + + if (!g->power_on) { + g->ldiv_slowdown_factor = val; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + + g->ldiv_slowdown_factor = val; + + if (g->ops.pmu.pmu_pg_init_param) + g->ops.pmu.pmu_pg_init_param(g, + PMU_PG_ELPG_ENGINE_ID_GRAPHICS); + + gk20a_idle(g); + } + + nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor); + + return count; +} + +static ssize_t ldiv_slowdown_factor_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor); +} + +static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW, + ldiv_slowdown_factor_read, ldiv_slowdown_factor_store); + +static ssize_t mscg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_pmu *pmu = &g->pmu; + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->power_on) { + g->mscg_enabled = val ? true : false; + } else { + err = gk20a_busy(g); + if (err) + return -EAGAIN; + /* + * Since elpg is refcounted, we should not unnecessarily call + * enable/disable if it is already so. + */ + if (val && !g->mscg_enabled) { + g->mscg_enabled = true; + if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, + PMU_PG_LPWR_FEATURE_MSCG)) { + if (!ACCESS_ONCE(pmu->mscg_stat)) { + WRITE_ONCE(pmu->mscg_stat, + PMU_MSCG_ENABLED); + /* make status visible */ + smp_mb(); + } + } + + } else if (!val && g->mscg_enabled) { + if (g->ops.pmu.pmu_is_lpwr_feature_supported(g, + PMU_PG_LPWR_FEATURE_MSCG)) { + nvgpu_pmu_pg_global_enable(g, false); + WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED); + /* make status visible */ + smp_mb(); + g->mscg_enabled = false; + if (g->elpg_enabled) + nvgpu_pmu_pg_global_enable(g, true); + } + g->mscg_enabled = false; + } + gk20a_idle(g); + } + nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? 
"enabled" : + "disabled"); + + return count; +} + +static ssize_t mscg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0); +} + +static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store); + +static ssize_t aelpg_param_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + int status = 0; + union pmu_ap_cmd ap_cmd; + int *paramlist = (int *)g->pmu.aelpg_param; + u32 defaultparam[5] = { + APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, + APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, + APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, + APCTRL_POWER_BREAKEVEN_DEFAULT_US, + APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT + }; + + /* Get each parameter value from input string*/ + sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], + ¶mlist[2], ¶mlist[3], ¶mlist[4]); + + /* If parameter value is 0 then reset to SW default values*/ + if ((paramlist[0] | paramlist[1] | paramlist[2] + | paramlist[3] | paramlist[4]) == 0x00) { + memcpy(paramlist, defaultparam, sizeof(defaultparam)); + } + + /* If aelpg is enabled & pmu is ready then post values to + * PMU else store then post later + */ + if (g->aelpg_enabled && g->pmu.pmu_ready) { + /* Disable AELPG */ + ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; + ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + + /* Enable AELPG */ + nvgpu_aelpg_init(g); + nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); + } + + return count; +} + +static ssize_t aelpg_param_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, + "%d %d %d %d %d\n", g->pmu.aelpg_param[0], + g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], + g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); +} + +static DEVICE_ATTR(aelpg_param, ROOTRW, + aelpg_param_read, aelpg_param_store); + +static ssize_t aelpg_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int status = 0; + union pmu_ap_cmd ap_cmd; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + if (err) + return err; + + if (g->pmu.pmu_ready) { + if (val && !g->aelpg_enabled) { + g->aelpg_enabled = true; + /* Enable AELPG */ + ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL; + ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + } else if (!val && g->aelpg_enabled) { + g->aelpg_enabled = false; + /* Disable AELPG */ + ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; + ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; + status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); + } + } else { + nvgpu_info(g, "PMU is not ready, AELPG request failed"); + } + gk20a_idle(g); + + nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" : + "disabled"); + + return count; +} + +static ssize_t aelpg_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 
1 : 0); +} + +static DEVICE_ATTR(aelpg_enable, ROOTRW, + aelpg_enable_read, aelpg_enable_store); + + +static ssize_t allow_all_enable_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); +} + +static ssize_t allow_all_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = gk20a_busy(g); + g->allow_all = (val ? true : false); + gk20a_idle(g); + + return count; +} + +static DEVICE_ATTR(allow_all, ROOTRW, + allow_all_enable_read, allow_all_enable_store); + +static ssize_t emc3d_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + g->emc3d_ratio = val; + + return count; +} + +static ssize_t emc3d_ratio_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); +} + +static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); + +static ssize_t fmax_at_vmin_safe_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long gpu_fmax_at_vmin_hz = 0; + + if (g->ops.clk.get_fmax_at_vmin_safe) + gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); + + return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); +} + +static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); + +#ifdef CONFIG_PM +static ssize_t force_idle_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + int err = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val) { + if (g->forced_idle) + return count; /* do nothing */ + else { + err = __gk20a_do_idle(g, false); + if (!err) { + g->forced_idle = 1; + nvgpu_info(g, "gpu is idle : %d", + g->forced_idle); + } + } + } else { + if (!g->forced_idle) + return count; /* do nothing */ + else { + err = __gk20a_do_unidle(g); + if (!err) { + g->forced_idle = 0; + nvgpu_info(g, "gpu is idle : %d", + g->forced_idle); + } + } + } + + return count; +} + +static ssize_t force_idle_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 
1 : 0); +} + +static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); +#endif + +static ssize_t tpc_fs_mask_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (!g->gr.gpc_tpc_mask) + return -ENODEV; + + if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) { + g->gr.gpc_tpc_mask[0] = val; + g->tpc_fs_mask_user = val; + + g->ops.gr.set_gpc_tpc_mask(g, 0); + + nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image); + g->gr.ctx_vars.local_golden_image = NULL; + g->gr.ctx_vars.golden_image_initialized = false; + g->gr.ctx_vars.golden_image_size = 0; + /* Cause next poweron to reinit just gr */ + g->gr.sw_ready = false; + } + + return count; +} + +static ssize_t tpc_fs_mask_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + u32 gpc_index; + u32 tpc_fs_mask = 0; + int err = 0; + + err = gk20a_busy(g); + if (err) + return err; + + for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { + if (g->ops.gr.get_gpc_tpc_mask) + tpc_fs_mask |= + g->ops.gr.get_gpc_tpc_mask(g, gpc_index) << + (gr->max_tpc_per_gpc_count * gpc_index); + } + + gk20a_idle(g); + + return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask); +} + +static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store); + +static ssize_t min_timeslice_us_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); +} + +static ssize_t min_timeslice_us_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val > g->max_timeslice_us) + return -EINVAL; + + g->min_timeslice_us = val; + + return count; +} + +static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, + min_timeslice_us_store); + +static ssize_t max_timeslice_us_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); +} + +static ssize_t max_timeslice_us_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val < g->min_timeslice_us) + return -EINVAL; + + g->max_timeslice_us = val; + + return count; +} + +static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, + max_timeslice_us_store); + +static ssize_t czf_bypass_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val >= 4) + return -EINVAL; + + g->gr.czf_bypass = val; + + return count; +} + +static ssize_t czf_bypass_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return sprintf(buf, "%d\n", g->gr.czf_bypass); +} + +static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); + +static ssize_t pd_max_batches_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + 
unsigned long val; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (val > 64) + return -EINVAL; + + g->gr.pd_max_batches = val; + + return count; +} + +static ssize_t pd_max_batches_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + + return sprintf(buf, "%d\n", g->gr.pd_max_batches); +} + +static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); + +static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + unsigned long val = 0; + int err = -1; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + if (g->ops.gr.get_max_gfxp_wfi_timeout_count) { + if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g)) + return -EINVAL; + } + + gr->gfxp_wfi_timeout_count = val; + + if (g->ops.gr.init_preemption_state && g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.init_preemption_state(g)); + + gk20a_idle(g); + + if (err) + return err; + } + return count; +} + +static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + int err = -1; + + if (count > 0 && buf[0] == 's') + /* sysclk */ + gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK; + else + /* usec */ + gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC; + + if (g->ops.gr.init_preemption_state && g->power_on) { + err = gk20a_busy(g); + if (err) + return err; + + err = gr_gk20a_elpg_protected_call(g, + g->ops.gr.init_preemption_state(g)); + + gk20a_idle(g); + + if (err) + return err; + } + + return count; +} + +static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + u32 val = gr->gfxp_wfi_timeout_count; + + return snprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct gr_gk20a *gr = &g->gr; + + if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) + return snprintf(buf, PAGE_SIZE, "usec\n"); + else + return snprintf(buf, PAGE_SIZE, "sysclk\n"); +} + +static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH), + gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store); + +static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH), + gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store); + +void nvgpu_remove_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_elcg_enable); + device_remove_file(dev, &dev_attr_blcg_enable); + device_remove_file(dev, &dev_attr_slcg_enable); + device_remove_file(dev, &dev_attr_ptimer_scale_factor); + device_remove_file(dev, &dev_attr_ptimer_ref_freq); + device_remove_file(dev, &dev_attr_ptimer_src_freq); + device_remove_file(dev, &dev_attr_elpg_enable); + device_remove_file(dev, &dev_attr_mscg_enable); + device_remove_file(dev, &dev_attr_emc3d_ratio); + device_remove_file(dev, &dev_attr_ldiv_slowdown_factor); + + device_remove_file(dev, &dev_attr_fmax_at_vmin_safe); + + device_remove_file(dev, &dev_attr_counters); + device_remove_file(dev, &dev_attr_counters_reset); + device_remove_file(dev, &dev_attr_load); + device_remove_file(dev, 
&dev_attr_railgate_delay); + device_remove_file(dev, &dev_attr_is_railgated); +#ifdef CONFIG_PM + device_remove_file(dev, &dev_attr_force_idle); + device_remove_file(dev, &dev_attr_railgate_enable); +#endif + device_remove_file(dev, &dev_attr_aelpg_param); + device_remove_file(dev, &dev_attr_aelpg_enable); + device_remove_file(dev, &dev_attr_allow_all); + device_remove_file(dev, &dev_attr_tpc_fs_mask); + device_remove_file(dev, &dev_attr_min_timeslice_us); + device_remove_file(dev, &dev_attr_max_timeslice_us); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_nvhost_remove_symlink(get_gk20a(dev)); +#endif + + device_remove_file(dev, &dev_attr_czf_bypass); + device_remove_file(dev, &dev_attr_pd_max_batches); + device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count); + device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit); + + if (strcmp(dev_name(dev), "gpu.0")) { + struct kobject *kobj = &dev->kobj; + struct device *parent = container_of((kobj->parent), + struct device, kobj); + sysfs_remove_link(&parent->kobj, "gpu.0"); + } +} + +int nvgpu_create_sysfs(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int error = 0; + + error |= device_create_file(dev, &dev_attr_elcg_enable); + error |= device_create_file(dev, &dev_attr_blcg_enable); + error |= device_create_file(dev, &dev_attr_slcg_enable); + error |= device_create_file(dev, &dev_attr_ptimer_scale_factor); + error |= device_create_file(dev, &dev_attr_ptimer_ref_freq); + error |= device_create_file(dev, &dev_attr_ptimer_src_freq); + error |= device_create_file(dev, &dev_attr_elpg_enable); + error |= device_create_file(dev, &dev_attr_mscg_enable); + error |= device_create_file(dev, &dev_attr_emc3d_ratio); + error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor); + + error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe); + + error |= device_create_file(dev, &dev_attr_counters); + error |= device_create_file(dev, &dev_attr_counters_reset); + error |= device_create_file(dev, &dev_attr_load); + error |= device_create_file(dev, &dev_attr_railgate_delay); + error |= device_create_file(dev, &dev_attr_is_railgated); +#ifdef CONFIG_PM + error |= device_create_file(dev, &dev_attr_force_idle); + error |= device_create_file(dev, &dev_attr_railgate_enable); +#endif + error |= device_create_file(dev, &dev_attr_aelpg_param); + error |= device_create_file(dev, &dev_attr_aelpg_enable); + error |= device_create_file(dev, &dev_attr_allow_all); + error |= device_create_file(dev, &dev_attr_tpc_fs_mask); + error |= device_create_file(dev, &dev_attr_min_timeslice_us); + error |= device_create_file(dev, &dev_attr_max_timeslice_us); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + error |= nvgpu_nvhost_create_symlink(g); +#endif + + error |= device_create_file(dev, &dev_attr_czf_bypass); + error |= device_create_file(dev, &dev_attr_pd_max_batches); + error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count); + error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit); + + if (strcmp(dev_name(dev), "gpu.0")) { + struct kobject *kobj = &dev->kobj; + struct device *parent = container_of((kobj->parent), + struct device, kobj); + error |= sysfs_create_link(&parent->kobj, + &dev->kobj, "gpu.0"); + } + + if (error) + nvgpu_err(g, "Failed to create sysfs attributes!\n"); + + return error; +} diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.h b/drivers/gpu/nvgpu/os/linux/sysfs.h new file mode 100644 index 00000000..80925844 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef NVGPU_SYSFS_H +#define NVGPU_SYSFS_H + +struct device; + +int nvgpu_create_sysfs(struct device *dev); +void nvgpu_remove_sysfs(struct device *dev); + +#endif diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c new file mode 100644 index 00000000..92c556f2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/thread.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +int nvgpu_thread_proxy(void *threaddata) +{ + struct nvgpu_thread *thread = threaddata; + int ret = thread->fn(thread->data); + + thread->running = false; + return ret; +} + +int nvgpu_thread_create(struct nvgpu_thread *thread, + void *data, + int (*threadfn)(void *data), const char *name) +{ + struct task_struct *task = kthread_create(nvgpu_thread_proxy, + thread, name); + if (IS_ERR(task)) + return PTR_ERR(task); + + thread->task = task; + thread->fn = threadfn; + thread->data = data; + thread->running = true; + wake_up_process(task); + return 0; +}; + +void nvgpu_thread_stop(struct nvgpu_thread *thread) +{ + if (thread->task) { + kthread_stop(thread->task); + thread->task = NULL; + } +}; + +bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) +{ + return kthread_should_stop(); +}; + +bool nvgpu_thread_is_running(struct nvgpu_thread *thread) +{ + return ACCESS_ONCE(thread->running); +}; diff --git a/drivers/gpu/nvgpu/os/linux/timers.c b/drivers/gpu/nvgpu/os/linux/timers.c new file mode 100644 index 00000000..d1aa641f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/timers.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
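The thread.c wrapper just above exists so OS-independent code can spawn workers without touching kthread directly; the running flag is what keeps nvgpu_thread_is_running() meaningful after the thread function returns on its own. A sketch of the intended call pattern (worker(), poll_thread and start_stop() are hypothetical):

    static struct nvgpu_thread poll_thread;

    static int worker(void *data)
    {
            struct gk20a *g = data;

            /* Cooperative stop: poll the request nvgpu_thread_stop() raises. */
            while (!nvgpu_thread_should_stop(&poll_thread)) {
                    /* ... periodic work on g ... */
                    nvgpu_msleep(100);
            }
            return 0;
    }

    static int start_stop(struct gk20a *g)
    {
            int err = nvgpu_thread_create(&poll_thread, g, worker, "nvgpu_poll");

            if (err)
                    return err;
            /* ... */
            nvgpu_thread_stop(&poll_thread);  /* blocks until worker() returns */
            return 0;
    }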
+ */ + +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#include "platform_gk20a.h" + +/* + * Returns 1 if the platform is pre-Si and should ignore the timeout checking. + * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e do the + * timeout check regardless of platform). + */ +static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) +{ + if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) + return 0; + + return !nvgpu_platform_is_silicon(timeout->g); +} + +/** + * nvgpu_timeout_init - Init timer. + * + * @g - nvgpu device. + * @timeout - The timer. + * @duration - Timeout in milliseconds or number of retries. + * @flags - Flags for timer. + * + * This configures the timeout to start the timeout duration now, i.e: when this + * function is called. Available flags to pass to @flags: + * + * %NVGPU_TIMER_CPU_TIMER + * %NVGPU_TIMER_RETRY_TIMER + * %NVGPU_TIMER_NO_PRE_SI + * %NVGPU_TIMER_SILENT_TIMEOUT + * + * If neither %NVGPU_TIMER_CPU_TIMER or %NVGPU_TIMER_RETRY_TIMER is passed then + * a CPU timer is used by default. + */ +int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, + u32 duration, unsigned long flags) +{ + if (flags & ~NVGPU_TIMER_FLAG_MASK) + return -EINVAL; + + memset(timeout, 0, sizeof(*timeout)); + + timeout->g = g; + timeout->flags = flags; + + if (flags & NVGPU_TIMER_RETRY_TIMER) + timeout->retries.max = duration; + else + timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), + (s64)NSEC_PER_MSEC * duration)); + + return 0; +} + +static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, + void *caller, + const char *fmt, va_list args) +{ + struct gk20a *g = timeout->g; + ktime_t now = ktime_get(); + + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (ktime_after(now, ns_to_ktime(timeout->time))) { + if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { + char buf[128]; + + vsnprintf(buf, sizeof(buf), fmt, args); + + nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); + } + + return -ETIMEDOUT; + } + + return 0; +} + +static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, + void *caller, + const char *fmt, va_list args) +{ + struct gk20a *g = timeout->g; + + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (timeout->retries.attempted >= timeout->retries.max) { + if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { + char buf[128]; + + vsnprintf(buf, sizeof(buf), fmt, args); + + nvgpu_err(g, "No more retries @ %pF %s", caller, buf); + } + + return -ETIMEDOUT; + } + + timeout->retries.attempted++; + + return 0; +} + +/** + * __nvgpu_timeout_expired_msg - Check if a timeout has expired. + * + * @timeout - The timeout to check. + * @caller - Address of the caller of this function. + * @fmt - The fmt string. + * + * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. + * + * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout + * then a message is printed based on %fmt. + */ +int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, + void *caller, const char *fmt, ...) +{ + int ret; + va_list args; + + va_start(args, fmt); + if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) + ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, + args); + else + ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, + args); + va_end(args); + + return ret; +} + +/** + * nvgpu_timeout_peek_expired - Check the status of a timeout. + * + * @timeout - The timeout to check. 
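+ *
+ * Typical use (an editor's sketch: hw_is_ready() is a hypothetical
+ * condition, and the CPU-timer flavor matters here because peeking
+ * never advances a retry timer's attempt count):
+ *
+ *	struct nvgpu_timeout timeout;
+ *
+ *	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_CPU_TIMER);
+ *	while (!nvgpu_timeout_peek_expired(&timeout)) {
+ *		if (hw_is_ready(g))
+ *			return 0;
+ *		nvgpu_udelay(10);
+ *	}
+ *	return hw_is_ready(g) ? 0 : -ETIMEDOUT;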
+ * + * Returns non-zero if the timeout is expired, zero otherwise. In the case of + * retry timers this will not increment the underlying retry count. Also if the + * timer has expired no messages will be printed. + * + * This function honors the pre-Si check as well. + */ +int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) +{ + if (nvgpu_timeout_is_pre_silicon(timeout)) + return 0; + + if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) + return timeout->retries.attempted >= timeout->retries.max; + else + return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); +} + +/** + * nvgpu_udelay - Delay for some number of microseconds. + * + * @usecs - Microseconds to wait for. + * + * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly + * accurate. This is normally backed by a busy-loop so this means waits should + * be kept short, below 100us. If longer delays are necessary then + * nvgpu_msleep() should be preferred. + * + * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This + * function will attempt to not use a busy-loop. + */ +void nvgpu_udelay(unsigned int usecs) +{ + udelay(usecs); +} + +/** + * nvgpu_usleep_range - Sleep for a range of microseconds. + * + * @min_us - Minimum wait time. + * @max_us - Maximum wait time. + * + * Wait for some number of microseconds between @min_us and @max_us. This, + * unlike nvgpu_udelay(), will attempt to sleep for the passed number of + * microseconds instead of busy looping. Not all platforms support this, + * and in that case this reduces to nvgpu_udelay(min_us). + * + * Linux note: this is not safe to use in atomic context. If you are in + * atomic context you must use nvgpu_udelay(). + */ +void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us) +{ + usleep_range(min_us, max_us); +} + +/** + * nvgpu_msleep - Sleep for some milliseconds. + * + * @msecs - Sleep for at least this many milliseconds. + * + * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms + * or so) the sleep will be significantly longer due to scheduling overhead and + * mechanics. + */ +void nvgpu_msleep(unsigned int msecs) +{ + msleep(msecs); +} + +/** + * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock. + * + * Return a clock in millisecond units. The start time of the clock is + * unspecified; the time returned can be compared with older ones to measure + * durations. The source clock does not jump when the system clock is adjusted. + */ +s64 nvgpu_current_time_ms(void) +{ + return ktime_to_ms(ktime_get()); +} + +/** + * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock. + * + * Return a clock in nanosecond units. The start time of the clock is + * unspecified; the time returned can be compared with older ones to measure + * durations. The source clock does not jump when the system clock is adjusted. + */ +s64 nvgpu_current_time_ns(void) +{ + return ktime_to_ns(ktime_get()); +} + +/** + * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp. + * + * Return a "high resolution" time stamp. It does not really matter exactly what + * it is, so long as it generally returns unique values and monotonically + * increases - wrap around _is_ possible though in a system running for long + * enough. + * + * Note: what high resolution means is system dependent. 
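+ *
+ * An editor's sketch of wrap-tolerant use (do_work() is a hypothetical
+ * workload; unsigned subtraction absorbs a single wrap):
+ *
+ *	u64 t0 = nvgpu_hr_timestamp();
+ *	do_work();
+ *	u64 spent = nvgpu_hr_timestamp() - t0;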
+ */ +u64 nvgpu_hr_timestamp(void) +{ + return get_cycles(); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c new file mode 100644 index 00000000..9f6017d3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c @@ -0,0 +1,168 @@ +/* + * Virtualized GPU Clock Interface + * + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include "gk20a/gk20a.h" +#include "clk_vgpu.h" +#include "ctrl/ctrlclk.h" +#include "os/linux/platform_gk20a.h" + +static unsigned long +vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; + +static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err; + unsigned long ret = 0; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) + nvgpu_err(g, "%s failed - %d", __func__, err); + else + /* return frequency in Hz */ + ret = p->rate * 1000; + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return ret; +} + +static int vgpu_clk_set_rate(struct gk20a *g, + u32 api_domain, unsigned long rate) +{ + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + + /* server dvfs framework requires frequency in kHz */ + p->rate = (u32)(rate / 1000); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? 
err : msg.ret; + if (err) + nvgpu_err(g, "%s failed - %d", __func__, err); + break; + case CTRL_CLK_DOMAIN_PWRCLK: + nvgpu_err(g, "unsupported clock: %u", api_domain); + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.max_freq; +} + +void vgpu_init_clk_support(struct gk20a *g) +{ + g->ops.clk.get_rate = vgpu_clk_get_rate; + g->ops.clk.set_rate = vgpu_clk_set_rate; + g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; +} + +long vgpu_clk_round_rate(struct device *dev, unsigned long rate) +{ + /* server will handle frequency rounding */ + return rate; +} + +int vgpu_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_get_gpu_freq_table_params *p = + &msg.params.get_gpu_freq_table; + unsigned int i; + int err; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; + msg.handle = vgpu_get_handle(g); + + p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + return err; + } + + /* return frequency in Hz */ + for (i = 0; i < p->num_freqs; i++) + vgpu_freq_table[i] = p->freqs[i] * 1000; + + *freqs = vgpu_freq_table; + *num_freqs = p->num_freqs; + + return 0; +} + +int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + struct tegra_vgpu_cmd_msg msg = {}; + struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; + int err = 0; + + nvgpu_log_fn(g, " "); + + msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; + msg.handle = vgpu_get_handle(g); + p->rate = (u32)rate; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (err) { + nvgpu_err(g, "%s failed - %d", __func__, err); + return err; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h new file mode 100644 index 00000000..8d477643 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h @@ -0,0 +1,27 @@ +/* + * Virtualized GPU Clock Interface + * + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _CLK_VIRT_H_ +#define _CLK_VIRT_H_ + +void vgpu_init_clk_support(struct gk20a *g); +long vgpu_clk_round_rate(struct device *dev, unsigned long rate); +int vgpu_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs); +int vgpu_clk_cap_rate(struct device *dev, unsigned long rate); +#endif diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..6339aef9 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "os/linux/os_linux.h" +#include "vgpu/fecs_trace_vgpu.h" + +struct vgpu_fecs_trace { + struct tegra_hv_ivm_cookie *cookie; + struct nvgpu_ctxsw_ring_header *header; + struct nvgpu_ctxsw_trace_entry *entries; + int num_entries; + bool enabled; + void *buf; +}; + +int vgpu_fecs_trace_init(struct gk20a *g) +{ + struct device *dev = dev_from_gk20a(g); + struct device_node *np = dev->of_node; + struct of_phandle_args args; + struct vgpu_fecs_trace *vcst; + u32 mempool; + int err; + + nvgpu_log_fn(g, " "); + + vcst = nvgpu_kzalloc(g, sizeof(*vcst)); + if (!vcst) + return -ENOMEM; + + err = of_parse_phandle_with_fixed_args(np, + "mempool-fecs-trace", 1, 0, &args); + if (err) { + nvgpu_info(g, "does not support fecs trace"); + goto fail; + } + __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); + + mempool = args.args[0]; + vcst->cookie = vgpu_ivm_mempool_reserve(mempool); + if (IS_ERR(vcst->cookie)) { + nvgpu_info(g, + "mempool %u reserve failed", mempool); + vcst->cookie = NULL; + err = -EINVAL; + goto fail; + } + + vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), + vgpu_ivm_get_size(vcst->cookie)); + if (!vcst->buf) { + nvgpu_info(g, "ioremap_cache failed"); + err = -EINVAL; + goto fail; + } + vcst->header = vcst->buf; + vcst->num_entries = vcst->header->num_ents; + if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { + nvgpu_err(g, "entry size mismatch"); + goto fail; + } + vcst->entries = vcst->buf + sizeof(*vcst->header); + g->fecs_trace = (struct gk20a_fecs_trace *)vcst; + + return 0; +fail: + iounmap(vcst->buf); + if (vcst->cookie) + vgpu_ivm_mempool_unreserve(vcst->cookie); + nvgpu_kfree(g, vcst); + return err; +} + +int vgpu_fecs_trace_deinit(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + iounmap(vcst->buf); + vgpu_ivm_mempool_unreserve(vcst->cookie); + nvgpu_kfree(g, vcst); + return 0; +} + +int vgpu_fecs_trace_enable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), 
sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + vcst->enabled = !err; + return err; +} + +int vgpu_fecs_trace_disable(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, + .handle = vgpu_get_handle(g), + }; + int err; + + vcst->enabled = false; + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +bool vgpu_fecs_trace_is_enabled(struct gk20a *g) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return (vcst && vcst->enabled); +} + +int vgpu_fecs_trace_poll(struct gk20a *g) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, + .handle = vgpu_get_handle(g), + }; + int err; + + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + *buf = vcst->buf; + *size = vgpu_ivm_get_size(vcst->cookie); + return 0; +} + +int vgpu_free_user_buffer(struct gk20a *g) +{ + return 0; +} + +int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + unsigned long size = vgpu_ivm_get_size(vcst->cookie); + unsigned long vsize = vma->vm_end - vma->vm_start; + + size = min(size, vsize); + size = round_up(size, PAGE_SIZE); + + return remap_pfn_range(vma, vma->vm_start, + vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, + size, + vma->vm_page_prot); +} + +#ifdef CONFIG_GK20A_CTXSW_TRACE +int vgpu_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_ctxsw_trace_filter *filter) +{ + struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; + + return vcst->header->num_ents; +} + +#if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE +#error "FECS trace filter size mismatch!" +#endif + +int vgpu_fecs_trace_set_filter(struct gk20a *g, + struct nvgpu_ctxsw_trace_filter *filter) +{ + struct tegra_vgpu_cmd_msg msg = { + .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, + .handle = vgpu_get_handle(g), + }; + struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; + int err; + + memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + WARN_ON(err); + return err; +} + +void vgpu_fecs_trace_data_update(struct gk20a *g) +{ + gk20a_ctxsw_trace_wake_up(g, 0); +} +#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 00000000..66911626 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +#include "gk20a/gk20a.h" +#include "os/linux/vgpu/clk_vgpu.h" +#include "os/linux/platform_gk20a.h" +#include "os/linux/os_linux.h" + +static int gv11b_vgpu_probe(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct resource *r; + void __iomem *regs; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); + struct gk20a *g = platform->g; + int ret; + + r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); + if (!r) { + nvgpu_err(g, "failed to get usermode regs"); + return -ENXIO; + } + regs = devm_ioremap_resource(dev, r); + if (IS_ERR(regs)) { + nvgpu_err(g, "failed to map usermode regs"); + return PTR_ERR(regs); + } + l->usermode_regs = regs; + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + ret = nvgpu_get_nvhost_dev(g); + if (ret) { + l->usermode_regs = NULL; + return ret; + } + + ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, + &g->syncpt_unit_base, + &g->syncpt_unit_size); + if (ret) { + nvgpu_err(g, "Failed to get syncpt interface"); + return -ENOSYS; + } + g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); + nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", + g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); +#endif + vgpu_init_clk_support(platform->g); + + return 0; +} + +struct gk20a_platform gv11b_vgpu_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .can_railgate_init = false, + .can_elpg_init = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_elcg = false, + .enable_elpg = false, + .enable_aelpg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gv11b_vgpu_probe, + + .clk_round_rate = vgpu_clk_round_rate, + .get_clk_freqs = vgpu_clk_get_freqs, + + /* frequency scaling configuration */ + .devfreq_governor = "userspace", + + .virtual_dev = true, +}; diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 00000000..e4819e7d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c @@ -0,0 +1,69 @@ +/* + * Tegra Virtualized GPU Platform Interface + * + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include "gk20a/gk20a.h" +#include "os/linux/platform_gk20a.h" +#include "clk_vgpu.h" + +static int gk20a_tegra_probe(struct device *dev) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a_platform *platform = dev_get_drvdata(dev); + int ret; + + ret = nvgpu_get_nvhost_dev(platform->g); + if (ret) + return ret; + + vgpu_init_clk_support(platform->g); + return 0; +#else + return 0; +#endif +} + +struct gk20a_platform vgpu_tegra_platform = { + .has_syncpoints = true, + .aggressive_sync_destroy_thresh = 64, + + /* power management configuration */ + .can_railgate_init = false, + .can_elpg_init = false, + .enable_slcg = false, + .enable_blcg = false, + .enable_elcg = false, + .enable_elpg = false, + .enable_aelpg = false, + .can_slcg = false, + .can_blcg = false, + .can_elcg = false, + + .ch_wdt_timeout_ms = 5000, + + .probe = gk20a_tegra_probe, + + .clk_round_rate = vgpu_clk_round_rate, + .get_clk_freqs = vgpu_clk_get_freqs, + + /* frequency scaling configuration */ + .devfreq_governor = "userspace", + + .virtual_dev = true, +}; diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 00000000..57aad4b4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os/linux/platform_gk20a.h" + +static ssize_t vgpu_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; + int err; + + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + if (err) + return err; + + return snprintf(buf, PAGE_SIZE, "%u\n", p->load); +} +static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); + +void vgpu_create_sysfs(struct device *dev) +{ + if (device_create_file(dev, &dev_attr_load)) + dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); +} + +void vgpu_remove_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_load); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c new file mode 100644 index 00000000..950f0d49 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "os/linux/os_linux.h" + +int vgpu_ivc_init(struct gk20a *g, u32 elems, + const size_t *queue_sizes, u32 queue_start, u32 num_queues) +{ + struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); + + return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, + num_queues); +} + +void vgpu_ivc_deinit(u32 queue_start, u32 num_queues) +{ + tegra_gr_comm_deinit(queue_start, num_queues); +} + +void vgpu_ivc_release(void *handle) +{ + tegra_gr_comm_release(handle); +} + +u32 vgpu_ivc_get_server_vmid(void) +{ + return tegra_gr_comm_get_server_vmid(); +} + +int vgpu_ivc_recv(u32 index, void **handle, void **data, + size_t *size, u32 *sender) +{ + return tegra_gr_comm_recv(index, handle, data, size, sender); +} + +int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size) +{ + return tegra_gr_comm_send(peer, index, data, size); +} + +int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle, + void **data, size_t *size) +{ + return tegra_gr_comm_sendrecv(peer, index, handle, data, size); +} + +u32 vgpu_ivc_get_peer_self(void) +{ + return TEGRA_GR_COMM_ID_SELF; +} + +void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr, + size_t *size) +{ + return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size); +} + +void vgpu_ivc_oob_put_ptr(void *handle) +{ + tegra_gr_comm_oob_put_ptr(handle); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c new file mode 100644 index 00000000..bbd444da --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +#include "os/linux/os_linux.h" + +struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id) +{ + return tegra_hv_mempool_reserve(id); +} + +int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie) +{ + return tegra_hv_mempool_unreserve(cookie); +} + +u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie) +{ + return cookie->ipa; +} + +u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie) +{ + return cookie->size; +} + +void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie) +{ + return ioremap_cache(vgpu_ivm_get_ipa(cookie), + vgpu_ivm_get_size(cookie)); +} + +void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie, + void *addr) +{ + iounmap(addr); +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c new file mode 100644 index 00000000..a7612e54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -0,0 +1,475 @@ +/* + * Virtualized GPU for Linux + * + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vgpu_linux.h" +#include "vgpu/fecs_trace_vgpu.h" +#include "clk_vgpu.h" +#include "gk20a/tsg_gk20a.h" +#include "gk20a/channel_gk20a.h" +#include "gk20a/regops_gk20a.h" +#include "gm20b/hal_gm20b.h" + +#include "os/linux/module.h" +#include "os/linux/os_linux.h" +#include "os/linux/ioctl.h" +#include "os/linux/scale.h" +#include "os/linux/driver_common.h" +#include "os/linux/platform_gk20a.h" + +#include + +struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) +{ + struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); + + return (struct vgpu_priv_data *)plat->vgpu_priv; +} + +static void vgpu_remove_support(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + vgpu_remove_support_common(g); + + /* free mappings to registers, etc*/ + + if (l->bar1) { + iounmap(l->bar1); + l->bar1 = NULL; + } +} + +static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + nvgpu_mutex_init(&g->poweron_lock); + nvgpu_mutex_init(&g->poweroff_lock); + nvgpu_mutex_init(&g->ctxsw_disable_lock); + l->regs_saved = l->regs; + l->bar1_saved = l->bar1; + + g->aggressive_sync_destroy = platform->aggressive_sync_destroy; + g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; + g->has_syncpoints = platform->has_syncpoints; + g->ptimer_src_freq = platform->ptimer_src_freq; + g->can_railgate = platform->can_railgate_init; + g->railgate_delay = platform->railgate_delay_init; + + __nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, + platform->unify_address_spaces); +} + +static int vgpu_init_support(struct platform_device *pdev) +{ + struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct gk20a *g = get_gk20a(&pdev->dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + void __iomem *regs; + int err = 0; + + if (!r) { + nvgpu_err(g, "failed to get gk20a bar1"); + err = -ENXIO; + goto fail; + } + + if (r->name && !strcmp(r->name, "/vgpu")) { + regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(regs)) { + nvgpu_err(g, "failed to remap gk20a bar1"); + err = PTR_ERR(regs); + goto fail; + } + l->bar1 = regs; + l->bar1_mem = r; + } + + nvgpu_mutex_init(&g->dbg_sessions_lock); + nvgpu_mutex_init(&g->client_lock); + + nvgpu_init_list_node(&g->profiler_objects); + + g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); + if (!g->dbg_regops_tmp_buf) { + nvgpu_err(g, "couldn't allocate regops tmp buf"); + return -ENOMEM; + } + g->dbg_regops_tmp_buf_ops = + SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); + + g->remove_support = vgpu_remove_support; + return 0; + + fail: + vgpu_remove_support(g); + return err; +} + +int vgpu_pm_prepare_poweroff(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + int ret = 0; + + 
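+	/*
+	 * Nothing to do if the GPU is already off; otherwise all channels
+	 * are suspended before the device is marked powered down.
+	 */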
nvgpu_log_fn(g, " "); + + if (!g->power_on) + return 0; + + ret = gk20a_channel_suspend(g); + if (ret) + return ret; + + g->power_on = false; + + return ret; +} + +int vgpu_pm_finalize_poweron(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + int err; + + nvgpu_log_fn(g, " "); + + if (g->power_on) + return 0; + + g->power_on = true; + + vgpu_detect_chip(g); + err = vgpu_init_hal(g); + if (err) + goto done; + + if (g->ops.ltc.init_fs_state) + g->ops.ltc.init_fs_state(g); + + err = nvgpu_init_ltc_support(g); + if (err) { + nvgpu_err(g, "failed to init ltc"); + goto done; + } + + err = vgpu_init_mm_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a mm"); + goto done; + } + + err = vgpu_init_fifo_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a fifo"); + goto done; + } + + err = vgpu_init_gr_support(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gr"); + goto done; + } + + err = g->ops.chip_init_gpu_characteristics(g); + if (err) { + nvgpu_err(g, "failed to init gk20a gpu characteristics"); + goto done; + } + + err = nvgpu_finalize_poweron_linux(l); + if (err) + goto done; + +#ifdef CONFIG_GK20A_CTXSW_TRACE + gk20a_ctxsw_trace_init(g); +#endif + gk20a_sched_ctrl_init(g); + gk20a_channel_resume(g); + + g->sw_ready = true; + +done: + return err; +} + +static int vgpu_qos_notify(struct notifier_block *nb, + unsigned long n, void *data) +{ + struct gk20a_scale_profile *profile = + container_of(nb, struct gk20a_scale_profile, + qos_notify_block); + struct gk20a *g = get_gk20a(profile->dev); + u32 max_freq; + int err; + + nvgpu_log_fn(g, " "); + + max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); + err = vgpu_clk_cap_rate(profile->dev, max_freq); + if (err) + nvgpu_err(g, "%s failed, err=%d", __func__, err); + + return NOTIFY_OK; /* need notify call further */ +} + +static int vgpu_pm_qos_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { + if (!profile) + return -EINVAL; + } else { + profile = nvgpu_kzalloc(g, sizeof(*profile)); + if (!profile) + return -ENOMEM; + g->scale_profile = profile; + } + + profile->dev = dev; + profile->qos_notify_block.notifier_call = vgpu_qos_notify; + pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, + &profile->qos_notify_block); + return 0; +} + +static void vgpu_pm_qos_remove(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + + pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, + &g->scale_profile->qos_notify_block); + nvgpu_kfree(g, g->scale_profile); + g->scale_profile = NULL; +} + +static int vgpu_pm_init(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + unsigned long *freqs; + int num_freqs; + int err = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_platform_is_simulation(g)) + return 0; + + __pm_runtime_disable(dev, false); + + if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) + gk20a_scale_init(dev); + + if (l->devfreq) { + /* set min/max frequency based on frequency table */ + err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs); + if (err) + return err; + + if (num_freqs < 1) + return -EINVAL; + + l->devfreq->min_freq = freqs[0]; + l->devfreq->max_freq = freqs[num_freqs - 1]; + } + + err = vgpu_pm_qos_init(dev); + if (err) + return err; + + return err; +} + +int vgpu_probe(struct platform_device *pdev) +{ + struct nvgpu_os_linux *l; + struct gk20a *gk20a; + int err; + struct 
device *dev = &pdev->dev; + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct vgpu_priv_data *priv; + + if (!platform) { + dev_err(dev, "no platform data\n"); + return -ENODATA; + } + + l = kzalloc(sizeof(*l), GFP_KERNEL); + if (!l) { + dev_err(dev, "couldn't allocate gk20a support"); + return -ENOMEM; + } + gk20a = &l->g; + + nvgpu_log_fn(gk20a, " "); + + nvgpu_init_gk20a(gk20a); + + nvgpu_kmem_init(gk20a); + + err = nvgpu_init_enabled_flags(gk20a); + if (err) { + kfree(gk20a); + return err; + } + + l->dev = dev; + if (tegra_platform_is_vdk()) + __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); + + gk20a->is_virtual = true; + + priv = nvgpu_kzalloc(gk20a, sizeof(*priv)); + if (!priv) { + kfree(gk20a); + return -ENOMEM; + } + + platform->g = gk20a; + platform->vgpu_priv = priv; + + err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); + if (err) + return err; + + vgpu_init_support(pdev); + + vgpu_init_vars(gk20a, platform); + + init_rwsem(&l->busy_lock); + + nvgpu_spinlock_init(&gk20a->mc_enable_lock); + + gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms; + + /* Initialize the platform interface. */ + err = platform->probe(dev); + if (err) { + if (err == -EPROBE_DEFER) + nvgpu_info(gk20a, "platform probe failed"); + else + nvgpu_err(gk20a, "platform probe failed"); + return err; + } + + if (platform->late_probe) { + err = platform->late_probe(dev); + if (err) { + nvgpu_err(gk20a, "late probe failed"); + return err; + } + } + + err = vgpu_comm_init(gk20a); + if (err) { + nvgpu_err(gk20a, "failed to init comm interface"); + return -ENOSYS; + } + + priv->virt_handle = vgpu_connect(); + if (!priv->virt_handle) { + nvgpu_err(gk20a, "failed to connect to server node"); + vgpu_comm_deinit(); + return -ENOSYS; + } + + err = vgpu_get_constants(gk20a); + if (err) { + vgpu_comm_deinit(); + return err; + } + + err = vgpu_pm_init(dev); + if (err) { + nvgpu_err(gk20a, "pm init failed"); + return err; + } + + err = nvgpu_thread_create(&priv->intr_handler, gk20a, + vgpu_intr_thread, "gk20a"); + if (err) + return err; + + gk20a_debug_init(gk20a, "gpu.0"); + + /* Set DMA parameters to allow larger sgt lists */ + dev->dma_parms = &l->dma_parms; + dma_set_max_seg_size(dev, UINT_MAX); + + gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT; + gk20a->timeouts_disabled_by_user = false; + nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0); + + vgpu_create_sysfs(dev); + gk20a_init_gr(gk20a); + + nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); + gk20a->gr.max_comptag_mem = totalram_pages + >> (10 - (PAGE_SHIFT - 10)); + + nvgpu_ref_init(&gk20a->refcount); + + return 0; +} + +int vgpu_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct gk20a *g = get_gk20a(dev); + + nvgpu_log_fn(g, " "); + + vgpu_pm_qos_remove(dev); + if (g->remove_support) + g->remove_support(g); + + vgpu_comm_deinit(); + gk20a_sched_ctrl_cleanup(g); + gk20a_user_deinit(dev, &nvgpu_class); + vgpu_remove_sysfs(dev); + gk20a_get_platform(dev)->g = NULL; + gk20a_put(g); + + return 0; +} + +bool vgpu_is_reduced_bar1(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + + return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; +} diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h new file mode 100644 index 00000000..38379cf2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h @@ -0,0 +1,57 @@ +/* + * Virtualized GPU Linux 
Interfaces
+ *
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VGPU_LINUX_H__
+#define __VGPU_LINUX_H__
+
+struct device;
+struct platform_device;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+
+#include
+
+int vgpu_pm_prepare_poweroff(struct device *dev);
+int vgpu_pm_finalize_poweron(struct device *dev);
+int vgpu_probe(struct platform_device *dev);
+int vgpu_remove(struct platform_device *dev);
+
+void vgpu_create_sysfs(struct device *dev);
+void vgpu_remove_sysfs(struct device *dev);
+#else
+/* define placeholders for functions used outside of vgpu */
+
+static inline int vgpu_pm_prepare_poweroff(struct device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_pm_finalize_poweron(struct device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_probe(struct platform_device *dev)
+{
+	return -ENOSYS;
+}
+static inline int vgpu_remove(struct platform_device *dev)
+{
+	return -ENOSYS;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/vidmem.c b/drivers/gpu/nvgpu/os/linux/vidmem.c
new file mode 100644
index 00000000..136d4a10
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/vidmem.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include + +#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) +{ + return !!(addr & 1ULL); +} + +void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr) +{ + /* set bit 0 to indicate vidmem allocation */ + sg_dma_address(sgl) = (addr | 1ULL); +} + +struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) +{ + u64 addr; + + addr = sg_dma_address(sgl); + + if (nvgpu_addr_is_vidmem_page_alloc(addr)) + addr = addr & ~1ULL; + else + WARN_ON(1); + + return (struct nvgpu_page_alloc *)(uintptr_t)addr; +} + +static struct sg_table *gk20a_vidbuf_map_dma_buf( + struct dma_buf_attachment *attach, enum dma_data_direction dir) +{ + struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv; + + return buf->mem->priv.sgt; +} + +static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ +} + +static void gk20a_vidbuf_release(struct dma_buf *dmabuf) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + struct gk20a *g = buf->g; + + vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", + dmabuf, buf->mem->size >> 10); + + if (linux_buf && linux_buf->dmabuf_priv_delete) + linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv); + + nvgpu_kfree(g, linux_buf); + nvgpu_vidmem_buf_free(g, buf); + + gk20a_put(g); +} + +static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num) +{ + WARN_ON("Not supported"); + return NULL; +} + +static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf, + unsigned long page_num) +{ + WARN_ON("Not supported"); + return NULL; +} + +static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + return -EINVAL; +} + +static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf, + struct device *dev, void *priv, void (*delete)(void *priv)) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + + linux_buf->dmabuf_priv = priv; + linux_buf->dmabuf_priv_delete = delete; + + return 0; +} + +static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, + struct device *dev) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + struct nvgpu_vidmem_linux *linux_buf = buf->priv; + + return linux_buf->dmabuf_priv; +} + +static const struct dma_buf_ops gk20a_vidbuf_ops = { + .map_dma_buf = gk20a_vidbuf_map_dma_buf, + .unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf, + .release = gk20a_vidbuf_release, + .kmap_atomic = gk20a_vidbuf_kmap_atomic, + .kmap = gk20a_vidbuf_kmap, + .mmap = gk20a_vidbuf_mmap, + .set_drvdata = gk20a_vidbuf_set_private, + .get_drvdata = gk20a_vidbuf_get_private, +}; + +static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.priv = buf; + exp_info.ops = &gk20a_vidbuf_ops; + exp_info.size = buf->mem->size; + exp_info.flags = O_RDWR; + + return dma_buf_export(&exp_info); +} + +struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) +{ + struct nvgpu_vidmem_buf *buf = dmabuf->priv; + + if (dmabuf->ops != &gk20a_vidbuf_ops) + return NULL; + + return buf->g; +} + +int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) +{ + struct nvgpu_vidmem_buf *buf = NULL; + struct nvgpu_vidmem_linux *priv; + int err, fd; + + /* + * This ref is released 
when the dma_buf is closed. + */ + if (!gk20a_get(g)) + return -ENODEV; + + vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); + + priv = nvgpu_kzalloc(g, sizeof(*priv)); + if (!priv) { + err = -ENOMEM; + goto fail; + } + + buf = nvgpu_vidmem_user_alloc(g, bytes); + if (IS_ERR(buf)) { + err = PTR_ERR(buf); + goto fail; + } + + priv->dmabuf = gk20a_vidbuf_export(buf); + if (IS_ERR(priv->dmabuf)) { + err = PTR_ERR(priv->dmabuf); + goto fail; + } + + buf->priv = priv; + +#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD + fd = tegra_alloc_fd(current->files, 1024, O_RDWR); +#else + fd = get_unused_fd_flags(O_RDWR); +#endif + if (fd < 0) { + /* ->release frees what we have done */ + dma_buf_put(priv->dmabuf); + return fd; + } + + /* fclose() on this drops one ref, freeing the dma buf */ + fd_install(fd, priv->dmabuf->file); + + vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", + priv->dmabuf, buf->mem->size >> 10); + + return fd; + +fail: + nvgpu_vidmem_buf_free(g, buf); + nvgpu_kfree(g, priv); + gk20a_put(g); + + vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); + return err; +} + +int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, + void *buffer, u64 offset, u64 size, u32 cmd) +{ + struct nvgpu_vidmem_buf *vidmem_buf; + struct nvgpu_mem *mem; + int err = 0; + + if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) + return -EINVAL; + + vidmem_buf = dmabuf->priv; + mem = vidmem_buf->mem; + + switch (cmd) { + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: + nvgpu_mem_rd_n(g, mem, offset, buffer, size); + break; + + case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: + nvgpu_mem_wr_n(g, mem, offset, buffer, size); + break; + + default: + err = -EINVAL; + } + + return err; +} + +void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem) +{ + nvgpu_free(vidmem->allocator, + (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl)); + nvgpu_free_sgtable(g, &vidmem->priv.sgt); +} diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c new file mode 100644 index 00000000..baa77515 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +#include "platform_gk20a.h" +#include "os_linux.h" +#include "dmabuf.h" + +static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) +{ + u32 core_flags = 0; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) + core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) + core_flags |= NVGPU_VM_MAP_CACHEABLE; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) + core_flags |= NVGPU_VM_MAP_IO_COHERENT; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) + core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) + core_flags |= NVGPU_VM_MAP_L3_ALLOC; + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) + core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; + + if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) + nvgpu_warn(g, "Ignoring deprecated flag: " + "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); + + return core_flags; +} + +static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( + struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) +{ + struct nvgpu_rbtree_node *node = NULL; + struct nvgpu_rbtree_node *root = vm->mapped_buffers; + + nvgpu_rbtree_enum_start(0, &node, root); + + while (node) { + struct nvgpu_mapped_buf *mapped_buffer = + mapped_buffer_from_rbtree_node(node); + + if (mapped_buffer->os_priv.dmabuf == dmabuf && + mapped_buffer->kind == kind) + return mapped_buffer; + + nvgpu_rbtree_enum_next(&node, node); + } + + return NULL; +} + +int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset) +{ + struct nvgpu_mapped_buf *mapped_buffer; + struct gk20a *g = gk20a_from_vm(vm); + + nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va); + if (!mapped_buffer) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return -EINVAL; + } + + *dmabuf = mapped_buffer->os_priv.dmabuf; + *offset = gpu_va - mapped_buffer->addr; + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + return 0; +} + +u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf) +{ + return os_buf->dmabuf->size; +} + +/* + * vm->update_gmmu_lock must be held. This checks to see if we already have + * mapped the passed buffer into this VM. If so, just return the existing + * mapping address. + */ +struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, + struct nvgpu_os_buffer *os_buf, + u64 map_addr, + u32 flags, + int kind) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *mapped_buffer = NULL; + + if (flags & NVGPU_VM_MAP_FIXED_OFFSET) { + mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr); + if (!mapped_buffer) + return NULL; + + if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf || + mapped_buffer->kind != (u32)kind) + return NULL; + } else { + mapped_buffer = + __nvgpu_vm_find_mapped_buf_reverse(vm, + os_buf->dmabuf, + kind); + if (!mapped_buffer) + return NULL; + } + + if (mapped_buffer->flags != flags) + return NULL; + + /* + * If we find the mapping here then that means we have mapped it already + * and the prior pin and get must be undone. 
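+ * (The caller of nvgpu_vm_map() has, by this point, taken its own
+ * dma_buf reference and pin for this mapping attempt; since the existing
+ * mapped_buffer already holds a reference and pin of its own, the
+ * duplicates are dropped below.)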
+ */ + gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment, + mapped_buffer->os_priv.sgt); + dma_buf_put(os_buf->dmabuf); + + nvgpu_log(g, gpu_dbg_map, + "gv: 0x%04x_%08x + 0x%-7zu " + "[dma: 0x%010llx, pa: 0x%010llx] " + "pgsz=%-3dKb as=%-2d " + "flags=0x%x apt=%s (reused)", + u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), + os_buf->dmabuf->size, + (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl), + (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), + vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, + vm_aspace_id(vm), + mapped_buffer->flags, + nvgpu_aperture_str(g, + gk20a_dmabuf_aperture(g, os_buf->dmabuf))); + + return mapped_buffer; +} + +int nvgpu_vm_map_linux(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags, + s16 compr_kind, + s16 incompr_kind, + int rw_flag, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch, + u64 *gpu_va) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct device *dev = dev_from_gk20a(g); + struct nvgpu_os_buffer os_buf; + struct sg_table *sgt; + struct nvgpu_sgt *nvgpu_sgt = NULL; + struct nvgpu_mapped_buf *mapped_buffer = NULL; + struct dma_buf_attachment *attachment; + u64 map_addr = 0ULL; + int err = 0; + + if (flags & NVGPU_VM_MAP_FIXED_OFFSET) + map_addr = offset_align; + + sgt = gk20a_mm_pin(dev, dmabuf, &attachment); + if (IS_ERR(sgt)) { + nvgpu_warn(g, "Failed to pin dma_buf!"); + return PTR_ERR(sgt); + } + os_buf.dmabuf = dmabuf; + os_buf.attachment = attachment; + os_buf.dev = dev; + + if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) { + err = -EINVAL; + goto clean_up; + } + + nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); + if (!nvgpu_sgt) { + err = -ENOMEM; + goto clean_up; + } + + mapped_buffer = nvgpu_vm_map(vm, + &os_buf, + nvgpu_sgt, + map_addr, + mapping_size, + buffer_offset, + rw_flag, + flags, + compr_kind, + incompr_kind, + batch, + gk20a_dmabuf_aperture(g, dmabuf)); + + nvgpu_sgt_free(g, nvgpu_sgt); + + if (IS_ERR(mapped_buffer)) { + err = PTR_ERR(mapped_buffer); + goto clean_up; + } + + mapped_buffer->os_priv.dmabuf = dmabuf; + mapped_buffer->os_priv.attachment = attachment; + mapped_buffer->os_priv.sgt = sgt; + + *gpu_va = mapped_buffer->addr; + return 0; + +clean_up: + gk20a_mm_unpin(dev, dmabuf, attachment, sgt); + + return err; +} + +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ + s16 compr_kind, + s16 incompr_kind, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct dma_buf *dmabuf; + u64 ret_va; + int err = 0; + + /* get ref to the mem handle (released on unmap_locked) */ + dmabuf = dma_buf_get(dmabuf_fd); + if (IS_ERR(dmabuf)) { + nvgpu_warn(g, "%s: fd %d is not a dmabuf", + __func__, dmabuf_fd); + return PTR_ERR(dmabuf); + } + + /* verify that we're not overflowing the buffer, i.e. + * (buffer_offset + mapping_size)> dmabuf->size. + * + * Since buffer_offset + mapping_size could overflow, first check + * that mapping size < dmabuf_size, at which point we can subtract + * mapping_size from both sides for the final comparison. 
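+ *
+ * For example (hypothetical values): with dmabuf->size = 0x1000,
+ * buffer_offset = 0x800 and mapping_size = 0xffffffffffffff00, the naive
+ * sum wraps around to 0x700 and would incorrectly pass, while the
+ * two-step check rejects it at mapping_size > dmabuf->size.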
+ */ + if ((mapping_size > dmabuf->size) || + (buffer_offset > (dmabuf->size - mapping_size))) { + nvgpu_err(g, + "buf size %llx < (offset(%llx) + map_size(%llx))\n", + (u64)dmabuf->size, buffer_offset, mapping_size); + dma_buf_put(dmabuf); + return -EINVAL; + } + + err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm)); + if (err) { + dma_buf_put(dmabuf); + return err; + } + + err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align, + nvgpu_vm_translate_linux_flags(g, flags), + compr_kind, incompr_kind, + gk20a_mem_flag_none, + buffer_offset, + mapping_size, + batch, + &ret_va); + + if (!err) + *offset_align = ret_va; + else + dma_buf_put(dmabuf); + + return err; +} + +/* + * This is the function call-back for freeing OS specific components of an + * nvgpu_mapped_buf. This should most likely never be called outside of the + * core MM framework! + * + * Note: the VM lock will be held. + */ +void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer) +{ + struct vm_gk20a *vm = mapped_buffer->vm; + + gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf, + mapped_buffer->os_priv.attachment, + mapped_buffer->os_priv.sgt); + + dma_buf_put(mapped_buffer->os_priv.dmabuf); +} diff --git a/drivers/gpu/nvgpu/pmgr/pmgr.c b/drivers/gpu/nvgpu/pmgr/pmgr.c index 2a9f9673..3d028c98 100644 --- a/drivers/gpu/nvgpu/pmgr/pmgr.c +++ b/drivers/gpu/nvgpu/pmgr/pmgr.c @@ -26,7 +26,7 @@ #ifdef CONFIG_DEBUG_FS #include -#include "common/linux/os_linux.h" +#include "os/linux/os_linux.h" #endif int pmgr_pwr_devices_get_power(struct gk20a *g, u32 *val) -- cgit v1.2.2