aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2018-03-13 21:06:38 -0400
committerDave Airlie <airlied@redhat.com>2018-03-13 21:06:38 -0400
commit6fa7324ac5489ad43c4b6351355b869bc5458bef (patch)
tree97de1061f074d0a76c83d8cb364c67094a33a0fa
parent0b8eeac5c6ca6dcb19cce04bf8910006ac73dbd3 (diff)
parenta11024457d348672b26b3d4581ed19c793399b48 (diff)
Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next
Major points for this pull request: - Add dGPU support for amdkfd initialization code and queue handling. It's not complete support since the GPUVM part is missing (the under debate stuff). - Enable PCIe atomics for dGPU if present - Various adjustments to the amdgpu<-->amdkfd interface for dGPUs - Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system - Add HSA process eviction code in case of system memory pressure - Various fixes and small changes * tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits) uapi: Fix type used in ioctl parameter structures drm/amdkfd: Implement KFD process eviction/restore drm/amdkfd: Add GPUVM virtual address space to PDD drm/amdkfd: Remove unaligned memory access drm/amdkfd: Centralize IOMMUv2 code and make it conditional drm/amdgpu: Add submit IB function for KFD drm/amdgpu: Add GPUVM memory management functions for KFD drm/amdgpu: add amdgpu_sync_clone drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support drm/amdgpu: Add KFD eviction fence drm/amdgpu: Remove unused kfd2kgd interface drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid drm/amdgpu: Fix header file dependencies drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem drm/amdgpu: remove useless BUG_ONs drm/amdgpu: Enable KFD initialization on dGPUs drm/amdkfd: Add dGPU device IDs and device info drm/amdkfd: Add dGPU support to kernel_queue_init drm/amdkfd: Add dGPU support to the MQD manager drm/amdkfd: Add dGPU support to the device queue manager ...
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/dma-buf/dma-fence.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c179
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c1506
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c356
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c301
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c56
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c93
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c357
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.h78
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c44
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c362
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h6
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h99
-rw-r--r--include/uapi/linux/kfd_ioctl.h8
41 files changed, 3757 insertions, 362 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 13c8ec11135a..dccae57985fe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
766F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 766F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
767F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 767F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
768F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c 768F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
769F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
770F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
769F: drivers/gpu/drm/amd/amdkfd/ 771F: drivers/gpu/drm/amd/amdkfd/
770F: drivers/gpu/drm/amd/include/cik_structs.h 772F: drivers/gpu/drm/amd/include/cik_structs.h
771F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h 773F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 5d101c4053e0..4edb9fd3cf47 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)
171 171
172 trace_dma_fence_destroy(fence); 172 trace_dma_fence_destroy(fence);
173 173
174 /* Failed to signal before release, could be a refcounting issue */
174 WARN_ON(!list_empty(&fence->cb_list)); 175 WARN_ON(!list_empty(&fence->cb_list));
175 176
176 if (fence->ops->release) 177 if (fence->ops->release)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 353c937d947d..8522c2ea1f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -129,6 +129,8 @@ amdgpu-y += \
129# add amdkfd interfaces 129# add amdkfd interfaces
130amdgpu-y += \ 130amdgpu-y += \
131 amdgpu_amdkfd.o \ 131 amdgpu_amdkfd.o \
132 amdgpu_amdkfd_fence.o \
133 amdgpu_amdkfd_gpuvm.o \
132 amdgpu_amdkfd_gfx_v8.o 134 amdgpu_amdkfd_gfx_v8.o
133 135
134# add cgs 136# add cgs
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 450426dbed92..8a23aa8f9c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,6 +30,8 @@
30const struct kgd2kfd_calls *kgd2kfd; 30const struct kgd2kfd_calls *kgd2kfd;
31bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); 31bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
32 32
33static const unsigned int compute_vmid_bitmap = 0xFF00;
34
33int amdgpu_amdkfd_init(void) 35int amdgpu_amdkfd_init(void)
34{ 36{
35 int ret; 37 int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
56#else 58#else
57 ret = -ENOENT; 59 ret = -ENOENT;
58#endif 60#endif
61 amdgpu_amdkfd_gpuvm_init_mem_limits();
59 62
60 return ret; 63 return ret;
61} 64}
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
78 switch (adev->asic_type) { 81 switch (adev->asic_type) {
79#ifdef CONFIG_DRM_AMDGPU_CIK 82#ifdef CONFIG_DRM_AMDGPU_CIK
80 case CHIP_KAVERI: 83 case CHIP_KAVERI:
84 case CHIP_HAWAII:
81 kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); 85 kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
82 break; 86 break;
83#endif 87#endif
84 case CHIP_CARRIZO: 88 case CHIP_CARRIZO:
89 case CHIP_TONGA:
90 case CHIP_FIJI:
91 case CHIP_POLARIS10:
92 case CHIP_POLARIS11:
85 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); 93 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
86 break; 94 break;
87 default: 95 default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
132 int last_valid_bit; 140 int last_valid_bit;
133 if (adev->kfd) { 141 if (adev->kfd) {
134 struct kgd2kfd_shared_resources gpu_resources = { 142 struct kgd2kfd_shared_resources gpu_resources = {
135 .compute_vmid_bitmap = 0xFF00, 143 .compute_vmid_bitmap = compute_vmid_bitmap,
136 .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, 144 .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
137 .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe 145 .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
146 .gpuvm_size = min(adev->vm_manager.max_pfn
147 << AMDGPU_GPU_PAGE_SHIFT,
148 AMDGPU_VA_HOLE_START),
149 .drm_render_minor = adev->ddev->render->index
138 }; 150 };
139 151
140 /* this is going to have a few of the MSBs set that we need to 152 /* this is going to have a few of the MSBs set that we need to
@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
204 void **cpu_ptr) 216 void **cpu_ptr)
205{ 217{
206 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 218 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
207 struct kgd_mem **mem = (struct kgd_mem **) mem_obj; 219 struct amdgpu_bo *bo = NULL;
208 int r; 220 int r;
209 221 uint64_t gpu_addr_tmp = 0;
210 BUG_ON(kgd == NULL); 222 void *cpu_ptr_tmp = NULL;
211 BUG_ON(gpu_addr == NULL);
212 BUG_ON(cpu_ptr == NULL);
213
214 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
215 if ((*mem) == NULL)
216 return -ENOMEM;
217 223
218 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 224 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
219 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); 225 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
220 if (r) { 226 if (r) {
221 dev_err(adev->dev, 227 dev_err(adev->dev,
222 "failed to allocate BO for amdkfd (%d)\n", r); 228 "failed to allocate BO for amdkfd (%d)\n", r);
@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
224 } 230 }
225 231
226 /* map the buffer */ 232 /* map the buffer */
227 r = amdgpu_bo_reserve((*mem)->bo, true); 233 r = amdgpu_bo_reserve(bo, true);
228 if (r) { 234 if (r) {
229 dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 235 dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
230 goto allocate_mem_reserve_bo_failed; 236 goto allocate_mem_reserve_bo_failed;
231 } 237 }
232 238
233 r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, 239 r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
234 &(*mem)->gpu_addr); 240 &gpu_addr_tmp);
235 if (r) { 241 if (r) {
236 dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); 242 dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
237 goto allocate_mem_pin_bo_failed; 243 goto allocate_mem_pin_bo_failed;
238 } 244 }
239 *gpu_addr = (*mem)->gpu_addr;
240 245
241 r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); 246 r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
242 if (r) { 247 if (r) {
243 dev_err(adev->dev, 248 dev_err(adev->dev,
244 "(%d) failed to map bo to kernel for amdkfd\n", r); 249 "(%d) failed to map bo to kernel for amdkfd\n", r);
245 goto allocate_mem_kmap_bo_failed; 250 goto allocate_mem_kmap_bo_failed;
246 } 251 }
247 *cpu_ptr = (*mem)->cpu_ptr;
248 252
249 amdgpu_bo_unreserve((*mem)->bo); 253 *mem_obj = bo;
254 *gpu_addr = gpu_addr_tmp;
255 *cpu_ptr = cpu_ptr_tmp;
256
257 amdgpu_bo_unreserve(bo);
250 258
251 return 0; 259 return 0;
252 260
253allocate_mem_kmap_bo_failed: 261allocate_mem_kmap_bo_failed:
254 amdgpu_bo_unpin((*mem)->bo); 262 amdgpu_bo_unpin(bo);
255allocate_mem_pin_bo_failed: 263allocate_mem_pin_bo_failed:
256 amdgpu_bo_unreserve((*mem)->bo); 264 amdgpu_bo_unreserve(bo);
257allocate_mem_reserve_bo_failed: 265allocate_mem_reserve_bo_failed:
258 amdgpu_bo_unref(&(*mem)->bo); 266 amdgpu_bo_unref(&bo);
259 267
260 return r; 268 return r;
261} 269}
262 270
263void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 271void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
264{ 272{
265 struct kgd_mem *mem = (struct kgd_mem *) mem_obj; 273 struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
266 274
267 BUG_ON(mem == NULL); 275 amdgpu_bo_reserve(bo, true);
268 276 amdgpu_bo_kunmap(bo);
269 amdgpu_bo_reserve(mem->bo, true); 277 amdgpu_bo_unpin(bo);
270 amdgpu_bo_kunmap(mem->bo); 278 amdgpu_bo_unreserve(bo);
271 amdgpu_bo_unpin(mem->bo); 279 amdgpu_bo_unref(&(bo));
272 amdgpu_bo_unreserve(mem->bo);
273 amdgpu_bo_unref(&(mem->bo));
274 kfree(mem);
275} 280}
276 281
277void get_local_mem_info(struct kgd_dev *kgd, 282void get_local_mem_info(struct kgd_dev *kgd,
@@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
361 366
362 return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 367 return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
363} 368}
369
370int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
371 uint32_t vmid, uint64_t gpu_addr,
372 uint32_t *ib_cmd, uint32_t ib_len)
373{
374 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
375 struct amdgpu_job *job;
376 struct amdgpu_ib *ib;
377 struct amdgpu_ring *ring;
378 struct dma_fence *f = NULL;
379 int ret;
380
381 switch (engine) {
382 case KGD_ENGINE_MEC1:
383 ring = &adev->gfx.compute_ring[0];
384 break;
385 case KGD_ENGINE_SDMA1:
386 ring = &adev->sdma.instance[0].ring;
387 break;
388 case KGD_ENGINE_SDMA2:
389 ring = &adev->sdma.instance[1].ring;
390 break;
391 default:
392 pr_err("Invalid engine in IB submission: %d\n", engine);
393 ret = -EINVAL;
394 goto err;
395 }
396
397 ret = amdgpu_job_alloc(adev, 1, &job, NULL);
398 if (ret)
399 goto err;
400
401 ib = &job->ibs[0];
402 memset(ib, 0, sizeof(struct amdgpu_ib));
403
404 ib->gpu_addr = gpu_addr;
405 ib->ptr = ib_cmd;
406 ib->length_dw = ib_len;
407 /* This works for NO_HWS. TODO: need to handle without knowing VMID */
408 job->vmid = vmid;
409
410 ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
411 if (ret) {
412 DRM_ERROR("amdgpu: failed to schedule IB.\n");
413 goto err_ib_sched;
414 }
415
416 ret = dma_fence_wait(f, false);
417
418err_ib_sched:
419 dma_fence_put(f);
420 amdgpu_job_free(job);
421err:
422 return ret;
423}
424
425bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
426{
427 if (adev->kfd) {
428 if ((1 << vmid) & compute_vmid_bitmap)
429 return true;
430 }
431
432 return false;
433}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2a519f9062ee..d7509b706b26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,13 +28,89 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/mmu_context.h> 29#include <linux/mmu_context.h>
30#include <kgd_kfd_interface.h> 30#include <kgd_kfd_interface.h>
31#include <drm/ttm/ttm_execbuf_util.h>
32#include "amdgpu_sync.h"
33#include "amdgpu_vm.h"
34
35extern const struct kgd2kfd_calls *kgd2kfd;
31 36
32struct amdgpu_device; 37struct amdgpu_device;
33 38
39struct kfd_bo_va_list {
40 struct list_head bo_list;
41 struct amdgpu_bo_va *bo_va;
42 void *kgd_dev;
43 bool is_mapped;
44 uint64_t va;
45 uint64_t pte_flags;
46};
47
34struct kgd_mem { 48struct kgd_mem {
49 struct mutex lock;
35 struct amdgpu_bo *bo; 50 struct amdgpu_bo *bo;
36 uint64_t gpu_addr; 51 struct list_head bo_va_list;
37 void *cpu_ptr; 52 /* protected by amdkfd_process_info.lock */
53 struct ttm_validate_buffer validate_list;
54 struct ttm_validate_buffer resv_list;
55 uint32_t domain;
56 unsigned int mapped_to_gpu_memory;
57 uint64_t va;
58
59 uint32_t mapping_flags;
60
61 struct amdkfd_process_info *process_info;
62
63 struct amdgpu_sync sync;
64
65 bool aql_queue;
66};
67
68/* KFD Memory Eviction */
69struct amdgpu_amdkfd_fence {
70 struct dma_fence base;
71 struct mm_struct *mm;
72 spinlock_t lock;
73 char timeline_name[TASK_COMM_LEN];
74};
75
76struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
77 struct mm_struct *mm);
78bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
79struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
80
81struct amdkfd_process_info {
82 /* List head of all VMs that belong to a KFD process */
83 struct list_head vm_list_head;
84 /* List head for all KFD BOs that belong to a KFD process. */
85 struct list_head kfd_bo_list;
86 /* Lock to protect kfd_bo_list */
87 struct mutex lock;
88
89 /* Number of VMs */
90 unsigned int n_vms;
91 /* Eviction Fence */
92 struct amdgpu_amdkfd_fence *eviction_fence;
93};
94
95/* struct amdkfd_vm -
96 * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
97 * belonging to a KFD process. All the VMs belonging to the same process point
98 * to the same amdkfd_process_info.
99 */
100struct amdkfd_vm {
101 /* Keep base as the first parameter for pointer compatibility between
102 * amdkfd_vm and amdgpu_vm.
103 */
104 struct amdgpu_vm base;
105
106 /* List node in amdkfd_process_info.vm_list_head*/
107 struct list_head vm_list_node;
108
109 struct amdgpu_device *adev;
110 /* Points to the KFD process VM info*/
111 struct amdkfd_process_info *process_info;
112
113 uint64_t pd_phys_addr;
38}; 114};
39 115
40int amdgpu_amdkfd_init(void); 116int amdgpu_amdkfd_init(void);
@@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
48void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); 124void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
49void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); 125void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
50 126
127int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
128 uint32_t vmid, uint64_t gpu_addr,
129 uint32_t *ib_cmd, uint32_t ib_len);
130
51struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); 131struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
52struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); 132struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
53 133
134bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
135
54/* Shared API */ 136/* Shared API */
55int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 137int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
56 void **mem_obj, uint64_t *gpu_addr, 138 void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
79 valid; \ 161 valid; \
80 }) 162 })
81 163
164/* GPUVM API */
165int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
166 void **process_info,
167 struct dma_fence **ef);
168void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
169uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
170int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
171 struct kgd_dev *kgd, uint64_t va, uint64_t size,
172 void *vm, struct kgd_mem **mem,
173 uint64_t *offset, uint32_t flags);
174int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
175 struct kgd_dev *kgd, struct kgd_mem *mem);
176int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
177 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
178int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
179 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
180int amdgpu_amdkfd_gpuvm_sync_memory(
181 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
182int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
183 struct kgd_mem *mem, void **kptr, uint64_t *size);
184int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
185 struct dma_fence **ef);
186
187void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
188void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
189
82#endif /* AMDGPU_AMDKFD_H_INCLUDED */ 190#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644
index 000000000000..2c14025e5e76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,179 @@
1/*
2 * Copyright 2016-2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/dma-fence.h>
24#include <linux/spinlock.h>
25#include <linux/atomic.h>
26#include <linux/stacktrace.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/sched/mm.h>
30#include "amdgpu_amdkfd.h"
31
32static const struct dma_fence_ops amdkfd_fence_ops;
33static atomic_t fence_seq = ATOMIC_INIT(0);
34
35/* Eviction Fence
36 * Fence helper functions to deal with KFD memory eviction.
37 * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
38 * evicted unless all the user queues for that process are evicted.
39 *
40 * All the BOs in a process share an eviction fence. When process X wants
41 * to map VRAM memory but TTM can't find enough space, TTM will attempt to
42 * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
43 * by calling ttm_bo_driver->eviction_valuable().
44 *
45 * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
46 * to process X. Otherwise, it will return true to indicate BO can be
47 * evicted by TTM.
48 *
49 * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
50 * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
51 * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
52 *
53 * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
54 * nofity when the BO is free to move. fence_add_callback --> enable_signaling
55 * --> amdgpu_amdkfd_fence.enable_signaling
56 *
57 * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
58 * user queues and signal fence. The work item will also start another delayed
59 * work item to restore BOs
60 */
61
62struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
63 struct mm_struct *mm)
64{
65 struct amdgpu_amdkfd_fence *fence;
66
67 fence = kzalloc(sizeof(*fence), GFP_KERNEL);
68 if (fence == NULL)
69 return NULL;
70
71 /* This reference gets released in amdkfd_fence_release */
72 mmgrab(mm);
73 fence->mm = mm;
74 get_task_comm(fence->timeline_name, current);
75 spin_lock_init(&fence->lock);
76
77 dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
78 context, atomic_inc_return(&fence_seq));
79
80 return fence;
81}
82
83struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
84{
85 struct amdgpu_amdkfd_fence *fence;
86
87 if (!f)
88 return NULL;
89
90 fence = container_of(f, struct amdgpu_amdkfd_fence, base);
91 if (fence && f->ops == &amdkfd_fence_ops)
92 return fence;
93
94 return NULL;
95}
96
97static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
98{
99 return "amdgpu_amdkfd_fence";
100}
101
102static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
103{
104 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
105
106 return fence->timeline_name;
107}
108
109/**
110 * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
111 * a KFD BO and schedules a job to move the BO.
112 * If fence is already signaled return true.
113 * If fence is not signaled schedule a evict KFD process work item.
114 */
115static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
116{
117 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
118
119 if (!fence)
120 return false;
121
122 if (dma_fence_is_signaled(f))
123 return true;
124
125 if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
126 return true;
127
128 return false;
129}
130
131/**
132 * amdkfd_fence_release - callback that fence can be freed
133 *
134 * @fence: fence
135 *
136 * This function is called when the reference count becomes zero.
137 * Drops the mm_struct reference and RCU schedules freeing up the fence.
138 */
139static void amdkfd_fence_release(struct dma_fence *f)
140{
141 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
142
143 /* Unconditionally signal the fence. The process is getting
144 * terminated.
145 */
146 if (WARN_ON(!fence))
147 return; /* Not an amdgpu_amdkfd_fence */
148
149 mmdrop(fence->mm);
150 kfree_rcu(f, rcu);
151}
152
153/**
154 * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
155 * if same return TRUE else return FALSE.
156 *
157 * @f: [IN] fence
158 * @mm: [IN] mm that needs to be verified
159 */
160bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
161{
162 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
163
164 if (!fence)
165 return false;
166 else if (fence->mm == mm)
167 return true;
168
169 return false;
170}
171
172static const struct dma_fence_ops amdkfd_fence_ops = {
173 .get_driver_name = amdkfd_fence_get_driver_name,
174 .get_timeline_name = amdkfd_fence_get_timeline_name,
175 .enable_signaling = amdkfd_fence_enable_signaling,
176 .signaled = NULL,
177 .wait = dma_fence_default_wait,
178 .release = amdkfd_fence_release,
179};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index a9e6aea0e5f8..7485c376b90e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
139static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 139static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
140static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 140static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
141 uint8_t vmid); 141 uint8_t vmid);
142static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
143 142
144static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 143static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
145static void set_scratch_backing_va(struct kgd_dev *kgd, 144static void set_scratch_backing_va(struct kgd_dev *kgd,
146 uint64_t va, uint32_t vmid); 145 uint64_t va, uint32_t vmid);
146static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
147 uint32_t page_table_base);
148static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
149static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
147 150
148/* Because of REG_GET_FIELD() being used, we put this function in the 151/* Because of REG_GET_FIELD() being used, we put this function in the
149 * asic specific file. 152 * asic specific file.
@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
196 .address_watch_get_offset = kgd_address_watch_get_offset, 199 .address_watch_get_offset = kgd_address_watch_get_offset,
197 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 200 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
198 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 201 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
199 .write_vmid_invalidate_request = write_vmid_invalidate_request,
200 .get_fw_version = get_fw_version, 202 .get_fw_version = get_fw_version,
201 .set_scratch_backing_va = set_scratch_backing_va, 203 .set_scratch_backing_va = set_scratch_backing_va,
202 .get_tile_config = get_tile_config, 204 .get_tile_config = get_tile_config,
203 .get_cu_info = get_cu_info, 205 .get_cu_info = get_cu_info,
204 .get_vram_usage = amdgpu_amdkfd_get_vram_usage 206 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
207 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
208 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
209 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
210 .set_vm_context_page_table_base = set_vm_context_page_table_base,
211 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
212 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
213 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
214 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
215 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
216 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
217 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
218 .invalidate_tlbs = invalidate_tlbs,
219 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
220 .submit_ib = amdgpu_amdkfd_submit_ib,
205}; 221};
206 222
207struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) 223struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
787 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 803 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
788 804
789 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 805 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
790 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 806 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
791}
792
793static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
794{
795 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
796
797 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
798} 807}
799 808
800static void set_scratch_backing_va(struct kgd_dev *kgd, 809static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
812 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 821 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
813 const union amdgpu_firmware_header *hdr; 822 const union amdgpu_firmware_header *hdr;
814 823
815 BUG_ON(kgd == NULL);
816
817 switch (type) { 824 switch (type) {
818 case KGD_ENGINE_PFP: 825 case KGD_ENGINE_PFP:
819 hdr = (const union amdgpu_firmware_header *) 826 hdr = (const union amdgpu_firmware_header *)
@@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
866 return hdr->common.ucode_version; 873 return hdr->common.ucode_version;
867} 874}
868 875
876static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
877 uint32_t page_table_base)
878{
879 struct amdgpu_device *adev = get_amdgpu_device(kgd);
880
881 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
882 pr_err("trying to set page table base for wrong VMID\n");
883 return;
884 }
885 WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
886}
887
888static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
889{
890 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
891 int vmid;
892 unsigned int tmp;
893
894 for (vmid = 0; vmid < 16; vmid++) {
895 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
896 continue;
897
898 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
899 if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
900 (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
901 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
902 RREG32(mmVM_INVALIDATE_RESPONSE);
903 break;
904 }
905 }
906
907 return 0;
908}
909
910static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
911{
912 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
913
914 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
915 pr_err("non kfd vmid\n");
916 return 0;
917 }
918
919 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
920 RREG32(mmVM_INVALIDATE_RESPONSE);
921 return 0;
922}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index b127259d7d85..7be453494423 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
81 uint32_t queue_id); 81 uint32_t queue_id);
82static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 82static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
83 unsigned int utimeout); 83 unsigned int utimeout);
84static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
85static int kgd_address_watch_disable(struct kgd_dev *kgd); 84static int kgd_address_watch_disable(struct kgd_dev *kgd);
86static int kgd_address_watch_execute(struct kgd_dev *kgd, 85static int kgd_address_watch_execute(struct kgd_dev *kgd,
87 unsigned int watch_point_id, 86 unsigned int watch_point_id,
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
99 uint8_t vmid); 98 uint8_t vmid);
100static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 99static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
101 uint8_t vmid); 100 uint8_t vmid);
102static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
103static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 101static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
104static void set_scratch_backing_va(struct kgd_dev *kgd, 102static void set_scratch_backing_va(struct kgd_dev *kgd,
105 uint64_t va, uint32_t vmid); 103 uint64_t va, uint32_t vmid);
104static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
105 uint32_t page_table_base);
106static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
107static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
106 108
107/* Because of REG_GET_FIELD() being used, we put this function in the 109/* Because of REG_GET_FIELD() being used, we put this function in the
108 * asic specific file. 110 * asic specific file.
@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
157 get_atc_vmid_pasid_mapping_pasid, 159 get_atc_vmid_pasid_mapping_pasid,
158 .get_atc_vmid_pasid_mapping_valid = 160 .get_atc_vmid_pasid_mapping_valid =
159 get_atc_vmid_pasid_mapping_valid, 161 get_atc_vmid_pasid_mapping_valid,
160 .write_vmid_invalidate_request = write_vmid_invalidate_request,
161 .get_fw_version = get_fw_version, 162 .get_fw_version = get_fw_version,
162 .set_scratch_backing_va = set_scratch_backing_va, 163 .set_scratch_backing_va = set_scratch_backing_va,
163 .get_tile_config = get_tile_config, 164 .get_tile_config = get_tile_config,
164 .get_cu_info = get_cu_info, 165 .get_cu_info = get_cu_info,
165 .get_vram_usage = amdgpu_amdkfd_get_vram_usage 166 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
167 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
168 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
169 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
170 .set_vm_context_page_table_base = set_vm_context_page_table_base,
171 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
172 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
173 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
174 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
175 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
176 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
177 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
178 .invalidate_tlbs = invalidate_tlbs,
179 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
180 .submit_ib = amdgpu_amdkfd_submit_ib,
166}; 181};
167 182
168struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) 183struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
704 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 719 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
705 720
706 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 721 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
707 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 722 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
708}
709
710static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
711{
712 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
713
714 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
715} 723}
716 724
717static int kgd_address_watch_disable(struct kgd_dev *kgd) 725static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
775 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 783 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
776 const union amdgpu_firmware_header *hdr; 784 const union amdgpu_firmware_header *hdr;
777 785
778 BUG_ON(kgd == NULL);
779
780 switch (type) { 786 switch (type) {
781 case KGD_ENGINE_PFP: 787 case KGD_ENGINE_PFP:
782 hdr = (const union amdgpu_firmware_header *) 788 hdr = (const union amdgpu_firmware_header *)
@@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
828 /* Only 12 bit in use*/ 834 /* Only 12 bit in use*/
829 return hdr->common.ucode_version; 835 return hdr->common.ucode_version;
830} 836}
837
/* Program the page-table base address register for a KFD-owned VMID.
 *
 * The "- 8" offset maps the VMID onto the VM_CONTEXT8..15 register
 * block, implying KFD VMIDs start at 8 on this ASIC (gated by
 * amdgpu_amdkfd_is_kfd_vmid, whose definition is not visible here).
 */
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}
849
/* Flush the GPU TLB of the KFD VMID currently mapped to @pasid.
 *
 * Scans all 16 VMIDs for a valid ATC PASID mapping matching @pasid and
 * requests invalidation for the first match only (break after one hit).
 * Returns 0 even when no matching VMID is found.
 */
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	unsigned int tmp;

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;

		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
			/* Read back RESPONSE — presumably to post/complete
			 * the invalidation; register semantics not visible
			 * here (TODO confirm against register spec). */
			RREG32(mmVM_INVALIDATE_RESPONSE);
			break;
		}
	}

	return 0;
}
871
/* Flush the GPU TLB for a single KFD-owned VMID.
 *
 * Returns -EINVAL and logs the VMID if it is not owned by KFD.
 */
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return -EINVAL;
	}

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
	/* Read back RESPONSE — presumably posts/completes the
	 * invalidation; mirrors invalidate_tlbs() above. */
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
new file mode 100644
index 000000000000..e0371a9967b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -0,0 +1,1506 @@
1/*
2 * Copyright 2014-2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#define pr_fmt(fmt) "kfd2kgd: " fmt
24
25#include <linux/list.h>
26#include <drm/drmP.h>
27#include "amdgpu_object.h"
28#include "amdgpu_vm.h"
29#include "amdgpu_amdkfd.h"
30
31/* Special VM and GART address alignment needed for VI pre-Fiji due to
32 * a HW bug.
33 */
34#define VI_BO_SIZE_ALIGN (0x8000)
35
/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;	/* set once in init_mem_limits */
	int64_t system_mem_used;	/* signed so imbalance trips WARN_ONCE */
	spinlock_t mem_limit_lock;	/* protects system_mem_used */
} kfd_mem_limit;

/* Struct used for amdgpu_amdkfd_bo_validate */
struct amdgpu_vm_parser {
	uint32_t domain;	/* target AMDGPU_GEM_DOMAIN_* for validation */
	bool wait;		/* wait for the move to complete */
};

/* Names indexed by AMDGPU_GEM_DOMAIN_* bit position (via ffs below) */
static const char * const domain_bit_to_string[] = {
	"CPU",
	"GTT",
	"VRAM",
	"GDS",
	"GWS",
	"OA"
};

/* Map a single domain bit to its printable name (log messages only) */
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
59
60
61
/* kgd_dev is an opaque handle for amdgpu_device across the kfd2kgd
 * interface; this just undoes the type erasure. */
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}
66
67static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
68 struct kgd_mem *mem)
69{
70 struct kfd_bo_va_list *entry;
71
72 list_for_each_entry(entry, &mem->bo_va_list, bo_list)
73 if (entry->bo_va->base.vm == avm)
74 return false;
75
76 return true;
77}
78
79/* Set memory usage limits. Current, limits are
80 * System (kernel) memory - 3/8th System RAM
81 */
82void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
83{
84 struct sysinfo si;
85 uint64_t mem;
86
87 si_meminfo(&si);
88 mem = si.totalram - si.totalhigh;
89 mem *= si.mem_unit;
90
91 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
92 kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
93 pr_debug("Kernel memory limit %lluM\n",
94 (kfd_mem_limit.max_system_mem_limit >> 20));
95}
96
97static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
98 uint64_t size, u32 domain)
99{
100 size_t acc_size;
101 int ret = 0;
102
103 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
104 sizeof(struct amdgpu_bo));
105
106 spin_lock(&kfd_mem_limit.mem_limit_lock);
107 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
108 if (kfd_mem_limit.system_mem_used + (acc_size + size) >
109 kfd_mem_limit.max_system_mem_limit) {
110 ret = -ENOMEM;
111 goto err_no_mem;
112 }
113 kfd_mem_limit.system_mem_used += (acc_size + size);
114 }
115err_no_mem:
116 spin_unlock(&kfd_mem_limit.mem_limit_lock);
117 return ret;
118}
119
120static void unreserve_system_mem_limit(struct amdgpu_device *adev,
121 uint64_t size, u32 domain)
122{
123 size_t acc_size;
124
125 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
126 sizeof(struct amdgpu_bo));
127
128 spin_lock(&kfd_mem_limit.mem_limit_lock);
129 if (domain == AMDGPU_GEM_DOMAIN_GTT)
130 kfd_mem_limit.system_mem_used -= (acc_size + size);
131 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
132 "kfd system memory accounting unbalanced");
133
134 spin_unlock(&kfd_mem_limit.mem_limit_lock);
135}
136
137void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
138{
139 spin_lock(&kfd_mem_limit.mem_limit_lock);
140
141 if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
142 kfd_mem_limit.system_mem_used -=
143 (bo->tbo.acc_size + amdgpu_bo_size(bo));
144 }
145 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
146 "kfd system memory accounting unbalanced");
147
148 spin_unlock(&kfd_mem_limit.mem_limit_lock);
149}
150
151
152/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
153 * reservation object.
154 *
155 * @bo: [IN] Remove eviction fence(s) from this BO
156 * @ef: [IN] If ef is specified, then this eviction fence is removed if it
157 * is present in the shared list.
158 * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
159 * from BO's reservation object shared list.
160 * @ef_count: [OUT] Number of fences in ef_list.
161 *
162 * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
163 * called to restore the eviction fences and to avoid memory leak. This is
164 * useful for shared BOs.
165 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
166 */
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					struct amdgpu_amdkfd_fence *ef,
					struct amdgpu_amdkfd_fence ***ef_list,
					unsigned int *ef_count)
{
	struct reservation_object_list *fobj;
	struct reservation_object *resv;
	unsigned int i = 0, j = 0, k = 0, shared_count;
	unsigned int count = 0;
	struct amdgpu_amdkfd_fence **fence_list;

	if (!ef && !ef_list)
		return -EINVAL;

	if (ef_list) {
		*ef_list = NULL;
		*ef_count = 0;
	}

	resv = bo->tbo.resv;
	fobj = reservation_object_get_list(resv);

	/* No shared fence list at all: nothing to remove */
	if (!fobj)
		return 0;

	/* The shared-fence array is edited in place; the seqcount (with
	 * preemption disabled) keeps concurrent readers consistent. */
	preempt_disable();
	write_seqcount_begin(&resv->seq);

	/* Go through all the shared fences in the resevation object. If
	 * ef is specified and it exists in the list, remove it and reduce the
	 * count. If ef is not specified, then get the count of eviction fences
	 * present.
	 */
	shared_count = fobj->shared_count;
	for (i = 0; i < shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(fobj->shared[i],
					      reservation_object_held(resv));

		if (ef) {
			/* Fences are matched by context, not pointer —
			 * presumably one context per eviction fence. */
			if (f->context == ef->base.context) {
				dma_fence_put(f);
				fobj->shared_count--;
			} else {
				RCU_INIT_POINTER(fobj->shared[j++], f);
			}
		} else if (to_amdgpu_amdkfd_fence(f))
			count++;
	}
	write_seqcount_end(&resv->seq);
	preempt_enable();

	/* Either we removed the specific fence, or there are no eviction
	 * fences to collect — done either way. */
	if (ef || !count)
		return 0;

	/* Alloc memory for count number of eviction fence pointers. Fill the
	 * ef_list array and ef_count
	 */
	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
			     GFP_KERNEL);
	if (!fence_list)
		return -ENOMEM;

	preempt_disable();
	write_seqcount_begin(&resv->seq);

	/* Second pass: move every eviction fence out of the shared array
	 * into fence_list, compacting the remaining fences in place. */
	j = 0;
	for (i = 0; i < shared_count; ++i) {
		struct dma_fence *f;
		struct amdgpu_amdkfd_fence *efence;

		f = rcu_dereference_protected(fobj->shared[i],
			reservation_object_held(resv));

		efence = to_amdgpu_amdkfd_fence(f);
		if (efence) {
			fence_list[k++] = efence;
			fobj->shared_count--;
		} else {
			RCU_INIT_POINTER(fobj->shared[j++], f);
		}
	}

	write_seqcount_end(&resv->seq);
	preempt_enable();

	/* Caller owns fence_list (and the references still held by the
	 * collected fences) — see amdgpu_amdkfd_add_eviction_fence. */
	*ef_list = fence_list;
	*ef_count = k;

	return 0;
}
259
260/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
261 * reservation object.
262 *
263 * @bo: [IN] Add eviction fences to this BO
264 * @ef_list: [IN] List of eviction fences to be added
265 * @ef_count: [IN] Number of fences in ef_list.
266 *
267 * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
268 * function.
269 */
270static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
271 struct amdgpu_amdkfd_fence **ef_list,
272 unsigned int ef_count)
273{
274 int i;
275
276 if (!ef_list || !ef_count)
277 return;
278
279 for (i = 0; i < ef_count; i++) {
280 amdgpu_bo_fence(bo, &ef_list[i]->base, true);
281 /* Re-adding the fence takes an additional reference. Drop that
282 * reference.
283 */
284 dma_fence_put(&ef_list[i]->base);
285 }
286
287 kfree(ef_list);
288}
289
290static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
291 bool wait)
292{
293 struct ttm_operation_ctx ctx = { false, false };
294 int ret;
295
296 if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
297 "Called with userptr BO"))
298 return -EINVAL;
299
300 amdgpu_ttm_placement_from_domain(bo, domain);
301
302 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
303 if (ret)
304 goto validate_fail;
305 if (wait) {
306 struct amdgpu_amdkfd_fence **ef_list;
307 unsigned int ef_count;
308
309 ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
310 &ef_count);
311 if (ret)
312 goto validate_fail;
313
314 ttm_bo_wait(&bo->tbo, false, false);
315 amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
316 }
317
318validate_fail:
319 return ret;
320}
321
322static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
323{
324 struct amdgpu_vm_parser *p = param;
325
326 return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
327}
328
/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Page directories are not updated here because huge page handling
 * during page table updates can invalidate page directory entries
 * again. Page directories are only updated after updating page
 * tables.
 */
static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
{
	struct amdgpu_bo *pd = vm->base.root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	struct amdgpu_vm_parser param;
	uint64_t addr, flags = AMDGPU_PTE_VALID;
	int ret;

	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
	param.wait = false;

	/* Validate all page-table BOs of the VM into VRAM */
	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
					&param);
	if (ret) {
		pr_err("amdgpu: failed to validate PT BOs\n");
		return ret;
	}

	/* ...and the page directory itself */
	ret = amdgpu_amdkfd_validate(&param, pd);
	if (ret) {
		pr_err("amdgpu: failed to validate PD\n");
		return ret;
	}

	/* Cache the PD address in PDE format; read back later by
	 * amdgpu_amdkfd_gpuvm_get_process_page_dir. */
	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
	vm->pd_phys_addr = addr;

	/* CPU-based page-table updates need the PD kernel-mapped */
	if (vm->base.use_cpu_for_update) {
		ret = amdgpu_bo_kmap(pd, NULL);
		if (ret) {
			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
			return ret;
		}
	}

	return 0;
}
374
375static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
376 struct dma_fence *f)
377{
378 int ret = amdgpu_sync_fence(adev, sync, f, false);
379
380 /* Sync objects can't handle multiple GPUs (contexts) updating
381 * sync->last_vm_update. Fortunately we don't need it for
382 * KFD's purposes, so we can just drop that fence.
383 */
384 if (sync->last_vm_update) {
385 dma_fence_put(sync->last_vm_update);
386 sync->last_vm_update = NULL;
387 }
388
389 return ret;
390}
391
392static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
393{
394 struct amdgpu_bo *pd = vm->root.base.bo;
395 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
396 int ret;
397
398 ret = amdgpu_vm_update_directories(adev, vm);
399 if (ret)
400 return ret;
401
402 return sync_vm_fence(adev, sync, vm->last_update);
403}
404
/* add_bo_to_vm - Add a BO to a VM
 *
 * Everything that needs to bo done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a. Validate new page tables and directories
 */
static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
		struct amdgpu_vm *avm, bool is_aql,
		struct kfd_bo_va_list **p_bo_va_entry)
{
	int ret;
	struct kfd_bo_va_list *bo_va_entry;
	struct amdkfd_vm *kvm = container_of(avm,
					     struct amdkfd_vm, base);
	struct amdgpu_bo *pd = avm->root.base.bo;
	struct amdgpu_bo *bo = mem->bo;
	uint64_t va = mem->va;
	struct list_head *list_bo_va = &mem->bo_va_list;
	unsigned long bo_size = bo->tbo.mem.size;

	if (!va) {
		pr_err("Invalid VA when adding BO to VM\n");
		return -EINVAL;
	}

	/* AQL mapping goes one BO-size past the regular one — presumably
	 * the second (AQL) mapping of the same BO; TODO confirm. */
	if (is_aql)
		va += bo_size;

	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
	if (!bo_va_entry)
		return -ENOMEM;

	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
			va + bo_size, avm);

	/* Add BO to VM internal data structures*/
	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
	if (!bo_va_entry->bo_va) {
		ret = -EINVAL;
		pr_err("Failed to add BO object to VM. ret == %d\n",
				ret);
		goto err_vmadd;
	}

	bo_va_entry->va = va;
	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
							 mem->mapping_flags);
	bo_va_entry->kgd_dev = (void *)adev;
	list_add(&bo_va_entry->bo_list, list_bo_va);

	if (p_bo_va_entry)
		*p_bo_va_entry = bo_va_entry;

	/* Allocate new page tables if needed and validate
	 * them. Clearing of new page tables and validate need to wait
	 * on move fences. We don't want that to trigger the eviction
	 * fence, so remove it temporarily.
	 */
	/* NOTE(review): return value ignored — if removal fails the
	 * eviction fence may still be on the PD during alloc/validate. */
	amdgpu_amdkfd_remove_eviction_fence(pd,
					kvm->process_info->eviction_fence,
					NULL, NULL);

	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
	if (ret) {
		pr_err("Failed to allocate pts, err=%d\n", ret);
		goto err_alloc_pts;
	}

	ret = vm_validate_pt_pd_bos(kvm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto err_alloc_pts;
	}

	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);

	return 0;

err_alloc_pts:
	/* Restore the eviction fence before unwinding the VM entry */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
	list_del(&bo_va_entry->bo_list);
err_vmadd:
	kfree(bo_va_entry);
	return ret;
}
498
499static void remove_bo_from_vm(struct amdgpu_device *adev,
500 struct kfd_bo_va_list *entry, unsigned long size)
501{
502 pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
503 entry->va,
504 entry->va + size, entry);
505 amdgpu_vm_bo_rmv(adev, entry->bo_va);
506 list_del(&entry->bo_list);
507 kfree(entry);
508}
509
510static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
511 struct amdkfd_process_info *process_info)
512{
513 struct ttm_validate_buffer *entry = &mem->validate_list;
514 struct amdgpu_bo *bo = mem->bo;
515
516 INIT_LIST_HEAD(&entry->head);
517 entry->shared = true;
518 entry->bo = &bo->tbo;
519 mutex_lock(&process_info->lock);
520 list_add_tail(&entry->head, &process_info->kfd_bo_list);
521 mutex_unlock(&process_info->lock);
522}
523
/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
	unsigned int n_vms; /* Number of VMs reserved */
	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
	struct ww_acquire_ctx ticket; /* Reservation ticket */
	struct list_head list, duplicates; /* BO lists */
	struct amdgpu_sync *sync; /* Pointer to sync object */
	bool reserved; /* Whether BOs are reserved */
};

/* Filter used by reserve_bo_and_cond_vms to choose which of a BO's
 * VMs get reserved, based on each entry's is_mapped state. */
enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
	BO_VM_MAPPED, /* Match VMs where a BO is mapped */
	BO_VM_ALL, /* Match all VMs a BO was added to */
};
544
545/**
546 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
547 * @mem: KFD BO structure.
548 * @vm: the VM to reserve.
549 * @ctx: the struct that will be used in unreserve_bo_and_vms().
550 */
551static int reserve_bo_and_vm(struct kgd_mem *mem,
552 struct amdgpu_vm *vm,
553 struct bo_vm_reservation_context *ctx)
554{
555 struct amdgpu_bo *bo = mem->bo;
556 int ret;
557
558 WARN_ON(!vm);
559
560 ctx->reserved = false;
561 ctx->n_vms = 1;
562 ctx->sync = &mem->sync;
563
564 INIT_LIST_HEAD(&ctx->list);
565 INIT_LIST_HEAD(&ctx->duplicates);
566
567 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
568 if (!ctx->vm_pd)
569 return -ENOMEM;
570
571 ctx->kfd_bo.robj = bo;
572 ctx->kfd_bo.priority = 0;
573 ctx->kfd_bo.tv.bo = &bo->tbo;
574 ctx->kfd_bo.tv.shared = true;
575 ctx->kfd_bo.user_pages = NULL;
576 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
577
578 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
579
580 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
581 false, &ctx->duplicates);
582 if (!ret)
583 ctx->reserved = true;
584 else {
585 pr_err("Failed to reserve buffers in ttm\n");
586 kfree(ctx->vm_pd);
587 ctx->vm_pd = NULL;
588 }
589
590 return ret;
591}
592
593/**
594 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
595 * @mem: KFD BO structure.
596 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
597 * is used. Otherwise, a single VM associated with the BO.
598 * @map_type: the mapping status that will be used to filter the VMs.
599 * @ctx: the struct that will be used in unreserve_bo_and_vms().
600 *
601 * Returns 0 for success, negative for failure.
602 */
603static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
604 struct amdgpu_vm *vm, enum bo_vm_match map_type,
605 struct bo_vm_reservation_context *ctx)
606{
607 struct amdgpu_bo *bo = mem->bo;
608 struct kfd_bo_va_list *entry;
609 unsigned int i;
610 int ret;
611
612 ctx->reserved = false;
613 ctx->n_vms = 0;
614 ctx->vm_pd = NULL;
615 ctx->sync = &mem->sync;
616
617 INIT_LIST_HEAD(&ctx->list);
618 INIT_LIST_HEAD(&ctx->duplicates);
619
620 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
621 if ((vm && vm != entry->bo_va->base.vm) ||
622 (entry->is_mapped != map_type
623 && map_type != BO_VM_ALL))
624 continue;
625
626 ctx->n_vms++;
627 }
628
629 if (ctx->n_vms != 0) {
630 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
631 GFP_KERNEL);
632 if (!ctx->vm_pd)
633 return -ENOMEM;
634 }
635
636 ctx->kfd_bo.robj = bo;
637 ctx->kfd_bo.priority = 0;
638 ctx->kfd_bo.tv.bo = &bo->tbo;
639 ctx->kfd_bo.tv.shared = true;
640 ctx->kfd_bo.user_pages = NULL;
641 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
642
643 i = 0;
644 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
645 if ((vm && vm != entry->bo_va->base.vm) ||
646 (entry->is_mapped != map_type
647 && map_type != BO_VM_ALL))
648 continue;
649
650 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
651 &ctx->vm_pd[i]);
652 i++;
653 }
654
655 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
656 false, &ctx->duplicates);
657 if (!ret)
658 ctx->reserved = true;
659 else
660 pr_err("Failed to reserve buffers in ttm.\n");
661
662 if (ret) {
663 kfree(ctx->vm_pd);
664 ctx->vm_pd = NULL;
665 }
666
667 return ret;
668}
669
/**
 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
 * @ctx: Reservation context to unreserve
 * @wait: Optionally wait for a sync object representing pending VM updates
 * @intr: Whether the wait is interruptible
 *
 * Also frees any resources allocated in
 * reserve_bo_and_(cond_)vm(s). Returns the status from
 * amdgpu_sync_wait.
 */
static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
				 bool wait, bool intr)
{
	int ret = 0;

	/* Wait (before backing off) so the reservation still covers the
	 * pending VM updates being waited on. */
	if (wait)
		ret = amdgpu_sync_wait(ctx->sync, intr);

	if (ctx->reserved)
		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
	kfree(ctx->vm_pd);

	/* Reset the context so a double-unreserve is harmless */
	ctx->sync = NULL;

	ctx->reserved = false;
	ctx->vm_pd = NULL;

	return ret;
}
699
/* Remove the GPUVM mapping of @entry and flush the freed mappings,
 * recording the page-table update fence in @sync. Always returns 0.
 */
static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
				struct kfd_bo_va_list *entry,
				struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
	struct amdgpu_bo *pd = vm->root.base.bo;

	/* Remove eviction fence from PD (and thereby from PTs too as
	 * they share the resv. object). Otherwise during PT update
	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
	 * get added to job->sync object and job execution would
	 * trigger the eviction fence.
	 */
	amdgpu_amdkfd_remove_eviction_fence(pd,
					    kvm->process_info->eviction_fence,
					    NULL, NULL);
	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	/* NOTE(review): return value of amdgpu_vm_clear_freed is ignored */
	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);

	sync_vm_fence(adev, sync, bo_va->last_pt_update);

	return 0;
}
729
730static int update_gpuvm_pte(struct amdgpu_device *adev,
731 struct kfd_bo_va_list *entry,
732 struct amdgpu_sync *sync)
733{
734 int ret;
735 struct amdgpu_vm *vm;
736 struct amdgpu_bo_va *bo_va;
737 struct amdgpu_bo *bo;
738
739 bo_va = entry->bo_va;
740 vm = bo_va->base.vm;
741 bo = bo_va->base.bo;
742
743 /* Update the page tables */
744 ret = amdgpu_vm_bo_update(adev, bo_va, false);
745 if (ret) {
746 pr_err("amdgpu_vm_bo_update failed\n");
747 return ret;
748 }
749
750 return sync_vm_fence(adev, sync, bo_va->last_pt_update);
751}
752
753static int map_bo_to_gpuvm(struct amdgpu_device *adev,
754 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
755{
756 int ret;
757
758 /* Set virtual address for the allocation */
759 ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
760 amdgpu_bo_size(entry->bo_va->base.bo),
761 entry->pte_flags);
762 if (ret) {
763 pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
764 entry->va, ret);
765 return ret;
766 }
767
768 ret = update_gpuvm_pte(adev, entry, sync);
769 if (ret) {
770 pr_err("update_gpuvm_pte() failed\n");
771 goto update_gpuvm_pte_failed;
772 }
773
774 return 0;
775
776update_gpuvm_pte_failed:
777 unmap_bo_from_gpuvm(adev, entry, sync);
778 return ret;
779}
780
781static int process_validate_vms(struct amdkfd_process_info *process_info)
782{
783 struct amdkfd_vm *peer_vm;
784 int ret;
785
786 list_for_each_entry(peer_vm, &process_info->vm_list_head,
787 vm_list_node) {
788 ret = vm_validate_pt_pd_bos(peer_vm);
789 if (ret)
790 return ret;
791 }
792
793 return 0;
794}
795
796static int process_update_pds(struct amdkfd_process_info *process_info,
797 struct amdgpu_sync *sync)
798{
799 struct amdkfd_vm *peer_vm;
800 int ret;
801
802 list_for_each_entry(peer_vm, &process_info->vm_list_head,
803 vm_list_node) {
804 ret = vm_update_pds(&peer_vm->base, sync);
805 if (ret)
806 return ret;
807 }
808
809 return 0;
810}
811
812int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
813 void **process_info,
814 struct dma_fence **ef)
815{
816 int ret;
817 struct amdkfd_vm *new_vm;
818 struct amdkfd_process_info *info;
819 struct amdgpu_device *adev = get_amdgpu_device(kgd);
820
821 new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
822 if (!new_vm)
823 return -ENOMEM;
824
825 /* Initialize the VM context, allocate the page directory and zero it */
826 ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
827 if (ret) {
828 pr_err("Failed init vm ret %d\n", ret);
829 goto vm_init_fail;
830 }
831 new_vm->adev = adev;
832
833 if (!*process_info) {
834 info = kzalloc(sizeof(*info), GFP_KERNEL);
835 if (!info) {
836 ret = -ENOMEM;
837 goto alloc_process_info_fail;
838 }
839
840 mutex_init(&info->lock);
841 INIT_LIST_HEAD(&info->vm_list_head);
842 INIT_LIST_HEAD(&info->kfd_bo_list);
843
844 info->eviction_fence =
845 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
846 current->mm);
847 if (!info->eviction_fence) {
848 pr_err("Failed to create eviction fence\n");
849 goto create_evict_fence_fail;
850 }
851
852 *process_info = info;
853 *ef = dma_fence_get(&info->eviction_fence->base);
854 }
855
856 new_vm->process_info = *process_info;
857
858 mutex_lock(&new_vm->process_info->lock);
859 list_add_tail(&new_vm->vm_list_node,
860 &(new_vm->process_info->vm_list_head));
861 new_vm->process_info->n_vms++;
862 mutex_unlock(&new_vm->process_info->lock);
863
864 *vm = (void *) new_vm;
865
866 pr_debug("Created process vm %p\n", *vm);
867
868 return ret;
869
870create_evict_fence_fail:
871 mutex_destroy(&info->lock);
872 kfree(info);
873alloc_process_info_fail:
874 amdgpu_vm_fini(adev, &new_vm->base);
875vm_init_fail:
876 kfree(new_vm);
877 return ret;
878
879}
880
/* Destroy a compute VM created by amdgpu_amdkfd_gpuvm_create_process_vm.
 * When the last VM of the process is destroyed, the shared process
 * info (including the eviction fence) is released as well.
 */
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
	struct amdgpu_vm *avm = &kfd_vm->base;
	struct amdgpu_bo *pd;
	struct amdkfd_process_info *process_info;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Destroying process vm %p\n", vm);
	/* Release eviction fence from PD */
	pd = avm->root.base.bo;
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	process_info = kfd_vm->process_info;

	/* Unlink this VM from the process before checking n_vms */
	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&kfd_vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	/* Release per-process resources */
	if (!process_info->n_vms) {
		/* All BOs should have been freed before the last VM goes */
		WARN_ON(!list_empty(&process_info->kfd_bo_list));

		dma_fence_put(&process_info->eviction_fence->base);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}

	/* Release the VM context */
	amdgpu_vm_fini(adev, avm);
	kfree(vm);
}
919
920uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
921{
922 struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
923
924 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
925}
926
927int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
928 struct kgd_dev *kgd, uint64_t va, uint64_t size,
929 void *vm, struct kgd_mem **mem,
930 uint64_t *offset, uint32_t flags)
931{
932 struct amdgpu_device *adev = get_amdgpu_device(kgd);
933 struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
934 struct amdgpu_bo *bo;
935 int byte_align;
936 u32 alloc_domain;
937 u64 alloc_flags;
938 uint32_t mapping_flags;
939 int ret;
940
941 /*
942 * Check on which domain to allocate BO
943 */
944 if (flags & ALLOC_MEM_FLAGS_VRAM) {
945 alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
946 alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
947 alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
948 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
949 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
950 } else if (flags & ALLOC_MEM_FLAGS_GTT) {
951 alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
952 alloc_flags = 0;
953 } else {
954 return -EINVAL;
955 }
956
957 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
958 if (!*mem)
959 return -ENOMEM;
960 INIT_LIST_HEAD(&(*mem)->bo_va_list);
961 mutex_init(&(*mem)->lock);
962 (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
963
964 /* Workaround for AQL queue wraparound bug. Map the same
965 * memory twice. That means we only actually allocate half
966 * the memory.
967 */
968 if ((*mem)->aql_queue)
969 size = size >> 1;
970
971 /* Workaround for TLB bug on older VI chips */
972 byte_align = (adev->family == AMDGPU_FAMILY_VI &&
973 adev->asic_type != CHIP_FIJI &&
974 adev->asic_type != CHIP_POLARIS10 &&
975 adev->asic_type != CHIP_POLARIS11) ?
976 VI_BO_SIZE_ALIGN : 1;
977
978 mapping_flags = AMDGPU_VM_PAGE_READABLE;
979 if (flags & ALLOC_MEM_FLAGS_WRITABLE)
980 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
981 if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
982 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
983 if (flags & ALLOC_MEM_FLAGS_COHERENT)
984 mapping_flags |= AMDGPU_VM_MTYPE_UC;
985 else
986 mapping_flags |= AMDGPU_VM_MTYPE_NC;
987 (*mem)->mapping_flags = mapping_flags;
988
989 amdgpu_sync_create(&(*mem)->sync);
990
991 ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
992 if (ret) {
993 pr_debug("Insufficient system memory\n");
994 goto err_reserve_system_mem;
995 }
996
997 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
998 va, size, domain_string(alloc_domain));
999
1000 ret = amdgpu_bo_create(adev, size, byte_align, false,
1001 alloc_domain, alloc_flags, NULL, NULL, &bo);
1002 if (ret) {
1003 pr_debug("Failed to create BO on domain %s. ret %d\n",
1004 domain_string(alloc_domain), ret);
1005 goto err_bo_create;
1006 }
1007 bo->kfd_bo = *mem;
1008 (*mem)->bo = bo;
1009
1010 (*mem)->va = va;
1011 (*mem)->domain = alloc_domain;
1012 (*mem)->mapped_to_gpu_memory = 0;
1013 (*mem)->process_info = kfd_vm->process_info;
1014 add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);
1015
1016 if (offset)
1017 *offset = amdgpu_bo_mmap_offset(bo);
1018
1019 return 0;
1020
1021err_bo_create:
1022 unreserve_system_mem_limit(adev, size, alloc_domain);
1023err_reserve_system_mem:
1024 mutex_destroy(&(*mem)->lock);
1025 kfree(*mem);
1026 return ret;
1027}
1028
1029int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1030 struct kgd_dev *kgd, struct kgd_mem *mem)
1031{
1032 struct amdkfd_process_info *process_info = mem->process_info;
1033 unsigned long bo_size = mem->bo->tbo.mem.size;
1034 struct kfd_bo_va_list *entry, *tmp;
1035 struct bo_vm_reservation_context ctx;
1036 struct ttm_validate_buffer *bo_list_entry;
1037 int ret;
1038
1039 mutex_lock(&mem->lock);
1040
1041 if (mem->mapped_to_gpu_memory > 0) {
1042 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1043 mem->va, bo_size);
1044 mutex_unlock(&mem->lock);
1045 return -EBUSY;
1046 }
1047
1048 mutex_unlock(&mem->lock);
1049 /* lock is not needed after this, since mem is unused and will
1050 * be freed anyway
1051 */
1052
1053 /* Make sure restore workers don't access the BO any more */
1054 bo_list_entry = &mem->validate_list;
1055 mutex_lock(&process_info->lock);
1056 list_del(&bo_list_entry->head);
1057 mutex_unlock(&process_info->lock);
1058
1059 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1060 if (unlikely(ret))
1061 return ret;
1062
1063 /* The eviction fence should be removed by the last unmap.
1064 * TODO: Log an error condition if the bo still has the eviction fence
1065 * attached
1066 */
1067 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1068 process_info->eviction_fence,
1069 NULL, NULL);
1070 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
1071 mem->va + bo_size * (1 + mem->aql_queue));
1072
1073 /* Remove from VM internal data structures */
1074 list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
1075 remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
1076 entry, bo_size);
1077
1078 ret = unreserve_bo_and_vms(&ctx, false, false);
1079
1080 /* Free the sync object */
1081 amdgpu_sync_free(&mem->sync);
1082
1083 /* Free the BO*/
1084 amdgpu_bo_unref(&mem->bo);
1085 mutex_destroy(&mem->lock);
1086 kfree(mem);
1087
1088 return ret;
1089}
1090
1091int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1092 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1093{
1094 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1095 struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
1096 int ret;
1097 struct amdgpu_bo *bo;
1098 uint32_t domain;
1099 struct kfd_bo_va_list *entry;
1100 struct bo_vm_reservation_context ctx;
1101 struct kfd_bo_va_list *bo_va_entry = NULL;
1102 struct kfd_bo_va_list *bo_va_entry_aql = NULL;
1103 unsigned long bo_size;
1104
1105 /* Make sure restore is not running concurrently.
1106 */
1107 mutex_lock(&mem->process_info->lock);
1108
1109 mutex_lock(&mem->lock);
1110
1111 bo = mem->bo;
1112
1113 if (!bo) {
1114 pr_err("Invalid BO when mapping memory to GPU\n");
1115 ret = -EINVAL;
1116 goto out;
1117 }
1118
1119 domain = mem->domain;
1120 bo_size = bo->tbo.mem.size;
1121
1122 pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
1123 mem->va,
1124 mem->va + bo_size * (1 + mem->aql_queue),
1125 vm, domain_string(domain));
1126
1127 ret = reserve_bo_and_vm(mem, vm, &ctx);
1128 if (unlikely(ret))
1129 goto out;
1130
1131 if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
1132 ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
1133 &bo_va_entry);
1134 if (ret)
1135 goto add_bo_to_vm_failed;
1136 if (mem->aql_queue) {
1137 ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
1138 true, &bo_va_entry_aql);
1139 if (ret)
1140 goto add_bo_to_vm_failed_aql;
1141 }
1142 } else {
1143 ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
1144 if (unlikely(ret))
1145 goto add_bo_to_vm_failed;
1146 }
1147
1148 if (mem->mapped_to_gpu_memory == 0) {
1149 /* Validate BO only once. The eviction fence gets added to BO
1150 * the first time it is mapped. Validate will wait for all
1151 * background evictions to complete.
1152 */
1153 ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
1154 if (ret) {
1155 pr_debug("Validate failed\n");
1156 goto map_bo_to_gpuvm_failed;
1157 }
1158 }
1159
1160 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1161 if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
1162 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
1163 entry->va, entry->va + bo_size,
1164 entry);
1165
1166 ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
1167 if (ret) {
1168 pr_err("Failed to map radeon bo to gpuvm\n");
1169 goto map_bo_to_gpuvm_failed;
1170 }
1171
1172 ret = vm_update_pds(vm, ctx.sync);
1173 if (ret) {
1174 pr_err("Failed to update page directories\n");
1175 goto map_bo_to_gpuvm_failed;
1176 }
1177
1178 entry->is_mapped = true;
1179 mem->mapped_to_gpu_memory++;
1180 pr_debug("\t INC mapping count %d\n",
1181 mem->mapped_to_gpu_memory);
1182 }
1183 }
1184
1185 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
1186 amdgpu_bo_fence(bo,
1187 &kfd_vm->process_info->eviction_fence->base,
1188 true);
1189 ret = unreserve_bo_and_vms(&ctx, false, false);
1190
1191 goto out;
1192
1193map_bo_to_gpuvm_failed:
1194 if (bo_va_entry_aql)
1195 remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
1196add_bo_to_vm_failed_aql:
1197 if (bo_va_entry)
1198 remove_bo_from_vm(adev, bo_va_entry, bo_size);
1199add_bo_to_vm_failed:
1200 unreserve_bo_and_vms(&ctx, false, false);
1201out:
1202 mutex_unlock(&mem->process_info->lock);
1203 mutex_unlock(&mem->lock);
1204 return ret;
1205}
1206
1207int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1208 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1209{
1210 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1211 struct amdkfd_process_info *process_info =
1212 ((struct amdkfd_vm *)vm)->process_info;
1213 unsigned long bo_size = mem->bo->tbo.mem.size;
1214 struct kfd_bo_va_list *entry;
1215 struct bo_vm_reservation_context ctx;
1216 int ret;
1217
1218 mutex_lock(&mem->lock);
1219
1220 ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
1221 if (unlikely(ret))
1222 goto out;
1223 /* If no VMs were reserved, it means the BO wasn't actually mapped */
1224 if (ctx.n_vms == 0) {
1225 ret = -EINVAL;
1226 goto unreserve_out;
1227 }
1228
1229 ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
1230 if (unlikely(ret))
1231 goto unreserve_out;
1232
1233 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
1234 mem->va,
1235 mem->va + bo_size * (1 + mem->aql_queue),
1236 vm);
1237
1238 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1239 if (entry->bo_va->base.vm == vm && entry->is_mapped) {
1240 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
1241 entry->va,
1242 entry->va + bo_size,
1243 entry);
1244
1245 ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
1246 if (ret == 0) {
1247 entry->is_mapped = false;
1248 } else {
1249 pr_err("failed to unmap VA 0x%llx\n",
1250 mem->va);
1251 goto unreserve_out;
1252 }
1253
1254 mem->mapped_to_gpu_memory--;
1255 pr_debug("\t DEC mapping count %d\n",
1256 mem->mapped_to_gpu_memory);
1257 }
1258 }
1259
1260 /* If BO is unmapped from all VMs, unfence it. It can be evicted if
1261 * required.
1262 */
1263 if (mem->mapped_to_gpu_memory == 0 &&
1264 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
1265 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1266 process_info->eviction_fence,
1267 NULL, NULL);
1268
1269unreserve_out:
1270 unreserve_bo_and_vms(&ctx, false, false);
1271out:
1272 mutex_unlock(&mem->lock);
1273 return ret;
1274}
1275
1276int amdgpu_amdkfd_gpuvm_sync_memory(
1277 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
1278{
1279 struct amdgpu_sync sync;
1280 int ret;
1281
1282 amdgpu_sync_create(&sync);
1283
1284 mutex_lock(&mem->lock);
1285 amdgpu_sync_clone(&mem->sync, &sync);
1286 mutex_unlock(&mem->lock);
1287
1288 ret = amdgpu_sync_wait(&sync, intr);
1289 amdgpu_sync_free(&sync);
1290 return ret;
1291}
1292
1293int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
1294 struct kgd_mem *mem, void **kptr, uint64_t *size)
1295{
1296 int ret;
1297 struct amdgpu_bo *bo = mem->bo;
1298
1299 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1300 pr_err("userptr can't be mapped to kernel\n");
1301 return -EINVAL;
1302 }
1303
1304 /* delete kgd_mem from kfd_bo_list to avoid re-validating
1305 * this BO in BO's restoring after eviction.
1306 */
1307 mutex_lock(&mem->process_info->lock);
1308
1309 ret = amdgpu_bo_reserve(bo, true);
1310 if (ret) {
1311 pr_err("Failed to reserve bo. ret %d\n", ret);
1312 goto bo_reserve_failed;
1313 }
1314
1315 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
1316 if (ret) {
1317 pr_err("Failed to pin bo. ret %d\n", ret);
1318 goto pin_failed;
1319 }
1320
1321 ret = amdgpu_bo_kmap(bo, kptr);
1322 if (ret) {
1323 pr_err("Failed to map bo to kernel. ret %d\n", ret);
1324 goto kmap_failed;
1325 }
1326
1327 amdgpu_amdkfd_remove_eviction_fence(
1328 bo, mem->process_info->eviction_fence, NULL, NULL);
1329 list_del_init(&mem->validate_list.head);
1330
1331 if (size)
1332 *size = amdgpu_bo_size(bo);
1333
1334 amdgpu_bo_unreserve(bo);
1335
1336 mutex_unlock(&mem->process_info->lock);
1337 return 0;
1338
1339kmap_failed:
1340 amdgpu_bo_unpin(bo);
1341pin_failed:
1342 amdgpu_bo_unreserve(bo);
1343bo_reserve_failed:
1344 mutex_unlock(&mem->process_info->lock);
1345
1346 return ret;
1347}
1348
1349/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
1350 * KFD process identified by process_info
1351 *
1352 * @process_info: amdkfd_process_info of the KFD process
1353 *
1354 * After memory eviction, restore thread calls this function. The function
1355 * should be called when the Process is still valid. BO restore involves -
1356 *
1357 * 1. Release old eviction fence and create new one
1358 * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
1359 * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of
1360 * BOs that need to be reserved.
1361 * 4. Reserve all the BOs
1362 * 5. Validate of PD and PT BOs.
1363 * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence
1364 * 7. Add fence to all PD and PT BOs.
1365 * 8. Unreserve all BOs
1366 */
1367int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1368{
1369 struct amdgpu_bo_list_entry *pd_bo_list;
1370 struct amdkfd_process_info *process_info = info;
1371 struct amdkfd_vm *peer_vm;
1372 struct kgd_mem *mem;
1373 struct bo_vm_reservation_context ctx;
1374 struct amdgpu_amdkfd_fence *new_fence;
1375 int ret = 0, i;
1376 struct list_head duplicate_save;
1377 struct amdgpu_sync sync_obj;
1378
1379 INIT_LIST_HEAD(&duplicate_save);
1380 INIT_LIST_HEAD(&ctx.list);
1381 INIT_LIST_HEAD(&ctx.duplicates);
1382
1383 pd_bo_list = kcalloc(process_info->n_vms,
1384 sizeof(struct amdgpu_bo_list_entry),
1385 GFP_KERNEL);
1386 if (!pd_bo_list)
1387 return -ENOMEM;
1388
1389 i = 0;
1390 mutex_lock(&process_info->lock);
1391 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1392 vm_list_node)
1393 amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
1394 &pd_bo_list[i++]);
1395
1396 /* Reserve all BOs and page tables/directory. Add all BOs from
1397 * kfd_bo_list to ctx.list
1398 */
1399 list_for_each_entry(mem, &process_info->kfd_bo_list,
1400 validate_list.head) {
1401
1402 list_add_tail(&mem->resv_list.head, &ctx.list);
1403 mem->resv_list.bo = mem->validate_list.bo;
1404 mem->resv_list.shared = mem->validate_list.shared;
1405 }
1406
1407 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
1408 false, &duplicate_save);
1409 if (ret) {
1410 pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
1411 goto ttm_reserve_fail;
1412 }
1413
1414 amdgpu_sync_create(&sync_obj);
1415
1416 /* Validate PDs and PTs */
1417 ret = process_validate_vms(process_info);
1418 if (ret)
1419 goto validate_map_fail;
1420
1421 /* Wait for PD/PTs validate to finish */
1422 /* FIXME: I think this isn't needed */
1423 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1424 vm_list_node) {
1425 struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
1426
1427 ttm_bo_wait(&bo->tbo, false, false);
1428 }
1429
1430 /* Validate BOs and map them to GPUVM (update VM page tables). */
1431 list_for_each_entry(mem, &process_info->kfd_bo_list,
1432 validate_list.head) {
1433
1434 struct amdgpu_bo *bo = mem->bo;
1435 uint32_t domain = mem->domain;
1436 struct kfd_bo_va_list *bo_va_entry;
1437
1438 ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
1439 if (ret) {
1440 pr_debug("Memory eviction: Validate BOs failed. Try again\n");
1441 goto validate_map_fail;
1442 }
1443
1444 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
1445 bo_list) {
1446 ret = update_gpuvm_pte((struct amdgpu_device *)
1447 bo_va_entry->kgd_dev,
1448 bo_va_entry,
1449 &sync_obj);
1450 if (ret) {
1451 pr_debug("Memory eviction: update PTE failed. Try again\n");
1452 goto validate_map_fail;
1453 }
1454 }
1455 }
1456
1457 /* Update page directories */
1458 ret = process_update_pds(process_info, &sync_obj);
1459 if (ret) {
1460 pr_debug("Memory eviction: update PDs failed. Try again\n");
1461 goto validate_map_fail;
1462 }
1463
1464 amdgpu_sync_wait(&sync_obj, false);
1465
1466 /* Release old eviction fence and create new one, because fence only
1467 * goes from unsignaled to signaled, fence cannot be reused.
1468 * Use context and mm from the old fence.
1469 */
1470 new_fence = amdgpu_amdkfd_fence_create(
1471 process_info->eviction_fence->base.context,
1472 process_info->eviction_fence->mm);
1473 if (!new_fence) {
1474 pr_err("Failed to create eviction fence\n");
1475 ret = -ENOMEM;
1476 goto validate_map_fail;
1477 }
1478 dma_fence_put(&process_info->eviction_fence->base);
1479 process_info->eviction_fence = new_fence;
1480 *ef = dma_fence_get(&new_fence->base);
1481
1482 /* Wait for validate to finish and attach new eviction fence */
1483 list_for_each_entry(mem, &process_info->kfd_bo_list,
1484 validate_list.head)
1485 ttm_bo_wait(&mem->bo->tbo, false, false);
1486 list_for_each_entry(mem, &process_info->kfd_bo_list,
1487 validate_list.head)
1488 amdgpu_bo_fence(mem->bo,
1489 &process_info->eviction_fence->base, true);
1490
1491 /* Attach eviction fence to PD / PT BOs */
1492 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1493 vm_list_node) {
1494 struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
1495
1496 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
1497 }
1498
1499validate_map_fail:
1500 ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
1501 amdgpu_sync_free(&sync_obj);
1502ttm_reserve_fail:
1503 mutex_unlock(&process_info->lock);
1504 kfree(pd_bo_list);
1505 return ret;
1506}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 216799ccb545..9157745fce14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -36,6 +36,7 @@
36#include <drm/drm_cache.h> 36#include <drm/drm_cache.h>
37#include "amdgpu.h" 37#include "amdgpu.h"
38#include "amdgpu_trace.h" 38#include "amdgpu_trace.h"
39#include "amdgpu_amdkfd.h"
39 40
40static bool amdgpu_need_backup(struct amdgpu_device *adev) 41static bool amdgpu_need_backup(struct amdgpu_device *adev)
41{ 42{
@@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
54 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 55 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
55 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); 56 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
56 57
58 if (bo->kfd_bo)
59 amdgpu_amdkfd_unreserve_system_memory_limit(bo);
60
57 amdgpu_bo_kunmap(bo); 61 amdgpu_bo_kunmap(bo);
58 62
59 drm_gem_object_release(&bo->gem_base); 63 drm_gem_object_release(&bo->gem_base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 1cef944ef98d..d4dbfe1f842e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -92,6 +92,8 @@ struct amdgpu_bo {
92 struct list_head mn_list; 92 struct list_head mn_list;
93 struct list_head shadow_list; 93 struct list_head shadow_list;
94 }; 94 };
95
96 struct kgd_mem *kfd_bo;
95}; 97};
96 98
97static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) 99static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1d0d250cbfdf..1a5911882657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -26,6 +26,7 @@
26 26
27#include <drm/amdgpu_drm.h> 27#include <drm/amdgpu_drm.h>
28#include <drm/gpu_scheduler.h> 28#include <drm/gpu_scheduler.h>
29#include <drm/drm_print.h>
29 30
30/* max number of rings */ 31/* max number of rings */
31#define AMDGPU_MAX_RINGS 18 32#define AMDGPU_MAX_RINGS 18
@@ -35,8 +36,9 @@
35#define AMDGPU_MAX_UVD_ENC_RINGS 2 36#define AMDGPU_MAX_UVD_ENC_RINGS 2
36 37
37/* some special values for the owner field */ 38/* some special values for the owner field */
38#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) 39#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
39#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) 40#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
41#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
40 42
41#define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
42#define AMDGPU_FENCE_FLAG_INT (1 << 1) 44#define AMDGPU_FENCE_FLAG_INT (1 << 1)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index df65c66dc956..2d6f5ec77a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -31,6 +31,7 @@
31#include <drm/drmP.h> 31#include <drm/drmP.h>
32#include "amdgpu.h" 32#include "amdgpu.h"
33#include "amdgpu_trace.h" 33#include "amdgpu_trace.h"
34#include "amdgpu_amdkfd.h"
34 35
35struct amdgpu_sync_entry { 36struct amdgpu_sync_entry {
36 struct hlist_node node; 37 struct hlist_node node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
85 */ 86 */
86static void *amdgpu_sync_get_owner(struct dma_fence *f) 87static void *amdgpu_sync_get_owner(struct dma_fence *f)
87{ 88{
88 struct drm_sched_fence *s_fence = to_drm_sched_fence(f); 89 struct drm_sched_fence *s_fence;
90 struct amdgpu_amdkfd_fence *kfd_fence;
91
92 if (!f)
93 return AMDGPU_FENCE_OWNER_UNDEFINED;
89 94
95 s_fence = to_drm_sched_fence(f);
90 if (s_fence) 96 if (s_fence)
91 return s_fence->owner; 97 return s_fence->owner;
92 98
99 kfd_fence = to_amdgpu_amdkfd_fence(f);
100 if (kfd_fence)
101 return AMDGPU_FENCE_OWNER_KFD;
102
93 return AMDGPU_FENCE_OWNER_UNDEFINED; 103 return AMDGPU_FENCE_OWNER_UNDEFINED;
94} 104}
95 105
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
204 for (i = 0; i < flist->shared_count; ++i) { 214 for (i = 0; i < flist->shared_count; ++i) {
205 f = rcu_dereference_protected(flist->shared[i], 215 f = rcu_dereference_protected(flist->shared[i],
206 reservation_object_held(resv)); 216 reservation_object_held(resv));
217 /* We only want to trigger KFD eviction fences on
218 * evict or move jobs. Skip KFD fences otherwise.
219 */
220 fence_owner = amdgpu_sync_get_owner(f);
221 if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
222 owner != AMDGPU_FENCE_OWNER_UNDEFINED)
223 continue;
224
207 if (amdgpu_sync_same_dev(adev, f)) { 225 if (amdgpu_sync_same_dev(adev, f)) {
208 /* VM updates are only interesting 226 /* VM updates are only interesting
209 * for other VM updates and moves. 227 * for other VM updates and moves.
210 */ 228 */
211 fence_owner = amdgpu_sync_get_owner(f);
212 if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && 229 if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
213 (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && 230 (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
214 ((owner == AMDGPU_FENCE_OWNER_VM) != 231 ((owner == AMDGPU_FENCE_OWNER_VM) !=
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
305 return NULL; 322 return NULL;
306} 323}
307 324
325/**
326 * amdgpu_sync_clone - clone a sync object
327 *
328 * @source: sync object to clone
329 * @clone: pointer to destination sync object
330 *
331 * Adds references to all unsignaled fences in @source to @clone. Also
332 * removes signaled fences from @source while at it.
333 */
334int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
335{
336 struct amdgpu_sync_entry *e;
337 struct hlist_node *tmp;
338 struct dma_fence *f;
339 int i, r;
340
341 hash_for_each_safe(source->fences, i, tmp, e, node) {
342 f = e->fence;
343 if (!dma_fence_is_signaled(f)) {
344 r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
345 if (r)
346 return r;
347 } else {
348 hash_del(&e->node);
349 dma_fence_put(f);
350 kmem_cache_free(amdgpu_sync_slab, e);
351 }
352 }
353
354 dma_fence_put(clone->last_vm_update);
355 clone->last_vm_update = dma_fence_get(source->last_vm_update);
356
357 return 0;
358}
359
308int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) 360int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
309{ 361{
310 struct amdgpu_sync_entry *e; 362 struct amdgpu_sync_entry *e;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7aba38d5c9df..10cf23a57f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
50struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, 50struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
51 struct amdgpu_ring *ring); 51 struct amdgpu_ring *ring);
52struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); 52struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
53int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
53int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); 54int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
54void amdgpu_sync_free(struct amdgpu_sync *sync); 55void amdgpu_sync_free(struct amdgpu_sync *sync);
55int amdgpu_sync_init(void); 56int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 28c33d711bab..c2fae04d769a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -46,6 +46,7 @@
46#include "amdgpu.h" 46#include "amdgpu.h"
47#include "amdgpu_object.h" 47#include "amdgpu_object.h"
48#include "amdgpu_trace.h" 48#include "amdgpu_trace.h"
49#include "amdgpu_amdkfd.h"
49#include "bif/bif_4_1_d.h" 50#include "bif/bif_4_1_d.h"
50 51
51#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) 52#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
258{ 259{
259 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 260 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
260 261
262 /*
263 * Don't verify access for KFD BOs. They don't have a GEM
264 * object associated with them.
265 */
266 if (abo->kfd_bo)
267 return 0;
268
261 if (amdgpu_ttm_tt_get_usermm(bo->ttm)) 269 if (amdgpu_ttm_tt_get_usermm(bo->ttm))
262 return -EPERM; 270 return -EPERM;
263 return drm_vma_node_verify_access(&abo->gem_base.vma_node, 271 return drm_vma_node_verify_access(&abo->gem_base.vma_node,
@@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1171{ 1179{
1172 unsigned long num_pages = bo->mem.num_pages; 1180 unsigned long num_pages = bo->mem.num_pages;
1173 struct drm_mm_node *node = bo->mem.mm_node; 1181 struct drm_mm_node *node = bo->mem.mm_node;
1182 struct reservation_object_list *flist;
1183 struct dma_fence *f;
1184 int i;
1185
1186 /* If bo is a KFD BO, check if the bo belongs to the current process.
1187 * If true, then return false as any KFD process needs all its BOs to
1188 * be resident to run successfully
1189 */
1190 flist = reservation_object_get_list(bo->resv);
1191 if (flist) {
1192 for (i = 0; i < flist->shared_count; ++i) {
1193 f = rcu_dereference_protected(flist->shared[i],
1194 reservation_object_held(bo->resv));
1195 if (amdkfd_fence_check_mm(f, current->mm))
1196 return false;
1197 }
1198 }
1174 1199
1175 switch (bo->mem.mem_type) { 1200 switch (bo->mem.mem_type) {
1176 case TTM_PL_TT: 1201 case TTM_PL_TT:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index fabf44b262be..e9841518343e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -28,6 +28,7 @@
28#include <linux/kfifo.h> 28#include <linux/kfifo.h>
29#include <linux/rbtree.h> 29#include <linux/rbtree.h>
30#include <drm/gpu_scheduler.h> 30#include <drm/gpu_scheduler.h>
31#include <drm/drm_file.h>
31 32
32#include "amdgpu_sync.h" 33#include "amdgpu_sync.h"
33#include "amdgpu_ring.h" 34#include "amdgpu_ring.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index bc5a2945bd2b..ed2f06c9f346 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,7 @@
4 4
5config HSA_AMD 5config HSA_AMD
6 tristate "HSA kernel driver for AMD GPU devices" 6 tristate "HSA kernel driver for AMD GPU devices"
7 depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 7 depends on DRM_AMDGPU && X86_64
8 imply AMD_IOMMU_V2
8 help 9 help
9 Enable this if you want to use HSA features on AMD GPU devices. 10 Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a317e76ffb5e..0d0242240c47 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
37 kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ 37 kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
38 kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o 38 kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
39 39
40ifneq ($(CONFIG_AMD_IOMMU_V2),)
41amdkfd-y += kfd_iommu.o
42endif
43
40amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o 44amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
41 45
42obj-$(CONFIG_HSA_AMD) += amdkfd.o 46obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 62c3d9cd6ef1..6fe24964540b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
901 901
902 mutex_unlock(&p->mutex); 902 mutex_unlock(&p->mutex);
903 903
904 if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) 904 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
905 pdd->qpd.vmid != 0)
905 dev->kfd2kgd->set_scratch_backing_va( 906 dev->kfd2kgd->set_scratch_backing_va(
906 dev->kgd, args->va_addr, pdd->qpd.vmid); 907 dev->kgd, args->va_addr, pdd->qpd.vmid);
907 908
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2bc2816767a7..7493f47e7fe1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -22,10 +22,10 @@
22 22
23#include <linux/pci.h> 23#include <linux/pci.h>
24#include <linux/acpi.h> 24#include <linux/acpi.h>
25#include <linux/amd-iommu.h>
26#include "kfd_crat.h" 25#include "kfd_crat.h"
27#include "kfd_priv.h" 26#include "kfd_priv.h"
28#include "kfd_topology.h" 27#include "kfd_topology.h"
28#include "kfd_iommu.h"
29 29
30/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. 30/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
31 * GPU processor ID are expressed with Bit[31]=1. 31 * GPU processor ID are expressed with Bit[31]=1.
@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1037 struct crat_subtype_generic *sub_type_hdr; 1037 struct crat_subtype_generic *sub_type_hdr;
1038 struct crat_subtype_computeunit *cu; 1038 struct crat_subtype_computeunit *cu;
1039 struct kfd_cu_info cu_info; 1039 struct kfd_cu_info cu_info;
1040 struct amd_iommu_device_info iommu_info;
1041 int avail_size = *size; 1040 int avail_size = *size;
1042 uint32_t total_num_of_cu; 1041 uint32_t total_num_of_cu;
1043 int num_of_cache_entries = 0; 1042 int num_of_cache_entries = 0;
1044 int cache_mem_filled = 0; 1043 int cache_mem_filled = 0;
1045 int ret = 0; 1044 int ret = 0;
1046 const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
1047 AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
1048 AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
1049 struct kfd_local_mem_info local_mem_info; 1045 struct kfd_local_mem_info local_mem_info;
1050 1046
1051 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) 1047 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1106 /* Check if this node supports IOMMU. During parsing this flag will 1102 /* Check if this node supports IOMMU. During parsing this flag will
1107 * translate to HSA_CAP_ATS_PRESENT 1103 * translate to HSA_CAP_ATS_PRESENT
1108 */ 1104 */
1109 iommu_info.flags = 0; 1105 if (!kfd_iommu_check_device(kdev))
1110 if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { 1106 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
1111 if ((iommu_info.flags & required_iommu_flags) ==
1112 required_iommu_flags)
1113 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
1114 }
1115 1107
1116 crat_table->length += sub_type_hdr->length; 1108 crat_table->length += sub_type_hdr->length;
1117 crat_table->total_entries++; 1109 crat_table->total_entries++;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 3da25f7bda6b..9d4af961c5d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -33,6 +33,7 @@
33#include "kfd_pm4_headers_diq.h" 33#include "kfd_pm4_headers_diq.h"
34#include "kfd_dbgmgr.h" 34#include "kfd_dbgmgr.h"
35#include "kfd_dbgdev.h" 35#include "kfd_dbgdev.h"
36#include "kfd_device_queue_manager.h"
36 37
37static DEFINE_MUTEX(kfd_dbgmgr_mutex); 38static DEFINE_MUTEX(kfd_dbgmgr_mutex);
38 39
@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
83 } 84 }
84 85
85 /* get actual type of DBGDevice cpsch or not */ 86 /* get actual type of DBGDevice cpsch or not */
86 if (sched_policy == KFD_SCHED_POLICY_NO_HWS) 87 if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
87 type = DBGDEV_TYPE_NODIQ; 88 type = DBGDEV_TYPE_NODIQ;
88 89
89 kfd_dbgdev_init(new_buff->dbgdev, pdev, type); 90 kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a8fa33a08de3..3346699960dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,7 +20,9 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 */ 21 */
22 22
23#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
23#include <linux/amd-iommu.h> 24#include <linux/amd-iommu.h>
25#endif
24#include <linux/bsearch.h> 26#include <linux/bsearch.h>
25#include <linux/pci.h> 27#include <linux/pci.h>
26#include <linux/slab.h> 28#include <linux/slab.h>
@@ -28,9 +30,12 @@
28#include "kfd_device_queue_manager.h" 30#include "kfd_device_queue_manager.h"
29#include "kfd_pm4_headers_vi.h" 31#include "kfd_pm4_headers_vi.h"
30#include "cwsr_trap_handler_gfx8.asm" 32#include "cwsr_trap_handler_gfx8.asm"
33#include "kfd_iommu.h"
31 34
32#define MQD_SIZE_ALIGNED 768 35#define MQD_SIZE_ALIGNED 768
36static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
33 37
38#ifdef KFD_SUPPORT_IOMMU_V2
34static const struct kfd_device_info kaveri_device_info = { 39static const struct kfd_device_info kaveri_device_info = {
35 .asic_family = CHIP_KAVERI, 40 .asic_family = CHIP_KAVERI,
36 .max_pasid_bits = 16, 41 .max_pasid_bits = 16,
@@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = {
41 .num_of_watch_points = 4, 46 .num_of_watch_points = 4,
42 .mqd_size_aligned = MQD_SIZE_ALIGNED, 47 .mqd_size_aligned = MQD_SIZE_ALIGNED,
43 .supports_cwsr = false, 48 .supports_cwsr = false,
49 .needs_iommu_device = true,
50 .needs_pci_atomics = false,
44}; 51};
45 52
46static const struct kfd_device_info carrizo_device_info = { 53static const struct kfd_device_info carrizo_device_info = {
@@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = {
53 .num_of_watch_points = 4, 60 .num_of_watch_points = 4,
54 .mqd_size_aligned = MQD_SIZE_ALIGNED, 61 .mqd_size_aligned = MQD_SIZE_ALIGNED,
55 .supports_cwsr = true, 62 .supports_cwsr = true,
63 .needs_iommu_device = true,
64 .needs_pci_atomics = false,
56}; 65};
66#endif
67
68static const struct kfd_device_info hawaii_device_info = {
69 .asic_family = CHIP_HAWAII,
70 .max_pasid_bits = 16,
71 /* max num of queues for KV.TODO should be a dynamic value */
72 .max_no_of_hqd = 24,
73 .ih_ring_entry_size = 4 * sizeof(uint32_t),
74 .event_interrupt_class = &event_interrupt_class_cik,
75 .num_of_watch_points = 4,
76 .mqd_size_aligned = MQD_SIZE_ALIGNED,
77 .supports_cwsr = false,
78 .needs_iommu_device = false,
79 .needs_pci_atomics = false,
80};
81
82static const struct kfd_device_info tonga_device_info = {
83 .asic_family = CHIP_TONGA,
84 .max_pasid_bits = 16,
85 .max_no_of_hqd = 24,
86 .ih_ring_entry_size = 4 * sizeof(uint32_t),
87 .event_interrupt_class = &event_interrupt_class_cik,
88 .num_of_watch_points = 4,
89 .mqd_size_aligned = MQD_SIZE_ALIGNED,
90 .supports_cwsr = false,
91 .needs_iommu_device = false,
92 .needs_pci_atomics = true,
93};
94
95static const struct kfd_device_info tonga_vf_device_info = {
96 .asic_family = CHIP_TONGA,
97 .max_pasid_bits = 16,
98 .max_no_of_hqd = 24,
99 .ih_ring_entry_size = 4 * sizeof(uint32_t),
100 .event_interrupt_class = &event_interrupt_class_cik,
101 .num_of_watch_points = 4,
102 .mqd_size_aligned = MQD_SIZE_ALIGNED,
103 .supports_cwsr = false,
104 .needs_iommu_device = false,
105 .needs_pci_atomics = false,
106};
107
108static const struct kfd_device_info fiji_device_info = {
109 .asic_family = CHIP_FIJI,
110 .max_pasid_bits = 16,
111 .max_no_of_hqd = 24,
112 .ih_ring_entry_size = 4 * sizeof(uint32_t),
113 .event_interrupt_class = &event_interrupt_class_cik,
114 .num_of_watch_points = 4,
115 .mqd_size_aligned = MQD_SIZE_ALIGNED,
116 .supports_cwsr = true,
117 .needs_iommu_device = false,
118 .needs_pci_atomics = true,
119};
120
121static const struct kfd_device_info fiji_vf_device_info = {
122 .asic_family = CHIP_FIJI,
123 .max_pasid_bits = 16,
124 .max_no_of_hqd = 24,
125 .ih_ring_entry_size = 4 * sizeof(uint32_t),
126 .event_interrupt_class = &event_interrupt_class_cik,
127 .num_of_watch_points = 4,
128 .mqd_size_aligned = MQD_SIZE_ALIGNED,
129 .supports_cwsr = true,
130 .needs_iommu_device = false,
131 .needs_pci_atomics = false,
132};
133
134
135static const struct kfd_device_info polaris10_device_info = {
136 .asic_family = CHIP_POLARIS10,
137 .max_pasid_bits = 16,
138 .max_no_of_hqd = 24,
139 .ih_ring_entry_size = 4 * sizeof(uint32_t),
140 .event_interrupt_class = &event_interrupt_class_cik,
141 .num_of_watch_points = 4,
142 .mqd_size_aligned = MQD_SIZE_ALIGNED,
143 .supports_cwsr = true,
144 .needs_iommu_device = false,
145 .needs_pci_atomics = true,
146};
147
148static const struct kfd_device_info polaris10_vf_device_info = {
149 .asic_family = CHIP_POLARIS10,
150 .max_pasid_bits = 16,
151 .max_no_of_hqd = 24,
152 .ih_ring_entry_size = 4 * sizeof(uint32_t),
153 .event_interrupt_class = &event_interrupt_class_cik,
154 .num_of_watch_points = 4,
155 .mqd_size_aligned = MQD_SIZE_ALIGNED,
156 .supports_cwsr = true,
157 .needs_iommu_device = false,
158 .needs_pci_atomics = false,
159};
160
161static const struct kfd_device_info polaris11_device_info = {
162 .asic_family = CHIP_POLARIS11,
163 .max_pasid_bits = 16,
164 .max_no_of_hqd = 24,
165 .ih_ring_entry_size = 4 * sizeof(uint32_t),
166 .event_interrupt_class = &event_interrupt_class_cik,
167 .num_of_watch_points = 4,
168 .mqd_size_aligned = MQD_SIZE_ALIGNED,
169 .supports_cwsr = true,
170 .needs_iommu_device = false,
171 .needs_pci_atomics = true,
172};
173
57 174
58struct kfd_deviceid { 175struct kfd_deviceid {
59 unsigned short did; 176 unsigned short did;
60 const struct kfd_device_info *device_info; 177 const struct kfd_device_info *device_info;
61}; 178};
62 179
63/* Please keep this sorted by increasing device id. */
64static const struct kfd_deviceid supported_devices[] = { 180static const struct kfd_deviceid supported_devices[] = {
181#ifdef KFD_SUPPORT_IOMMU_V2
65 { 0x1304, &kaveri_device_info }, /* Kaveri */ 182 { 0x1304, &kaveri_device_info }, /* Kaveri */
66 { 0x1305, &kaveri_device_info }, /* Kaveri */ 183 { 0x1305, &kaveri_device_info }, /* Kaveri */
67 { 0x1306, &kaveri_device_info }, /* Kaveri */ 184 { 0x1306, &kaveri_device_info }, /* Kaveri */
@@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = {
88 { 0x9874, &carrizo_device_info }, /* Carrizo */ 205 { 0x9874, &carrizo_device_info }, /* Carrizo */
89 { 0x9875, &carrizo_device_info }, /* Carrizo */ 206 { 0x9875, &carrizo_device_info }, /* Carrizo */
90 { 0x9876, &carrizo_device_info }, /* Carrizo */ 207 { 0x9876, &carrizo_device_info }, /* Carrizo */
91 { 0x9877, &carrizo_device_info } /* Carrizo */ 208 { 0x9877, &carrizo_device_info }, /* Carrizo */
209#endif
210 { 0x67A0, &hawaii_device_info }, /* Hawaii */
211 { 0x67A1, &hawaii_device_info }, /* Hawaii */
212 { 0x67A2, &hawaii_device_info }, /* Hawaii */
213 { 0x67A8, &hawaii_device_info }, /* Hawaii */
214 { 0x67A9, &hawaii_device_info }, /* Hawaii */
215 { 0x67AA, &hawaii_device_info }, /* Hawaii */
216 { 0x67B0, &hawaii_device_info }, /* Hawaii */
217 { 0x67B1, &hawaii_device_info }, /* Hawaii */
218 { 0x67B8, &hawaii_device_info }, /* Hawaii */
219 { 0x67B9, &hawaii_device_info }, /* Hawaii */
220 { 0x67BA, &hawaii_device_info }, /* Hawaii */
221 { 0x67BE, &hawaii_device_info }, /* Hawaii */
222 { 0x6920, &tonga_device_info }, /* Tonga */
223 { 0x6921, &tonga_device_info }, /* Tonga */
224 { 0x6928, &tonga_device_info }, /* Tonga */
225 { 0x6929, &tonga_device_info }, /* Tonga */
226 { 0x692B, &tonga_device_info }, /* Tonga */
227 { 0x692F, &tonga_vf_device_info }, /* Tonga vf */
228 { 0x6938, &tonga_device_info }, /* Tonga */
229 { 0x6939, &tonga_device_info }, /* Tonga */
230 { 0x7300, &fiji_device_info }, /* Fiji */
231 { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/
232 { 0x67C0, &polaris10_device_info }, /* Polaris10 */
233 { 0x67C1, &polaris10_device_info }, /* Polaris10 */
234 { 0x67C2, &polaris10_device_info }, /* Polaris10 */
235 { 0x67C4, &polaris10_device_info }, /* Polaris10 */
236 { 0x67C7, &polaris10_device_info }, /* Polaris10 */
237 { 0x67C8, &polaris10_device_info }, /* Polaris10 */
238 { 0x67C9, &polaris10_device_info }, /* Polaris10 */
239 { 0x67CA, &polaris10_device_info }, /* Polaris10 */
240 { 0x67CC, &polaris10_device_info }, /* Polaris10 */
241 { 0x67CF, &polaris10_device_info }, /* Polaris10 */
242 { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/
243 { 0x67DF, &polaris10_device_info }, /* Polaris10 */
244 { 0x67E0, &polaris11_device_info }, /* Polaris11 */
245 { 0x67E1, &polaris11_device_info }, /* Polaris11 */
246 { 0x67E3, &polaris11_device_info }, /* Polaris11 */
247 { 0x67E7, &polaris11_device_info }, /* Polaris11 */
248 { 0x67E8, &polaris11_device_info }, /* Polaris11 */
249 { 0x67E9, &polaris11_device_info }, /* Polaris11 */
250 { 0x67EB, &polaris11_device_info }, /* Polaris11 */
251 { 0x67EF, &polaris11_device_info }, /* Polaris11 */
252 { 0x67FF, &polaris11_device_info }, /* Polaris11 */
92}; 253};
93 254
94static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 255static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
127 return NULL; 288 return NULL;
128 } 289 }
129 290
291 if (device_info->needs_pci_atomics) {
292 /* Allow BIF to recode atomics to PCIe 3.0
293 * AtomicOps. 32 and 64-bit requests are possible and
294 * must be supported.
295 */
296 if (pci_enable_atomic_ops_to_root(pdev,
297 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
298 PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
299 dev_info(kfd_device,
300 "skipped device %x:%x, PCI rejects atomics",
301 pdev->vendor, pdev->device);
302 return NULL;
303 }
304 }
305
130 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); 306 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
131 if (!kfd) 307 if (!kfd)
132 return NULL; 308 return NULL;
@@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
144 return kfd; 320 return kfd;
145} 321}
146 322
147static bool device_iommu_pasid_init(struct kfd_dev *kfd)
148{
149 const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
150 AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
151 AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
152
153 struct amd_iommu_device_info iommu_info;
154 unsigned int pasid_limit;
155 int err;
156
157 err = amd_iommu_device_info(kfd->pdev, &iommu_info);
158 if (err < 0) {
159 dev_err(kfd_device,
160 "error getting iommu info. is the iommu enabled?\n");
161 return false;
162 }
163
164 if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
165 dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
166 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
167 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
168 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
169 != 0);
170 return false;
171 }
172
173 pasid_limit = min_t(unsigned int,
174 (unsigned int)(1 << kfd->device_info->max_pasid_bits),
175 iommu_info.max_pasids);
176
177 if (!kfd_set_pasid_limit(pasid_limit)) {
178 dev_err(kfd_device, "error setting pasid limit\n");
179 return false;
180 }
181
182 return true;
183}
184
185static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
186{
187 struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
188
189 if (dev)
190 kfd_process_iommu_unbind_callback(dev, pasid);
191}
192
193/*
194 * This function called by IOMMU driver on PPR failure
195 */
196static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
197 unsigned long address, u16 flags)
198{
199 struct kfd_dev *dev;
200
201 dev_warn(kfd_device,
202 "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
203 PCI_BUS_NUM(pdev->devfn),
204 PCI_SLOT(pdev->devfn),
205 PCI_FUNC(pdev->devfn),
206 pasid,
207 address,
208 flags);
209
210 dev = kfd_device_by_pci_dev(pdev);
211 if (!WARN_ON(!dev))
212 kfd_signal_iommu_event(dev, pasid, address,
213 flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
214
215 return AMD_IOMMU_INV_PRI_RSP_INVALID;
216}
217
218static void kfd_cwsr_init(struct kfd_dev *kfd) 323static void kfd_cwsr_init(struct kfd_dev *kfd)
219{ 324{
220 if (cwsr_enable && kfd->device_info->supports_cwsr) { 325 if (cwsr_enable && kfd->device_info->supports_cwsr) {
@@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
304 goto device_queue_manager_error; 409 goto device_queue_manager_error;
305 } 410 }
306 411
307 if (!device_iommu_pasid_init(kfd)) { 412 if (kfd_iommu_device_init(kfd)) {
308 dev_err(kfd_device, 413 dev_err(kfd_device, "Error initializing iommuv2\n");
309 "Error initializing iommuv2 for device %x:%x\n", 414 goto device_iommu_error;
310 kfd->pdev->vendor, kfd->pdev->device);
311 goto device_iommu_pasid_error;
312 } 415 }
313 416
314 kfd_cwsr_init(kfd); 417 kfd_cwsr_init(kfd);
@@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
323 kfd->pdev->device); 426 kfd->pdev->device);
324 427
325 pr_debug("Starting kfd with the following scheduling policy %d\n", 428 pr_debug("Starting kfd with the following scheduling policy %d\n",
326 sched_policy); 429 kfd->dqm->sched_policy);
327 430
328 goto out; 431 goto out;
329 432
330kfd_resume_error: 433kfd_resume_error:
331device_iommu_pasid_error: 434device_iommu_error:
332 device_queue_manager_uninit(kfd->dqm); 435 device_queue_manager_uninit(kfd->dqm);
333device_queue_manager_error: 436device_queue_manager_error:
334 kfd_interrupt_exit(kfd); 437 kfd_interrupt_exit(kfd);
@@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
367 if (!kfd->init_complete) 470 if (!kfd->init_complete)
368 return; 471 return;
369 472
370 kfd->dqm->ops.stop(kfd->dqm); 473 /* For first KFD device suspend all the KFD processes */
474 if (atomic_inc_return(&kfd_device_suspended) == 1)
475 kfd_suspend_all_processes();
371 476
372 kfd_unbind_processes_from_device(kfd); 477 kfd->dqm->ops.stop(kfd->dqm);
373 478
374 amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); 479 kfd_iommu_suspend(kfd);
375 amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
376 amd_iommu_free_device(kfd->pdev);
377} 480}
378 481
379int kgd2kfd_resume(struct kfd_dev *kfd) 482int kgd2kfd_resume(struct kfd_dev *kfd)
380{ 483{
484 int ret, count;
485
381 if (!kfd->init_complete) 486 if (!kfd->init_complete)
382 return 0; 487 return 0;
383 488
384 return kfd_resume(kfd); 489 ret = kfd_resume(kfd);
490 if (ret)
491 return ret;
492
493 count = atomic_dec_return(&kfd_device_suspended);
494 WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
495 if (count == 0)
496 ret = kfd_resume_all_processes();
385 497
498 return ret;
386} 499}
387 500
388static int kfd_resume(struct kfd_dev *kfd) 501static int kfd_resume(struct kfd_dev *kfd)
389{ 502{
390 int err = 0; 503 int err = 0;
391 unsigned int pasid_limit = kfd_get_pasid_limit();
392 504
393 err = amd_iommu_init_device(kfd->pdev, pasid_limit); 505 err = kfd_iommu_resume(kfd);
394 if (err) 506 if (err) {
395 return -ENXIO; 507 dev_err(kfd_device,
396 amd_iommu_set_invalidate_ctx_cb(kfd->pdev, 508 "Failed to resume IOMMU for device %x:%x\n",
397 iommu_pasid_shutdown_callback); 509 kfd->pdev->vendor, kfd->pdev->device);
398 amd_iommu_set_invalid_ppr_cb(kfd->pdev, 510 return err;
399 iommu_invalid_ppr_cb); 511 }
400
401 err = kfd_bind_processes_to_device(kfd);
402 if (err)
403 goto processes_bind_error;
404 512
405 err = kfd->dqm->ops.start(kfd->dqm); 513 err = kfd->dqm->ops.start(kfd->dqm);
406 if (err) { 514 if (err) {
@@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd)
413 return err; 521 return err;
414 522
415dqm_start_error: 523dqm_start_error:
416processes_bind_error: 524 kfd_iommu_suspend(kfd);
417 amd_iommu_free_device(kfd->pdev);
418
419 return err; 525 return err;
420} 526}
421 527
@@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
435 spin_unlock(&kfd->interrupt_lock); 541 spin_unlock(&kfd->interrupt_lock);
436} 542}
437 543
544/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
545 * prepare for safe eviction of KFD BOs that belong to the specified
546 * process.
547 *
548 * @mm: mm_struct that identifies the specified KFD process
549 * @fence: eviction fence attached to KFD process BOs
550 *
551 */
552int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
553 struct dma_fence *fence)
554{
555 struct kfd_process *p;
556 unsigned long active_time;
557 unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
558
559 if (!fence)
560 return -EINVAL;
561
562 if (dma_fence_is_signaled(fence))
563 return 0;
564
565 p = kfd_lookup_process_by_mm(mm);
566 if (!p)
567 return -ENODEV;
568
569 if (fence->seqno == p->last_eviction_seqno)
570 goto out;
571
572 p->last_eviction_seqno = fence->seqno;
573
574 /* Avoid KFD process starvation. Wait for at least
575 * PROCESS_ACTIVE_TIME_MS before evicting the process again
576 */
577 active_time = get_jiffies_64() - p->last_restore_timestamp;
578 if (delay_jiffies > active_time)
579 delay_jiffies -= active_time;
580 else
581 delay_jiffies = 0;
582
583 /* During process initialization eviction_work.dwork is initialized
584 * to kfd_evict_bo_worker
585 */
586 schedule_delayed_work(&p->eviction_work, delay_jiffies);
587out:
588 kfd_unref_process(p);
589 return 0;
590}
591
438static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 592static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
439 unsigned int chunk_size) 593 unsigned int chunk_size)
440{ 594{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b21285afa4ea..b3b6dab71638 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -21,10 +21,11 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/ratelimit.h>
25#include <linux/printk.h>
24#include <linux/slab.h> 26#include <linux/slab.h>
25#include <linux/list.h> 27#include <linux/list.h>
26#include <linux/types.h> 28#include <linux/types.h>
27#include <linux/printk.h>
28#include <linux/bitops.h> 29#include <linux/bitops.h>
29#include <linux/sched.h> 30#include <linux/sched.h>
30#include "kfd_priv.h" 31#include "kfd_priv.h"
@@ -118,9 +119,8 @@ static int allocate_vmid(struct device_queue_manager *dqm,
118 if (dqm->vmid_bitmap == 0) 119 if (dqm->vmid_bitmap == 0)
119 return -ENOMEM; 120 return -ENOMEM;
120 121
121 bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, 122 bit = ffs(dqm->vmid_bitmap) - 1;
122 dqm->dev->vm_info.vmid_num_kfd); 123 dqm->vmid_bitmap &= ~(1 << bit);
123 clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
124 124
125 allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; 125 allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
126 pr_debug("vmid allocation %d\n", allocated_vmid); 126 pr_debug("vmid allocation %d\n", allocated_vmid);
@@ -130,6 +130,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
130 set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); 130 set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
131 program_sh_mem_settings(dqm, qpd); 131 program_sh_mem_settings(dqm, qpd);
132 132
133 /* qpd->page_table_base is set earlier when register_process()
134 * is called, i.e. when the first queue is created.
135 */
136 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
137 qpd->vmid,
138 qpd->page_table_base);
139 /* invalidate the VM context after pasid and vmid mapping is set up */
140 kfd_flush_tlb(qpd_to_pdd(qpd));
141
133 return 0; 142 return 0;
134} 143}
135 144
@@ -139,10 +148,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
139{ 148{
140 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; 149 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
141 150
151 kfd_flush_tlb(qpd_to_pdd(qpd));
152
142 /* Release the vmid mapping */ 153 /* Release the vmid mapping */
143 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 154 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
144 155
145 set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); 156 dqm->vmid_bitmap |= (1 << bit);
146 qpd->vmid = 0; 157 qpd->vmid = 0;
147 q->properties.vmid = 0; 158 q->properties.vmid = 0;
148} 159}
@@ -170,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
170 goto out_unlock; 181 goto out_unlock;
171 } 182 }
172 q->properties.vmid = qpd->vmid; 183 q->properties.vmid = qpd->vmid;
184 /*
185 * Eviction state logic: we only mark active queues as evicted
186 * to avoid the overhead of restoring inactive queues later
187 */
188 if (qpd->evicted)
189 q->properties.is_evicted = (q->properties.queue_size > 0 &&
190 q->properties.queue_percent > 0 &&
191 q->properties.queue_address != 0);
173 192
174 q->properties.tba_addr = qpd->tba_addr; 193 q->properties.tba_addr = qpd->tba_addr;
175 q->properties.tma_addr = qpd->tma_addr; 194 q->properties.tma_addr = qpd->tma_addr;
@@ -223,12 +242,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
223 continue; 242 continue;
224 243
225 if (dqm->allocated_queues[pipe] != 0) { 244 if (dqm->allocated_queues[pipe] != 0) {
226 bit = find_first_bit( 245 bit = ffs(dqm->allocated_queues[pipe]) - 1;
227 (unsigned long *)&dqm->allocated_queues[pipe], 246 dqm->allocated_queues[pipe] &= ~(1 << bit);
228 get_queues_per_pipe(dqm));
229
230 clear_bit(bit,
231 (unsigned long *)&dqm->allocated_queues[pipe]);
232 q->pipe = pipe; 247 q->pipe = pipe;
233 q->queue = bit; 248 q->queue = bit;
234 set = true; 249 set = true;
@@ -249,7 +264,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
249static inline void deallocate_hqd(struct device_queue_manager *dqm, 264static inline void deallocate_hqd(struct device_queue_manager *dqm,
250 struct queue *q) 265 struct queue *q)
251{ 266{
252 set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]); 267 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
253} 268}
254 269
255static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, 270static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
@@ -371,21 +386,35 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
371{ 386{
372 int retval; 387 int retval;
373 struct mqd_manager *mqd; 388 struct mqd_manager *mqd;
389 struct kfd_process_device *pdd;
374 bool prev_active = false; 390 bool prev_active = false;
375 391
376 mutex_lock(&dqm->lock); 392 mutex_lock(&dqm->lock);
393 pdd = kfd_get_process_device_data(q->device, q->process);
394 if (!pdd) {
395 retval = -ENODEV;
396 goto out_unlock;
397 }
377 mqd = dqm->ops.get_mqd_manager(dqm, 398 mqd = dqm->ops.get_mqd_manager(dqm,
378 get_mqd_type_from_queue_type(q->properties.type)); 399 get_mqd_type_from_queue_type(q->properties.type));
379 if (!mqd) { 400 if (!mqd) {
380 retval = -ENOMEM; 401 retval = -ENOMEM;
381 goto out_unlock; 402 goto out_unlock;
382 } 403 }
404 /*
405 * Eviction state logic: we only mark active queues as evicted
406 * to avoid the overhead of restoring inactive queues later
407 */
408 if (pdd->qpd.evicted)
409 q->properties.is_evicted = (q->properties.queue_size > 0 &&
410 q->properties.queue_percent > 0 &&
411 q->properties.queue_address != 0);
383 412
384 /* Save previous activity state for counters */ 413 /* Save previous activity state for counters */
385 prev_active = q->properties.is_active; 414 prev_active = q->properties.is_active;
386 415
387 /* Make sure the queue is unmapped before updating the MQD */ 416 /* Make sure the queue is unmapped before updating the MQD */
388 if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { 417 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
389 retval = unmap_queues_cpsch(dqm, 418 retval = unmap_queues_cpsch(dqm,
390 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 419 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
391 if (retval) { 420 if (retval) {
@@ -417,7 +446,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
417 else if (!q->properties.is_active && prev_active) 446 else if (!q->properties.is_active && prev_active)
418 dqm->queue_count--; 447 dqm->queue_count--;
419 448
420 if (sched_policy != KFD_SCHED_POLICY_NO_HWS) 449 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
421 retval = map_queues_cpsch(dqm); 450 retval = map_queues_cpsch(dqm);
422 else if (q->properties.is_active && 451 else if (q->properties.is_active &&
423 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 452 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
@@ -451,10 +480,193 @@ static struct mqd_manager *get_mqd_manager(
451 return mqd; 480 return mqd;
452} 481}
453 482
483static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
484 struct qcm_process_device *qpd)
485{
486 struct queue *q;
487 struct mqd_manager *mqd;
488 struct kfd_process_device *pdd;
489 int retval = 0;
490
491 mutex_lock(&dqm->lock);
492 if (qpd->evicted++ > 0) /* already evicted, do nothing */
493 goto out;
494
495 pdd = qpd_to_pdd(qpd);
496 pr_info_ratelimited("Evicting PASID %u queues\n",
497 pdd->process->pasid);
498
499 /* unactivate all active queues on the qpd */
500 list_for_each_entry(q, &qpd->queues_list, list) {
501 if (!q->properties.is_active)
502 continue;
503 mqd = dqm->ops.get_mqd_manager(dqm,
504 get_mqd_type_from_queue_type(q->properties.type));
505 if (!mqd) { /* should not be here */
506 pr_err("Cannot evict queue, mqd mgr is NULL\n");
507 retval = -ENOMEM;
508 goto out;
509 }
510 q->properties.is_evicted = true;
511 q->properties.is_active = false;
512 retval = mqd->destroy_mqd(mqd, q->mqd,
513 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
514 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
515 if (retval)
516 goto out;
517 dqm->queue_count--;
518 }
519
520out:
521 mutex_unlock(&dqm->lock);
522 return retval;
523}
524
525static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
526 struct qcm_process_device *qpd)
527{
528 struct queue *q;
529 struct kfd_process_device *pdd;
530 int retval = 0;
531
532 mutex_lock(&dqm->lock);
533 if (qpd->evicted++ > 0) /* already evicted, do nothing */
534 goto out;
535
536 pdd = qpd_to_pdd(qpd);
537 pr_info_ratelimited("Evicting PASID %u queues\n",
538 pdd->process->pasid);
539
540 /* unactivate all active queues on the qpd */
541 list_for_each_entry(q, &qpd->queues_list, list) {
542 if (!q->properties.is_active)
543 continue;
544 q->properties.is_evicted = true;
545 q->properties.is_active = false;
546 dqm->queue_count--;
547 }
548 retval = execute_queues_cpsch(dqm,
549 qpd->is_debug ?
550 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
551 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
552
553out:
554 mutex_unlock(&dqm->lock);
555 return retval;
556}
557
558static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
559 struct qcm_process_device *qpd)
560{
561 struct queue *q;
562 struct mqd_manager *mqd;
563 struct kfd_process_device *pdd;
564 uint32_t pd_base;
565 int retval = 0;
566
567 pdd = qpd_to_pdd(qpd);
568 /* Retrieve PD base */
569 pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
570
571 mutex_lock(&dqm->lock);
572 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
573 goto out;
574 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
575 qpd->evicted--;
576 goto out;
577 }
578
579 pr_info_ratelimited("Restoring PASID %u queues\n",
580 pdd->process->pasid);
581
582 /* Update PD Base in QPD */
583 qpd->page_table_base = pd_base;
584 pr_debug("Updated PD address to 0x%08x\n", pd_base);
585
586 if (!list_empty(&qpd->queues_list)) {
587 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
588 dqm->dev->kgd,
589 qpd->vmid,
590 qpd->page_table_base);
591 kfd_flush_tlb(pdd);
592 }
593
594 /* activate all active queues on the qpd */
595 list_for_each_entry(q, &qpd->queues_list, list) {
596 if (!q->properties.is_evicted)
597 continue;
598 mqd = dqm->ops.get_mqd_manager(dqm,
599 get_mqd_type_from_queue_type(q->properties.type));
600 if (!mqd) { /* should not be here */
601 pr_err("Cannot restore queue, mqd mgr is NULL\n");
602 retval = -ENOMEM;
603 goto out;
604 }
605 q->properties.is_evicted = false;
606 q->properties.is_active = true;
607 retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
608 q->queue, &q->properties,
609 q->process->mm);
610 if (retval)
611 goto out;
612 dqm->queue_count++;
613 }
614 qpd->evicted = 0;
615out:
616 mutex_unlock(&dqm->lock);
617 return retval;
618}
619
620static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
621 struct qcm_process_device *qpd)
622{
623 struct queue *q;
624 struct kfd_process_device *pdd;
625 uint32_t pd_base;
626 int retval = 0;
627
628 pdd = qpd_to_pdd(qpd);
629 /* Retrieve PD base */
630 pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
631
632 mutex_lock(&dqm->lock);
633 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
634 goto out;
635 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
636 qpd->evicted--;
637 goto out;
638 }
639
640 pr_info_ratelimited("Restoring PASID %u queues\n",
641 pdd->process->pasid);
642
643 /* Update PD Base in QPD */
644 qpd->page_table_base = pd_base;
645 pr_debug("Updated PD address to 0x%08x\n", pd_base);
646
647 /* activate all active queues on the qpd */
648 list_for_each_entry(q, &qpd->queues_list, list) {
649 if (!q->properties.is_evicted)
650 continue;
651 q->properties.is_evicted = false;
652 q->properties.is_active = true;
653 dqm->queue_count++;
654 }
655 retval = execute_queues_cpsch(dqm,
656 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
657 if (!retval)
658 qpd->evicted = 0;
659out:
660 mutex_unlock(&dqm->lock);
661 return retval;
662}
663
454static int register_process(struct device_queue_manager *dqm, 664static int register_process(struct device_queue_manager *dqm,
455 struct qcm_process_device *qpd) 665 struct qcm_process_device *qpd)
456{ 666{
457 struct device_process_node *n; 667 struct device_process_node *n;
668 struct kfd_process_device *pdd;
669 uint32_t pd_base;
458 int retval; 670 int retval;
459 671
460 n = kzalloc(sizeof(*n), GFP_KERNEL); 672 n = kzalloc(sizeof(*n), GFP_KERNEL);
@@ -463,9 +675,16 @@ static int register_process(struct device_queue_manager *dqm,
463 675
464 n->qpd = qpd; 676 n->qpd = qpd;
465 677
678 pdd = qpd_to_pdd(qpd);
679 /* Retrieve PD base */
680 pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
681
466 mutex_lock(&dqm->lock); 682 mutex_lock(&dqm->lock);
467 list_add(&n->list, &dqm->queues); 683 list_add(&n->list, &dqm->queues);
468 684
685 /* Update PD Base in QPD */
686 qpd->page_table_base = pd_base;
687
469 retval = dqm->asic_ops.update_qpd(dqm, qpd); 688 retval = dqm->asic_ops.update_qpd(dqm, qpd);
470 689
471 dqm->processes_count++; 690 dqm->processes_count++;
@@ -589,10 +808,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
589 if (dqm->sdma_bitmap == 0) 808 if (dqm->sdma_bitmap == 0)
590 return -ENOMEM; 809 return -ENOMEM;
591 810
592 bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, 811 bit = ffs(dqm->sdma_bitmap) - 1;
593 CIK_SDMA_QUEUES); 812 dqm->sdma_bitmap &= ~(1 << bit);
594
595 clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
596 *sdma_queue_id = bit; 813 *sdma_queue_id = bit;
597 814
598 return 0; 815 return 0;
@@ -603,7 +820,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
603{ 820{
604 if (sdma_queue_id >= CIK_SDMA_QUEUES) 821 if (sdma_queue_id >= CIK_SDMA_QUEUES)
605 return; 822 return;
606 set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); 823 dqm->sdma_bitmap |= (1 << sdma_queue_id);
607} 824}
608 825
609static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 826static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -840,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
840 retval = -ENOMEM; 1057 retval = -ENOMEM;
841 goto out; 1058 goto out;
842 } 1059 }
1060 /*
1061 * Eviction state logic: we only mark active queues as evicted
1062 * to avoid the overhead of restoring inactive queues later
1063 */
1064 if (qpd->evicted)
1065 q->properties.is_evicted = (q->properties.queue_size > 0 &&
1066 q->properties.queue_percent > 0 &&
1067 q->properties.queue_address != 0);
843 1068
844 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1069 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
845 1070
@@ -1097,7 +1322,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1097 alternate_aperture_base, 1322 alternate_aperture_base,
1098 alternate_aperture_size); 1323 alternate_aperture_size);
1099 1324
1100 if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1325 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1101 program_sh_mem_settings(dqm, qpd); 1326 program_sh_mem_settings(dqm, qpd);
1102 1327
1103 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1328 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
@@ -1242,8 +1467,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1242 if (!dqm) 1467 if (!dqm)
1243 return NULL; 1468 return NULL;
1244 1469
1470 switch (dev->device_info->asic_family) {
1471 /* HWS is not available on Hawaii. */
1472 case CHIP_HAWAII:
1473 /* HWS depends on CWSR for timely dequeue. CWSR is not
1474 * available on Tonga.
1475 *
1476 * FIXME: This argument also applies to Kaveri.
1477 */
1478 case CHIP_TONGA:
1479 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1480 break;
1481 default:
1482 dqm->sched_policy = sched_policy;
1483 break;
1484 }
1485
1245 dqm->dev = dev; 1486 dqm->dev = dev;
1246 switch (sched_policy) { 1487 switch (dqm->sched_policy) {
1247 case KFD_SCHED_POLICY_HWS: 1488 case KFD_SCHED_POLICY_HWS:
1248 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 1489 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1249 /* initialize dqm for cp scheduling */ 1490 /* initialize dqm for cp scheduling */
@@ -1262,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1262 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1503 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1263 dqm->ops.set_trap_handler = set_trap_handler; 1504 dqm->ops.set_trap_handler = set_trap_handler;
1264 dqm->ops.process_termination = process_termination_cpsch; 1505 dqm->ops.process_termination = process_termination_cpsch;
1506 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1507 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1265 break; 1508 break;
1266 case KFD_SCHED_POLICY_NO_HWS: 1509 case KFD_SCHED_POLICY_NO_HWS:
1267 /* initialize dqm for no cp scheduling */ 1510 /* initialize dqm for no cp scheduling */
@@ -1278,9 +1521,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1278 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1521 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1279 dqm->ops.set_trap_handler = set_trap_handler; 1522 dqm->ops.set_trap_handler = set_trap_handler;
1280 dqm->ops.process_termination = process_termination_nocpsch; 1523 dqm->ops.process_termination = process_termination_nocpsch;
1524 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1525 dqm->ops.restore_process_queues =
1526 restore_process_queues_nocpsch;
1281 break; 1527 break;
1282 default: 1528 default:
1283 pr_err("Invalid scheduling policy %d\n", sched_policy); 1529 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1284 goto out_free; 1530 goto out_free;
1285 } 1531 }
1286 1532
@@ -1292,6 +1538,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1292 case CHIP_KAVERI: 1538 case CHIP_KAVERI:
1293 device_queue_manager_init_cik(&dqm->asic_ops); 1539 device_queue_manager_init_cik(&dqm->asic_ops);
1294 break; 1540 break;
1541
1542 case CHIP_HAWAII:
1543 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1544 break;
1545
1546 case CHIP_TONGA:
1547 case CHIP_FIJI:
1548 case CHIP_POLARIS10:
1549 case CHIP_POLARIS11:
1550 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1551 break;
1295 default: 1552 default:
1296 WARN(1, "Unexpected ASIC family %u", 1553 WARN(1, "Unexpected ASIC family %u",
1297 dev->device_info->asic_family); 1554 dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c61b693bfa8c..412beff3281d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -79,6 +79,10 @@ struct device_process_node {
79 * 79 *
80 * @process_termination: Clears all process queues belongs to that device. 80 * @process_termination: Clears all process queues belongs to that device.
81 * 81 *
82 * @evict_process_queues: Evict all active queues of a process
83 *
 84 * @restore_process_queues: Restore all evicted queues of a process
85 *
82 */ 86 */
83 87
84struct device_queue_manager_ops { 88struct device_queue_manager_ops {
@@ -129,6 +133,11 @@ struct device_queue_manager_ops {
129 133
130 int (*process_termination)(struct device_queue_manager *dqm, 134 int (*process_termination)(struct device_queue_manager *dqm,
131 struct qcm_process_device *qpd); 135 struct qcm_process_device *qpd);
136
137 int (*evict_process_queues)(struct device_queue_manager *dqm,
138 struct qcm_process_device *qpd);
139 int (*restore_process_queues)(struct device_queue_manager *dqm,
140 struct qcm_process_device *qpd);
132}; 141};
133 142
134struct device_queue_manager_asic_ops { 143struct device_queue_manager_asic_ops {
@@ -180,12 +189,17 @@ struct device_queue_manager {
180 unsigned int *fence_addr; 189 unsigned int *fence_addr;
181 struct kfd_mem_obj *fence_mem; 190 struct kfd_mem_obj *fence_mem;
182 bool active_runlist; 191 bool active_runlist;
192 int sched_policy;
183}; 193};
184 194
185void device_queue_manager_init_cik( 195void device_queue_manager_init_cik(
186 struct device_queue_manager_asic_ops *asic_ops); 196 struct device_queue_manager_asic_ops *asic_ops);
197void device_queue_manager_init_cik_hawaii(
198 struct device_queue_manager_asic_ops *asic_ops);
187void device_queue_manager_init_vi( 199void device_queue_manager_init_vi(
188 struct device_queue_manager_asic_ops *asic_ops); 200 struct device_queue_manager_asic_ops *asic_ops);
201void device_queue_manager_init_vi_tonga(
202 struct device_queue_manager_asic_ops *asic_ops);
189void program_sh_mem_settings(struct device_queue_manager *dqm, 203void program_sh_mem_settings(struct device_queue_manager *dqm,
190 struct qcm_process_device *qpd); 204 struct qcm_process_device *qpd);
191unsigned int get_queues_num(struct device_queue_manager *dqm); 205unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 28e48c90c596..aed4c21417bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
34 uint64_t alternate_aperture_size); 34 uint64_t alternate_aperture_size);
35static int update_qpd_cik(struct device_queue_manager *dqm, 35static int update_qpd_cik(struct device_queue_manager *dqm,
36 struct qcm_process_device *qpd); 36 struct qcm_process_device *qpd);
37static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
38 struct qcm_process_device *qpd);
37static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 39static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
38 struct qcm_process_device *qpd); 40 struct qcm_process_device *qpd);
41static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
42 struct queue *q,
43 struct qcm_process_device *qpd);
39 44
40void device_queue_manager_init_cik( 45void device_queue_manager_init_cik(
41 struct device_queue_manager_asic_ops *asic_ops) 46 struct device_queue_manager_asic_ops *asic_ops)
@@ -45,6 +50,14 @@ void device_queue_manager_init_cik(
45 asic_ops->init_sdma_vm = init_sdma_vm; 50 asic_ops->init_sdma_vm = init_sdma_vm;
46} 51}
47 52
53void device_queue_manager_init_cik_hawaii(
54 struct device_queue_manager_asic_ops *asic_ops)
55{
56 asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
57 asic_ops->update_qpd = update_qpd_cik_hawaii;
58 asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
59}
60
48static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) 61static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
49{ 62{
50 /* In 64-bit mode, we can only control the top 3 bits of the LDS, 63 /* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm,
132 return 0; 145 return 0;
133} 146}
134 147
148static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
149 struct qcm_process_device *qpd)
150{
151 struct kfd_process_device *pdd;
152 unsigned int temp;
153
154 pdd = qpd_to_pdd(qpd);
155
156 /* check if sh_mem_config register already configured */
157 if (qpd->sh_mem_config == 0) {
158 qpd->sh_mem_config =
159 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
160 DEFAULT_MTYPE(MTYPE_NONCACHED) |
161 APE1_MTYPE(MTYPE_NONCACHED);
162 qpd->sh_mem_ape1_limit = 0;
163 qpd->sh_mem_ape1_base = 0;
164 }
165
166 /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
167 * aperture addresses.
168 */
169 temp = get_sh_mem_bases_nybble_64(pdd);
170 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
171
172 pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
173 qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
174
175 return 0;
176}
177
135static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 178static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
136 struct qcm_process_device *qpd) 179 struct qcm_process_device *qpd)
137{ 180{
@@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
147 190
148 q->properties.sdma_vm_addr = value; 191 q->properties.sdma_vm_addr = value;
149} 192}
193
194static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
195 struct queue *q,
196 struct qcm_process_device *qpd)
197{
198 /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
199 * aperture addresses.
200 */
201 q->properties.sdma_vm_addr =
202 ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
203 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
204 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
205}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 2fbce57a2f21..fd60a116be37 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
33 enum cache_policy alternate_policy, 33 enum cache_policy alternate_policy,
34 void __user *alternate_aperture_base, 34 void __user *alternate_aperture_base,
35 uint64_t alternate_aperture_size); 35 uint64_t alternate_aperture_size);
36static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
37 struct qcm_process_device *qpd,
38 enum cache_policy default_policy,
39 enum cache_policy alternate_policy,
40 void __user *alternate_aperture_base,
41 uint64_t alternate_aperture_size);
36static int update_qpd_vi(struct device_queue_manager *dqm, 42static int update_qpd_vi(struct device_queue_manager *dqm,
37 struct qcm_process_device *qpd); 43 struct qcm_process_device *qpd);
44static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
45 struct qcm_process_device *qpd);
38static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 46static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
39 struct qcm_process_device *qpd); 47 struct qcm_process_device *qpd);
48static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
49 struct queue *q,
50 struct qcm_process_device *qpd);
40 51
41void device_queue_manager_init_vi( 52void device_queue_manager_init_vi(
42 struct device_queue_manager_asic_ops *asic_ops) 53 struct device_queue_manager_asic_ops *asic_ops)
@@ -46,6 +57,14 @@ void device_queue_manager_init_vi(
46 asic_ops->init_sdma_vm = init_sdma_vm; 57 asic_ops->init_sdma_vm = init_sdma_vm;
47} 58}
48 59
60void device_queue_manager_init_vi_tonga(
61 struct device_queue_manager_asic_ops *asic_ops)
62{
63 asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
64 asic_ops->update_qpd = update_qpd_vi_tonga;
65 asic_ops->init_sdma_vm = init_sdma_vm_tonga;
66}
67
49static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) 68static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
50{ 69{
51 /* In 64-bit mode, we can only control the top 3 bits of the LDS, 70 /* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
103 return true; 122 return true;
104} 123}
105 124
125static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
126 struct qcm_process_device *qpd,
127 enum cache_policy default_policy,
128 enum cache_policy alternate_policy,
129 void __user *alternate_aperture_base,
130 uint64_t alternate_aperture_size)
131{
132 uint32_t default_mtype;
133 uint32_t ape1_mtype;
134
135 default_mtype = (default_policy == cache_policy_coherent) ?
136 MTYPE_UC :
137 MTYPE_NC;
138
139 ape1_mtype = (alternate_policy == cache_policy_coherent) ?
140 MTYPE_UC :
141 MTYPE_NC;
142
143 qpd->sh_mem_config =
144 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
145 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
146 default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
147 ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
148
149 return true;
150}
151
106static int update_qpd_vi(struct device_queue_manager *dqm, 152static int update_qpd_vi(struct device_queue_manager *dqm,
107 struct qcm_process_device *qpd) 153 struct qcm_process_device *qpd)
108{ 154{
@@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm,
144 return 0; 190 return 0;
145} 191}
146 192
193static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
194 struct qcm_process_device *qpd)
195{
196 struct kfd_process_device *pdd;
197 unsigned int temp;
198
199 pdd = qpd_to_pdd(qpd);
200
201 /* check if sh_mem_config register already configured */
202 if (qpd->sh_mem_config == 0) {
203 qpd->sh_mem_config =
204 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
205 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
206 MTYPE_UC <<
207 SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
208 MTYPE_UC <<
209 SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
210
211 qpd->sh_mem_ape1_limit = 0;
212 qpd->sh_mem_ape1_base = 0;
213 }
214
215 /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
216 * aperture addresses.
217 */
218 temp = get_sh_mem_bases_nybble_64(pdd);
219 qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
220
221 pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
222 temp, qpd->sh_mem_bases);
223
224 return 0;
225}
226
147static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, 227static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
148 struct qcm_process_device *qpd) 228 struct qcm_process_device *qpd)
149{ 229{
@@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
159 239
160 q->properties.sdma_vm_addr = value; 240 q->properties.sdma_vm_addr = value;
161} 241}
242
243static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
244 struct queue *q,
245 struct qcm_process_device *qpd)
246{
247 /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
248 * aperture addresses.
249 */
250 q->properties.sdma_vm_addr =
251 ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
252 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
253 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
254}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 93aae5c1e78b..6fb9c0d46d63 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -30,6 +30,7 @@
30#include <linux/memory.h> 30#include <linux/memory.h>
31#include "kfd_priv.h" 31#include "kfd_priv.h"
32#include "kfd_events.h" 32#include "kfd_events.h"
33#include "kfd_iommu.h"
33#include <linux/device.h> 34#include <linux/device.h>
34 35
35/* 36/*
@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
837 } 838 }
838} 839}
839 840
841#ifdef KFD_SUPPORT_IOMMU_V2
840void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, 842void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
841 unsigned long address, bool is_write_requested, 843 unsigned long address, bool is_write_requested,
842 bool is_execute_requested) 844 bool is_execute_requested)
@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
905 mutex_unlock(&p->event_mutex); 907 mutex_unlock(&p->event_mutex);
906 kfd_unref_process(p); 908 kfd_unref_process(p);
907} 909}
910#endif /* KFD_SUPPORT_IOMMU_V2 */
908 911
909void kfd_signal_hw_exception_event(unsigned int pasid) 912void kfd_signal_hw_exception_event(unsigned int pasid)
910{ 913{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
new file mode 100644
index 000000000000..c71817963eea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -0,0 +1,357 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/printk.h>
24#include <linux/device.h>
25#include <linux/slab.h>
26#include <linux/pci.h>
27#include <linux/amd-iommu.h>
28#include "kfd_priv.h"
29#include "kfd_dbgmgr.h"
30#include "kfd_topology.h"
31#include "kfd_iommu.h"
32
33static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
34 AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
35 AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
36
37/** kfd_iommu_check_device - Check whether IOMMU is available for device
38 */
39int kfd_iommu_check_device(struct kfd_dev *kfd)
40{
41 struct amd_iommu_device_info iommu_info;
42 int err;
43
44 if (!kfd->device_info->needs_iommu_device)
45 return -ENODEV;
46
47 iommu_info.flags = 0;
48 err = amd_iommu_device_info(kfd->pdev, &iommu_info);
49 if (err)
50 return err;
51
52 if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
53 return -ENODEV;
54
55 return 0;
56}
57
58/** kfd_iommu_device_init - Initialize IOMMU for device
59 */
60int kfd_iommu_device_init(struct kfd_dev *kfd)
61{
62 struct amd_iommu_device_info iommu_info;
63 unsigned int pasid_limit;
64 int err;
65
66 if (!kfd->device_info->needs_iommu_device)
67 return 0;
68
69 iommu_info.flags = 0;
70 err = amd_iommu_device_info(kfd->pdev, &iommu_info);
71 if (err < 0) {
72 dev_err(kfd_device,
73 "error getting iommu info. is the iommu enabled?\n");
74 return -ENODEV;
75 }
76
77 if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
78 dev_err(kfd_device,
79 "error required iommu flags ats %i, pri %i, pasid %i\n",
80 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
81 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
82 (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
83 != 0);
84 return -ENODEV;
85 }
86
87 pasid_limit = min_t(unsigned int,
88 (unsigned int)(1 << kfd->device_info->max_pasid_bits),
89 iommu_info.max_pasids);
90
91 if (!kfd_set_pasid_limit(pasid_limit)) {
92 dev_err(kfd_device, "error setting pasid limit\n");
93 return -EBUSY;
94 }
95
96 return 0;
97}
98
99/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
100 *
101 * Binds the given process to the given device using its PASID. This
102 * enables IOMMUv2 address translation for the process on the device.
103 *
104 * This function assumes that the process mutex is held.
105 */
106int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
107{
108 struct kfd_dev *dev = pdd->dev;
109 struct kfd_process *p = pdd->process;
110 int err;
111
112 if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
113 return 0;
114
115 if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
116 pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
117 return -EINVAL;
118 }
119
120 err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
121 if (!err)
122 pdd->bound = PDD_BOUND;
123
124 return err;
125}
126
127/** kfd_iommu_unbind_process - Unbind process from all devices
128 *
129 * This removes all IOMMU device bindings of the process. To be used
130 * before process termination.
131 */
132void kfd_iommu_unbind_process(struct kfd_process *p)
133{
134 struct kfd_process_device *pdd;
135
136 list_for_each_entry(pdd, &p->per_device_data, per_device_list)
137 if (pdd->bound == PDD_BOUND)
138 amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
139}
140
141/* Callback for process shutdown invoked by the IOMMU driver */
142static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
143{
144 struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
145 struct kfd_process *p;
146 struct kfd_process_device *pdd;
147
148 if (!dev)
149 return;
150
151 /*
152 * Look for the process that matches the pasid. If there is no such
153 * process, we either released it in amdkfd's own notifier, or there
154 * is a bug. Unfortunately, there is no way to tell...
155 */
156 p = kfd_lookup_process_by_pasid(pasid);
157 if (!p)
158 return;
159
160 pr_debug("Unbinding process %d from IOMMU\n", pasid);
161
162 mutex_lock(kfd_get_dbgmgr_mutex());
163
164 if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
165 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
166 kfd_dbgmgr_destroy(dev->dbgmgr);
167 dev->dbgmgr = NULL;
168 }
169 }
170
171 mutex_unlock(kfd_get_dbgmgr_mutex());
172
173 mutex_lock(&p->mutex);
174
175 pdd = kfd_get_process_device_data(dev, p);
176 if (pdd)
177 /* For GPU relying on IOMMU, we need to dequeue here
178 * when PASID is still bound.
179 */
180 kfd_process_dequeue_from_device(pdd);
181
182 mutex_unlock(&p->mutex);
183
184 kfd_unref_process(p);
185}
186
187/* This function called by IOMMU driver on PPR failure */
188static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
189 unsigned long address, u16 flags)
190{
191 struct kfd_dev *dev;
192
193 dev_warn(kfd_device,
194 "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
195 PCI_BUS_NUM(pdev->devfn),
196 PCI_SLOT(pdev->devfn),
197 PCI_FUNC(pdev->devfn),
198 pasid,
199 address,
200 flags);
201
202 dev = kfd_device_by_pci_dev(pdev);
203 if (!WARN_ON(!dev))
204 kfd_signal_iommu_event(dev, pasid, address,
205 flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
206
207 return AMD_IOMMU_INV_PRI_RSP_INVALID;
208}
209
210/*
 211 * Bind processes to the device that have been temporarily unbound
212 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
213 */
214static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
215{
216 struct kfd_process_device *pdd;
217 struct kfd_process *p;
218 unsigned int temp;
219 int err = 0;
220
221 int idx = srcu_read_lock(&kfd_processes_srcu);
222
223 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
224 mutex_lock(&p->mutex);
225 pdd = kfd_get_process_device_data(kfd, p);
226
227 if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
228 mutex_unlock(&p->mutex);
229 continue;
230 }
231
232 err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
233 p->lead_thread);
234 if (err < 0) {
235 pr_err("Unexpected pasid %d binding failure\n",
236 p->pasid);
237 mutex_unlock(&p->mutex);
238 break;
239 }
240
241 pdd->bound = PDD_BOUND;
242 mutex_unlock(&p->mutex);
243 }
244
245 srcu_read_unlock(&kfd_processes_srcu, idx);
246
247 return err;
248}
249
250/*
251 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
252 * processes will be restored to PDD_BOUND state in
253 * kfd_bind_processes_to_device.
254 */
255static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
256{
257 struct kfd_process_device *pdd;
258 struct kfd_process *p;
259 unsigned int temp;
260
261 int idx = srcu_read_lock(&kfd_processes_srcu);
262
263 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
264 mutex_lock(&p->mutex);
265 pdd = kfd_get_process_device_data(kfd, p);
266
267 if (WARN_ON(!pdd)) {
268 mutex_unlock(&p->mutex);
269 continue;
270 }
271
272 if (pdd->bound == PDD_BOUND)
273 pdd->bound = PDD_BOUND_SUSPENDED;
274 mutex_unlock(&p->mutex);
275 }
276
277 srcu_read_unlock(&kfd_processes_srcu, idx);
278}
279
280/** kfd_iommu_suspend - Prepare IOMMU for suspend
281 *
282 * This unbinds processes from the device and disables the IOMMU for
283 * the device.
284 */
285void kfd_iommu_suspend(struct kfd_dev *kfd)
286{
287 if (!kfd->device_info->needs_iommu_device)
288 return;
289
290 kfd_unbind_processes_from_device(kfd);
291
292 amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
293 amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
294 amd_iommu_free_device(kfd->pdev);
295}
296
297/** kfd_iommu_resume - Restore IOMMU after resume
298 *
299 * This reinitializes the IOMMU for the device and re-binds previously
300 * suspended processes to the device.
301 */
302int kfd_iommu_resume(struct kfd_dev *kfd)
303{
304 unsigned int pasid_limit;
305 int err;
306
307 if (!kfd->device_info->needs_iommu_device)
308 return 0;
309
310 pasid_limit = kfd_get_pasid_limit();
311
312 err = amd_iommu_init_device(kfd->pdev, pasid_limit);
313 if (err)
314 return -ENXIO;
315
316 amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
317 iommu_pasid_shutdown_callback);
318 amd_iommu_set_invalid_ppr_cb(kfd->pdev,
319 iommu_invalid_ppr_cb);
320
321 err = kfd_bind_processes_to_device(kfd);
322 if (err) {
323 amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
324 amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
325 amd_iommu_free_device(kfd->pdev);
326 return err;
327 }
328
329 return 0;
330}
331
332extern bool amd_iommu_pc_supported(void);
333extern u8 amd_iommu_pc_get_max_banks(u16 devid);
334extern u8 amd_iommu_pc_get_max_counters(u16 devid);
335
336/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
337 */
338int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
339{
340 struct kfd_perf_properties *props;
341
342 if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
343 return 0;
344
345 if (!amd_iommu_pc_supported())
346 return 0;
347
348 props = kfd_alloc_struct(props);
349 if (!props)
350 return -ENOMEM;
351 strcpy(props->block_name, "iommu");
352 props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
353 amd_iommu_pc_get_max_counters(0); /* assume one iommu */
354 list_add_tail(&props->list, &kdev->perf_props);
355
356 return 0;
357}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
new file mode 100644
index 000000000000..dd23d9fdf6a8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef __KFD_IOMMU_H__
24#define __KFD_IOMMU_H__
25
26#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
27
28#define KFD_SUPPORT_IOMMU_V2
29
30int kfd_iommu_check_device(struct kfd_dev *kfd);
31int kfd_iommu_device_init(struct kfd_dev *kfd);
32
33int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
34void kfd_iommu_unbind_process(struct kfd_process *p);
35
36void kfd_iommu_suspend(struct kfd_dev *kfd);
37int kfd_iommu_resume(struct kfd_dev *kfd);
38
39int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
40
41#else
42
43static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
44{
45 return -ENODEV;
46}
47static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
48{
49 return 0;
50}
51
52static inline int kfd_iommu_bind_process_to_device(
53 struct kfd_process_device *pdd)
54{
55 return 0;
56}
57static inline void kfd_iommu_unbind_process(struct kfd_process *p)
58{
59 /* empty */
60}
61
62static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
63{
64 /* empty */
65}
66static inline int kfd_iommu_resume(struct kfd_dev *kfd)
67{
68 return 0;
69}
70
71static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
72{
73 return 0;
74}
75
 76#endif /* defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) */
77
78#endif /* __KFD_IOMMU_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 5dc6567d4a13..69f496485331 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
297 297
298 switch (dev->device_info->asic_family) { 298 switch (dev->device_info->asic_family) {
299 case CHIP_CARRIZO: 299 case CHIP_CARRIZO:
300 case CHIP_TONGA:
301 case CHIP_FIJI:
302 case CHIP_POLARIS10:
303 case CHIP_POLARIS11:
300 kernel_queue_init_vi(&kq->ops_asic_specific); 304 kernel_queue_init_vi(&kq->ops_asic_specific);
301 break; 305 break;
302 306
303 case CHIP_KAVERI: 307 case CHIP_KAVERI:
308 case CHIP_HAWAII:
304 kernel_queue_init_cik(&kq->ops_asic_specific); 309 kernel_queue_init_cik(&kq->ops_asic_specific);
305 break; 310 break;
306 default: 311 default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 3ac72bed4f31..65574c6a10b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
43 .interrupt = kgd2kfd_interrupt, 43 .interrupt = kgd2kfd_interrupt,
44 .suspend = kgd2kfd_suspend, 44 .suspend = kgd2kfd_suspend,
45 .resume = kgd2kfd_resume, 45 .resume = kgd2kfd_resume,
46 .schedule_evict_and_restore_process =
47 kgd2kfd_schedule_evict_and_restore_process,
46}; 48};
47 49
48int sched_policy = KFD_SCHED_POLICY_HWS; 50int sched_policy = KFD_SCHED_POLICY_HWS;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index dfd260ef81ff..ee7061e1c466 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
29 switch (dev->device_info->asic_family) { 29 switch (dev->device_info->asic_family) {
30 case CHIP_KAVERI: 30 case CHIP_KAVERI:
31 return mqd_manager_init_cik(type, dev); 31 return mqd_manager_init_cik(type, dev);
32 case CHIP_HAWAII:
33 return mqd_manager_init_cik_hawaii(type, dev);
32 case CHIP_CARRIZO: 34 case CHIP_CARRIZO:
33 return mqd_manager_init_vi(type, dev); 35 return mqd_manager_init_vi(type, dev);
36 case CHIP_TONGA:
37 case CHIP_FIJI:
38 case CHIP_POLARIS10:
39 case CHIP_POLARIS11:
40 return mqd_manager_init_vi_tonga(type, dev);
34 default: 41 default:
35 WARN(1, "Unexpected ASIC family %u", 42 WARN(1, "Unexpected ASIC family %u",
36 dev->device_info->asic_family); 43 dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index f8ef4a051e08..c00c325ed3c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
170 mms); 170 mms);
171} 171}
172 172
173static int update_mqd(struct mqd_manager *mm, void *mqd, 173static int __update_mqd(struct mqd_manager *mm, void *mqd,
174 struct queue_properties *q) 174 struct queue_properties *q, unsigned int atc_bit)
175{ 175{
176 struct cik_mqd *m; 176 struct cik_mqd *m;
177 177
178 m = get_mqd(mqd); 178 m = get_mqd(mqd);
179 m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | 179 m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
180 DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; 180 DEFAULT_MIN_AVAIL_SIZE;
181 m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
182 if (atc_bit) {
183 m->cp_hqd_pq_control |= PQ_ATC_EN;
184 m->cp_hqd_ib_control |= IB_ATC_EN;
185 }
181 186
182 /* 187 /*
183 * Calculating queue size which is log base 2 of actual queue size -1 188 * Calculating queue size which is log base 2 of actual queue size -1
@@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
197 202
198 q->is_active = (q->queue_size > 0 && 203 q->is_active = (q->queue_size > 0 &&
199 q->queue_address != 0 && 204 q->queue_address != 0 &&
200 q->queue_percent > 0); 205 q->queue_percent > 0 &&
206 !q->is_evicted);
201 207
202 return 0; 208 return 0;
203} 209}
204 210
211static int update_mqd(struct mqd_manager *mm, void *mqd,
212 struct queue_properties *q)
213{
214 return __update_mqd(mm, mqd, q, 1);
215}
216
217static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
218 struct queue_properties *q)
219{
220 return __update_mqd(mm, mqd, q, 0);
221}
222
205static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, 223static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
206 struct queue_properties *q) 224 struct queue_properties *q)
207{ 225{
@@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
228 246
229 q->is_active = (q->queue_size > 0 && 247 q->is_active = (q->queue_size > 0 &&
230 q->queue_address != 0 && 248 q->queue_address != 0 &&
231 q->queue_percent > 0); 249 q->queue_percent > 0 &&
250 !q->is_evicted);
232 251
233 return 0; 252 return 0;
234} 253}
@@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
360 379
361 q->is_active = (q->queue_size > 0 && 380 q->is_active = (q->queue_size > 0 &&
362 q->queue_address != 0 && 381 q->queue_address != 0 &&
363 q->queue_percent > 0); 382 q->queue_percent > 0 &&
383 !q->is_evicted);
364 384
365 return 0; 385 return 0;
366} 386}
@@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
441 return mqd; 461 return mqd;
442} 462}
443 463
464struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
465 struct kfd_dev *dev)
466{
467 struct mqd_manager *mqd;
468
469 mqd = mqd_manager_init_cik(type, dev);
470 if (!mqd)
471 return NULL;
472 if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
473 mqd->update_mqd = update_mqd_hawaii;
474 return mqd;
475}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 971aec0637dc..89e4242e43e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
151 151
152 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); 152 m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
153 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); 153 m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
154 m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
155 m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
154 156
155 m->cp_hqd_pq_doorbell_control = 157 m->cp_hqd_pq_doorbell_control =
156 q->doorbell_off << 158 q->doorbell_off <<
@@ -196,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
196 198
197 q->is_active = (q->queue_size > 0 && 199 q->is_active = (q->queue_size > 0 &&
198 q->queue_address != 0 && 200 q->queue_address != 0 &&
199 q->queue_percent > 0); 201 q->queue_percent > 0 &&
202 !q->is_evicted);
200 203
201 return 0; 204 return 0;
202} 205}
@@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
208 return __update_mqd(mm, mqd, q, MTYPE_CC, 1); 211 return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
209} 212}
210 213
214static int update_mqd_tonga(struct mqd_manager *mm, void *mqd,
215 struct queue_properties *q)
216{
217 return __update_mqd(mm, mqd, q, MTYPE_UC, 0);
218}
219
211static int destroy_mqd(struct mqd_manager *mm, void *mqd, 220static int destroy_mqd(struct mqd_manager *mm, void *mqd,
212 enum kfd_preempt_type type, 221 enum kfd_preempt_type type,
213 unsigned int timeout, uint32_t pipe_id, 222 unsigned int timeout, uint32_t pipe_id,
@@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
334 343
335 q->is_active = (q->queue_size > 0 && 344 q->is_active = (q->queue_size > 0 &&
336 q->queue_address != 0 && 345 q->queue_address != 0 &&
337 q->queue_percent > 0); 346 q->queue_percent > 0 &&
347 !q->is_evicted);
338 348
339 return 0; 349 return 0;
340} 350}
@@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
432 442
433 return mqd; 443 return mqd;
434} 444}
445
446struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
447 struct kfd_dev *dev)
448{
449 struct mqd_manager *mqd;
450
451 mqd = mqd_manager_init_vi(type, dev);
452 if (!mqd)
453 return NULL;
454 if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
455 mqd->update_mqd = update_mqd_tonga;
456 return mqd;
457}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0bedcf9cc08c..cac7aa258162 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -158,6 +158,8 @@ struct kfd_device_info {
158 uint8_t num_of_watch_points; 158 uint8_t num_of_watch_points;
159 uint16_t mqd_size_aligned; 159 uint16_t mqd_size_aligned;
160 bool supports_cwsr; 160 bool supports_cwsr;
161 bool needs_iommu_device;
162 bool needs_pci_atomics;
161}; 163};
162 164
163struct kfd_mem_obj { 165struct kfd_mem_obj {
@@ -333,7 +335,11 @@ enum kfd_queue_format {
333 * @is_interop: Defines if this is a interop queue. Interop queue means that 335 * @is_interop: Defines if this is a interop queue. Interop queue means that
334 * the queue can access both graphics and compute resources. 336 * the queue can access both graphics and compute resources.
335 * 337 *
336 * @is_active: Defines if the queue is active or not. 338 * @is_evicted: Defines if the queue is evicted. Only active queues
339 * are evicted, rendering them inactive.
340 *
341 * @is_active: Defines if the queue is active or not. @is_active and
342 * @is_evicted are protected by the DQM lock.
337 * 343 *
338 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 344 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
339 * of the queue. 345 * of the queue.
@@ -355,6 +361,7 @@ struct queue_properties {
355 uint32_t __iomem *doorbell_ptr; 361 uint32_t __iomem *doorbell_ptr;
356 uint32_t doorbell_off; 362 uint32_t doorbell_off;
357 bool is_interop; 363 bool is_interop;
364 bool is_evicted;
358 bool is_active; 365 bool is_active;
359 /* Not relevant for user mode queues in cp scheduling */ 366 /* Not relevant for user mode queues in cp scheduling */
360 unsigned int vmid; 367 unsigned int vmid;
@@ -458,6 +465,7 @@ struct qcm_process_device {
458 unsigned int queue_count; 465 unsigned int queue_count;
459 unsigned int vmid; 466 unsigned int vmid;
460 bool is_debug; 467 bool is_debug;
468 unsigned int evicted; /* eviction counter, 0=active */
461 469
462 /* This flag tells if we should reset all wavefronts on 470 /* This flag tells if we should reset all wavefronts on
463 * process termination 471 * process termination
@@ -484,6 +492,17 @@ struct qcm_process_device {
484 uint64_t tma_addr; 492 uint64_t tma_addr;
485}; 493};
486 494
495/* KFD Memory Eviction */
496
497/* Approx. wait time before attempting to restore evicted BOs */
498#define PROCESS_RESTORE_TIME_MS 100
499/* Approx. back off time if restore fails due to lack of memory */
500#define PROCESS_BACK_OFF_TIME_MS 100
501/* Approx. time before evicting the process again */
502#define PROCESS_ACTIVE_TIME_MS 10
503
504int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
505 struct dma_fence *fence);
487 506
488enum kfd_pdd_bound { 507enum kfd_pdd_bound {
489 PDD_UNBOUND = 0, 508 PDD_UNBOUND = 0,
@@ -516,8 +535,8 @@ struct kfd_process_device {
516 uint64_t scratch_base; 535 uint64_t scratch_base;
517 uint64_t scratch_limit; 536 uint64_t scratch_limit;
518 537
519 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ 538 /* VM context for GPUVM allocations */
520 enum kfd_pdd_bound bound; 539 void *vm;
521 540
522 /* Flag used to tell the pdd has dequeued from the dqm. 541 /* Flag used to tell the pdd has dequeued from the dqm.
523 * This is used to prevent dev->dqm->ops.process_termination() from 542 * This is used to prevent dev->dqm->ops.process_termination() from
@@ -525,6 +544,9 @@ struct kfd_process_device {
525 * function. 544 * function.
526 */ 545 */
527 bool already_dequeued; 546 bool already_dequeued;
547
548 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
549 enum kfd_pdd_bound bound;
528}; 550};
529 551
530#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 552#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -587,8 +609,30 @@ struct kfd_process {
587 size_t signal_mapped_size; 609 size_t signal_mapped_size;
588 size_t signal_event_count; 610 size_t signal_event_count;
589 bool signal_event_limit_reached; 611 bool signal_event_limit_reached;
612
613 /* Information used for memory eviction */
614 void *kgd_process_info;
615 /* Eviction fence that is attached to all the BOs of this process. The
616 * fence will be triggered during eviction and new one will be created
617 * during restore
618 */
619 struct dma_fence *ef;
620
621 /* Work items for evicting and restoring BOs */
622 struct delayed_work eviction_work;
623 struct delayed_work restore_work;
624 /* seqno of the last scheduled eviction */
625 unsigned int last_eviction_seqno;
626 /* Approx. the last timestamp (in jiffies) when the process was
627 * restored after an eviction
628 */
629 unsigned long last_restore_timestamp;
590}; 630};
591 631
632#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
633extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
634extern struct srcu_struct kfd_processes_srcu;
635
592/** 636/**
593 * Ioctl function type. 637 * Ioctl function type.
594 * 638 *
@@ -612,13 +656,13 @@ void kfd_process_destroy_wq(void);
612struct kfd_process *kfd_create_process(struct file *filep); 656struct kfd_process *kfd_create_process(struct file *filep);
613struct kfd_process *kfd_get_process(const struct task_struct *); 657struct kfd_process *kfd_get_process(const struct task_struct *);
614struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); 658struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
659struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
615void kfd_unref_process(struct kfd_process *p); 660void kfd_unref_process(struct kfd_process *p);
661void kfd_suspend_all_processes(void);
662int kfd_resume_all_processes(void);
616 663
617struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 664struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
618 struct kfd_process *p); 665 struct kfd_process *p);
619int kfd_bind_processes_to_device(struct kfd_dev *dev);
620void kfd_unbind_processes_from_device(struct kfd_dev *dev);
621void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
622struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 666struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
623 struct kfd_process *p); 667 struct kfd_process *p);
624struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, 668struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
@@ -705,8 +749,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
705 struct kfd_dev *dev); 749 struct kfd_dev *dev);
706struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 750struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
707 struct kfd_dev *dev); 751 struct kfd_dev *dev);
752struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
753 struct kfd_dev *dev);
708struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, 754struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
709 struct kfd_dev *dev); 755 struct kfd_dev *dev);
756struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
757 struct kfd_dev *dev);
710struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 758struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
711void device_queue_manager_uninit(struct device_queue_manager *dqm); 759void device_queue_manager_uninit(struct device_queue_manager *dqm);
712struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 760struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -795,6 +843,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
795 uint64_t *event_page_offset, uint32_t *event_slot_index); 843 uint64_t *event_page_offset, uint32_t *event_slot_index);
796int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); 844int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
797 845
846void kfd_flush_tlb(struct kfd_process_device *pdd);
847
798int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); 848int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
799 849
800/* Debugfs */ 850/* Debugfs */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4ff5f0fe6db8..18b2b86ad503 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -34,17 +34,18 @@
34struct mm_struct; 34struct mm_struct;
35 35
36#include "kfd_priv.h" 36#include "kfd_priv.h"
37#include "kfd_device_queue_manager.h"
37#include "kfd_dbgmgr.h" 38#include "kfd_dbgmgr.h"
39#include "kfd_iommu.h"
38 40
39/* 41/*
40 * List of struct kfd_process (field kfd_process). 42 * List of struct kfd_process (field kfd_process).
41 * Unique/indexed by mm_struct* 43 * Unique/indexed by mm_struct*
42 */ 44 */
43#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ 45DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
44static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
45static DEFINE_MUTEX(kfd_processes_mutex); 46static DEFINE_MUTEX(kfd_processes_mutex);
46 47
47DEFINE_STATIC_SRCU(kfd_processes_srcu); 48DEFINE_SRCU(kfd_processes_srcu);
48 49
49static struct workqueue_struct *kfd_process_wq; 50static struct workqueue_struct *kfd_process_wq;
50 51
@@ -54,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
54 struct file *filep); 55 struct file *filep);
55static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); 56static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
56 57
58static void evict_process_worker(struct work_struct *work);
59static void restore_process_worker(struct work_struct *work);
60
57 61
58void kfd_process_create_wq(void) 62void kfd_process_create_wq(void)
59{ 63{
@@ -154,6 +158,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
154 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", 158 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
155 pdd->dev->id, p->pasid); 159 pdd->dev->id, p->pasid);
156 160
161 if (pdd->vm)
162 pdd->dev->kfd2kgd->destroy_process_vm(
163 pdd->dev->kgd, pdd->vm);
164
157 list_del(&pdd->per_device_list); 165 list_del(&pdd->per_device_list);
158 166
159 if (pdd->qpd.cwsr_kaddr) 167 if (pdd->qpd.cwsr_kaddr)
@@ -173,16 +181,11 @@ static void kfd_process_wq_release(struct work_struct *work)
173{ 181{
174 struct kfd_process *p = container_of(work, struct kfd_process, 182 struct kfd_process *p = container_of(work, struct kfd_process,
175 release_work); 183 release_work);
176 struct kfd_process_device *pdd;
177 184
178 pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); 185 kfd_iommu_unbind_process(p);
179
180 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
181 if (pdd->bound == PDD_BOUND)
182 amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
183 }
184 186
185 kfd_process_destroy_pdds(p); 187 kfd_process_destroy_pdds(p);
188 dma_fence_put(p->ef);
186 189
187 kfd_event_free_process(p); 190 kfd_event_free_process(p);
188 191
@@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
230 mutex_unlock(&kfd_processes_mutex); 233 mutex_unlock(&kfd_processes_mutex);
231 synchronize_srcu(&kfd_processes_srcu); 234 synchronize_srcu(&kfd_processes_srcu);
232 235
236 cancel_delayed_work_sync(&p->eviction_work);
237 cancel_delayed_work_sync(&p->restore_work);
238
233 mutex_lock(&p->mutex); 239 mutex_lock(&p->mutex);
234 240
235 /* Iterate over all process device data structures and if the 241 /* Iterate over all process device data structures and if the
@@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread,
351 if (err != 0) 357 if (err != 0)
352 goto err_init_apertures; 358 goto err_init_apertures;
353 359
360 INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
361 INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
362 process->last_restore_timestamp = get_jiffies_64();
363
354 err = kfd_process_init_cwsr(process, filep); 364 err = kfd_process_init_cwsr(process, filep);
355 if (err) 365 if (err)
356 goto err_init_cwsr; 366 goto err_init_cwsr;
@@ -402,12 +412,24 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
402 INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); 412 INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
403 pdd->qpd.dqm = dev->dqm; 413 pdd->qpd.dqm = dev->dqm;
404 pdd->qpd.pqm = &p->pqm; 414 pdd->qpd.pqm = &p->pqm;
415 pdd->qpd.evicted = 0;
405 pdd->process = p; 416 pdd->process = p;
406 pdd->bound = PDD_UNBOUND; 417 pdd->bound = PDD_UNBOUND;
407 pdd->already_dequeued = false; 418 pdd->already_dequeued = false;
408 list_add(&pdd->per_device_list, &p->per_device_data); 419 list_add(&pdd->per_device_list, &p->per_device_data);
409 420
421 /* Create the GPUVM context for this specific device */
422 if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
423 &p->kgd_process_info, &p->ef)) {
424 pr_err("Failed to create process VM object\n");
425 goto err_create_pdd;
426 }
410 return pdd; 427 return pdd;
428
429err_create_pdd:
430 list_del(&pdd->per_device_list);
431 kfree(pdd);
432 return NULL;
411} 433}
412 434
413/* 435/*
@@ -429,174 +451,256 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
429 return ERR_PTR(-ENOMEM); 451 return ERR_PTR(-ENOMEM);
430 } 452 }
431 453
432 if (pdd->bound == PDD_BOUND) { 454 err = kfd_iommu_bind_process_to_device(pdd);
433 return pdd; 455 if (err)
434 } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
435 pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
436 return ERR_PTR(-EINVAL);
437 }
438
439 err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
440 if (err < 0)
441 return ERR_PTR(err); 456 return ERR_PTR(err);
442 457
443 pdd->bound = PDD_BOUND;
444
445 return pdd; 458 return pdd;
446} 459}
447 460
448/* 461struct kfd_process_device *kfd_get_first_process_device_data(
449 * Bind processes do the device that have been temporarily unbound 462 struct kfd_process *p)
450 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
451 */
452int kfd_bind_processes_to_device(struct kfd_dev *dev)
453{ 463{
454 struct kfd_process_device *pdd; 464 return list_first_entry(&p->per_device_data,
455 struct kfd_process *p; 465 struct kfd_process_device,
466 per_device_list);
467}
468
469struct kfd_process_device *kfd_get_next_process_device_data(
470 struct kfd_process *p,
471 struct kfd_process_device *pdd)
472{
473 if (list_is_last(&pdd->per_device_list, &p->per_device_data))
474 return NULL;
475 return list_next_entry(pdd, per_device_list);
476}
477
478bool kfd_has_process_device_data(struct kfd_process *p)
479{
480 return !(list_empty(&p->per_device_data));
481}
482
483/* This increments the process->ref counter. */
484struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
485{
486 struct kfd_process *p, *ret_p = NULL;
456 unsigned int temp; 487 unsigned int temp;
457 int err = 0;
458 488
459 int idx = srcu_read_lock(&kfd_processes_srcu); 489 int idx = srcu_read_lock(&kfd_processes_srcu);
460 490
461 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 491 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
462 mutex_lock(&p->mutex); 492 if (p->pasid == pasid) {
463 pdd = kfd_get_process_device_data(dev, p); 493 kref_get(&p->ref);
464 494 ret_p = p;
465 if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
466 mutex_unlock(&p->mutex);
467 continue;
468 }
469
470 err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
471 p->lead_thread);
472 if (err < 0) {
473 pr_err("Unexpected pasid %d binding failure\n",
474 p->pasid);
475 mutex_unlock(&p->mutex);
476 break; 495 break;
477 } 496 }
478
479 pdd->bound = PDD_BOUND;
480 mutex_unlock(&p->mutex);
481 } 497 }
482 498
483 srcu_read_unlock(&kfd_processes_srcu, idx); 499 srcu_read_unlock(&kfd_processes_srcu, idx);
484 500
485 return err; 501 return ret_p;
486} 502}
487 503
488/* 504/* This increments the process->ref counter. */
489 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These 505struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
490 * processes will be restored to PDD_BOUND state in
491 * kfd_bind_processes_to_device.
492 */
493void kfd_unbind_processes_from_device(struct kfd_dev *dev)
494{ 506{
495 struct kfd_process_device *pdd;
496 struct kfd_process *p; 507 struct kfd_process *p;
497 unsigned int temp;
498 508
499 int idx = srcu_read_lock(&kfd_processes_srcu); 509 int idx = srcu_read_lock(&kfd_processes_srcu);
500 510
501 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 511 p = find_process_by_mm(mm);
502 mutex_lock(&p->mutex); 512 if (p)
503 pdd = kfd_get_process_device_data(dev, p); 513 kref_get(&p->ref);
504
505 if (WARN_ON(!pdd)) {
506 mutex_unlock(&p->mutex);
507 continue;
508 }
509
510 if (pdd->bound == PDD_BOUND)
511 pdd->bound = PDD_BOUND_SUSPENDED;
512 mutex_unlock(&p->mutex);
513 }
514 514
515 srcu_read_unlock(&kfd_processes_srcu, idx); 515 srcu_read_unlock(&kfd_processes_srcu, idx);
516
517 return p;
516} 518}
517 519
518void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) 520/* process_evict_queues - Evict all user queues of a process
521 *
522 * Eviction is reference-counted per process-device. This means multiple
523 * evictions from different sources can be nested safely.
524 */
525static int process_evict_queues(struct kfd_process *p)
519{ 526{
520 struct kfd_process *p;
521 struct kfd_process_device *pdd; 527 struct kfd_process_device *pdd;
528 int r = 0;
529 unsigned int n_evicted = 0;
522 530
523 /* 531 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
524 * Look for the process that matches the pasid. If there is no such 532 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
525 * process, we either released it in amdkfd's own notifier, or there 533 &pdd->qpd);
526 * is a bug. Unfortunately, there is no way to tell... 534 if (r) {
527 */ 535 pr_err("Failed to evict process queues\n");
528 p = kfd_lookup_process_by_pasid(pasid); 536 goto fail;
529 if (!p) 537 }
530 return; 538 n_evicted++;
539 }
531 540
532 pr_debug("Unbinding process %d from IOMMU\n", pasid); 541 return r;
533 542
534 mutex_lock(kfd_get_dbgmgr_mutex()); 543fail:
544 /* To keep state consistent, roll back partial eviction by
545 * restoring queues
546 */
547 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
548 if (n_evicted == 0)
549 break;
550 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
551 &pdd->qpd))
552 pr_err("Failed to restore queues\n");
535 553
536 if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { 554 n_evicted--;
537 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
538 kfd_dbgmgr_destroy(dev->dbgmgr);
539 dev->dbgmgr = NULL;
540 }
541 } 555 }
542 556
543 mutex_unlock(kfd_get_dbgmgr_mutex()); 557 return r;
544 558}
545 mutex_lock(&p->mutex);
546 559
547 pdd = kfd_get_process_device_data(dev, p); 560/* process_restore_queues - Restore all user queues of a process */
548 if (pdd) 561static int process_restore_queues(struct kfd_process *p)
549 /* For GPU relying on IOMMU, we need to dequeue here 562{
550 * when PASID is still bound. 563 struct kfd_process_device *pdd;
551 */ 564 int r, ret = 0;
552 kfd_process_dequeue_from_device(pdd);
553 565
554 mutex_unlock(&p->mutex); 566 list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
567 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
568 &pdd->qpd);
569 if (r) {
570 pr_err("Failed to restore process queues\n");
571 if (!ret)
572 ret = r;
573 }
574 }
555 575
556 kfd_unref_process(p); 576 return ret;
557} 577}
558 578
559struct kfd_process_device *kfd_get_first_process_device_data( 579static void evict_process_worker(struct work_struct *work)
560 struct kfd_process *p)
561{ 580{
562 return list_first_entry(&p->per_device_data, 581 int ret;
563 struct kfd_process_device, 582 struct kfd_process *p;
564 per_device_list); 583 struct delayed_work *dwork;
584
585 dwork = to_delayed_work(work);
586
587 /* Process termination destroys this worker thread. So during the
588 * lifetime of this thread, kfd_process p will be valid
589 */
590 p = container_of(dwork, struct kfd_process, eviction_work);
591 WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
592 "Eviction fence mismatch\n");
593
594 /* Narrow window of overlap between restore and evict work
595 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
596 * unreserves KFD BOs, it is possible to evicted again. But
597 * restore has few more steps of finish. So lets wait for any
598 * previous restore work to complete
599 */
600 flush_delayed_work(&p->restore_work);
601
602 pr_debug("Started evicting pasid %d\n", p->pasid);
603 ret = process_evict_queues(p);
604 if (!ret) {
605 dma_fence_signal(p->ef);
606 dma_fence_put(p->ef);
607 p->ef = NULL;
608 schedule_delayed_work(&p->restore_work,
609 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
610
611 pr_debug("Finished evicting pasid %d\n", p->pasid);
612 } else
613 pr_err("Failed to evict queues of pasid %d\n", p->pasid);
565} 614}
566 615
567struct kfd_process_device *kfd_get_next_process_device_data( 616static void restore_process_worker(struct work_struct *work)
568 struct kfd_process *p,
569 struct kfd_process_device *pdd)
570{ 617{
571 if (list_is_last(&pdd->per_device_list, &p->per_device_data)) 618 struct delayed_work *dwork;
572 return NULL; 619 struct kfd_process *p;
573 return list_next_entry(pdd, per_device_list); 620 struct kfd_process_device *pdd;
621 int ret = 0;
622
623 dwork = to_delayed_work(work);
624
625 /* Process termination destroys this worker thread. So during the
626 * lifetime of this thread, kfd_process p will be valid
627 */
628 p = container_of(dwork, struct kfd_process, restore_work);
629
630 /* Call restore_process_bos on the first KGD device. This function
631 * takes care of restoring the whole process including other devices.
632 * Restore can fail if enough memory is not available. If so,
633 * reschedule again.
634 */
635 pdd = list_first_entry(&p->per_device_data,
636 struct kfd_process_device,
637 per_device_list);
638
639 pr_debug("Started restoring pasid %d\n", p->pasid);
640
641 /* Setting last_restore_timestamp before successful restoration.
642 * Otherwise this would have to be set by KGD (restore_process_bos)
643 * before KFD BOs are unreserved. If not, the process can be evicted
644 * again before the timestamp is set.
645 * If restore fails, the timestamp will be set again in the next
646 * attempt. This would mean that the minimum GPU quanta would be
647 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
648 * functions)
649 */
650
651 p->last_restore_timestamp = get_jiffies_64();
652 ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
653 &p->ef);
654 if (ret) {
655 pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
656 p->pasid, PROCESS_BACK_OFF_TIME_MS);
657 ret = schedule_delayed_work(&p->restore_work,
658 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
659 WARN(!ret, "reschedule restore work failed\n");
660 return;
661 }
662
663 ret = process_restore_queues(p);
664 if (!ret)
665 pr_debug("Finished restoring pasid %d\n", p->pasid);
666 else
667 pr_err("Failed to restore queues of pasid %d\n", p->pasid);
574} 668}
575 669
576bool kfd_has_process_device_data(struct kfd_process *p) 670void kfd_suspend_all_processes(void)
577{ 671{
578 return !(list_empty(&p->per_device_data)); 672 struct kfd_process *p;
673 unsigned int temp;
674 int idx = srcu_read_lock(&kfd_processes_srcu);
675
676 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
677 cancel_delayed_work_sync(&p->eviction_work);
678 cancel_delayed_work_sync(&p->restore_work);
679
680 if (process_evict_queues(p))
681 pr_err("Failed to suspend process %d\n", p->pasid);
682 dma_fence_signal(p->ef);
683 dma_fence_put(p->ef);
684 p->ef = NULL;
685 }
686 srcu_read_unlock(&kfd_processes_srcu, idx);
579} 687}
580 688
581/* This increments the process->ref counter. */ 689int kfd_resume_all_processes(void)
582struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
583{ 690{
584 struct kfd_process *p, *ret_p = NULL; 691 struct kfd_process *p;
585 unsigned int temp; 692 unsigned int temp;
586 693 int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
587 int idx = srcu_read_lock(&kfd_processes_srcu);
588 694
589 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 695 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
590 if (p->pasid == pasid) { 696 if (!schedule_delayed_work(&p->restore_work, 0)) {
591 kref_get(&p->ref); 697 pr_err("Restore process %d failed during resume\n",
592 ret_p = p; 698 p->pasid);
593 break; 699 ret = -EFAULT;
594 } 700 }
595 } 701 }
596
597 srcu_read_unlock(&kfd_processes_srcu, idx); 702 srcu_read_unlock(&kfd_processes_srcu, idx);
598 703 return ret;
599 return ret_p;
600} 704}
601 705
602int kfd_reserved_mem_mmap(struct kfd_process *process, 706int kfd_reserved_mem_mmap(struct kfd_process *process,
@@ -633,6 +737,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
633 KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); 737 KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
634} 738}
635 739
740void kfd_flush_tlb(struct kfd_process_device *pdd)
741{
742 struct kfd_dev *dev = pdd->dev;
743 const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
744
745 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
746 /* Nothing to flush until a VMID is assigned, which
747 * only happens when the first queue is created.
748 */
749 if (pdd->qpd.vmid)
750 f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
751 } else {
752 f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
753 }
754}
755
636#if defined(CONFIG_DEBUG_FS) 756#if defined(CONFIG_DEBUG_FS)
637 757
638int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) 758int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 876380632668..7817e327ea6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
208 208
209 case KFD_QUEUE_TYPE_COMPUTE: 209 case KFD_QUEUE_TYPE_COMPUTE:
210 /* check if there is over subscription */ 210 /* check if there is over subscription */
211 if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && 211 if ((dev->dqm->sched_policy ==
212 KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
212 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || 213 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
213 (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { 214 (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
214 pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); 215 pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c6a76090a725..250615535563 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -35,6 +35,7 @@
35#include "kfd_crat.h" 35#include "kfd_crat.h"
36#include "kfd_topology.h" 36#include "kfd_topology.h"
37#include "kfd_device_queue_manager.h" 37#include "kfd_device_queue_manager.h"
38#include "kfd_iommu.h"
38 39
39/* topology_device_list - Master list of all topology devices */ 40/* topology_device_list - Master list of all topology devices */
40static struct list_head topology_device_list; 41static struct list_head topology_device_list;
@@ -677,7 +678,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
677 } 678 }
678 679
679 /* All hardware blocks have the same number of attributes. */ 680 /* All hardware blocks have the same number of attributes. */
680 num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); 681 num_attrs = ARRAY_SIZE(perf_attr_iommu);
681 list_for_each_entry(perf, &dev->perf_props, list) { 682 list_for_each_entry(perf, &dev->perf_props, list) {
682 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) 683 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
683 * num_attrs + sizeof(struct attribute_group), 684 * num_attrs + sizeof(struct attribute_group),
@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm,
875 */ 876 */
876static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) 877static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
877{ 878{
878 struct kfd_perf_properties *props; 879 /* These are the only counters supported so far */
879 880 return kfd_iommu_add_perf_counters(kdev);
880 if (amd_iommu_pc_supported()) {
881 props = kfd_alloc_struct(props);
882 if (!props)
883 return -ENOMEM;
884 strcpy(props->block_name, "iommu");
885 props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
886 amd_iommu_pc_get_max_counters(0); /* assume one iommu */
887 list_add_tail(&props->list, &kdev->perf_props);
888 }
889
890 return 0;
891} 881}
892 882
893/* kfd_add_non_crat_information - Add information that is not currently 883/* kfd_add_non_crat_information - Add information that is not currently
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 53fca1f45401..c0be2be6dca5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -25,7 +25,7 @@
25 25
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/list.h> 27#include <linux/list.h>
28#include "kfd_priv.h" 28#include "kfd_crat.h"
29 29
30#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 30#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
31 31
@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device(
183 struct list_head *device_list); 183 struct list_head *device_list);
184void kfd_release_topology_device_list(struct list_head *device_list); 184void kfd_release_topology_device_list(struct list_head *device_list);
185 185
186extern bool amd_iommu_pc_supported(void);
187extern u8 amd_iommu_pc_get_max_banks(u16 devid);
188extern u8 amd_iommu_pc_get_max_counters(u16 devid);
189
190#endif /* __KFD_TOPOLOGY_H__ */ 186#endif /* __KFD_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a6752bd0c871..1e5c22ceb256 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -30,6 +30,7 @@
30 30
31#include <linux/types.h> 31#include <linux/types.h>
32#include <linux/bitmap.h> 32#include <linux/bitmap.h>
33#include <linux/dma-fence.h>
33 34
34struct pci_dev; 35struct pci_dev;
35 36
@@ -107,6 +108,12 @@ struct kgd2kfd_shared_resources {
107 108
108 /* Number of bytes at start of aperture reserved for KGD. */ 109 /* Number of bytes at start of aperture reserved for KGD. */
109 size_t doorbell_start_offset; 110 size_t doorbell_start_offset;
111
112 /* GPUVM address space size in bytes */
113 uint64_t gpuvm_size;
114
115 /* Minor device number of the render node */
116 int drm_render_minor;
110}; 117};
111 118
112struct tile_config { 119struct tile_config {
@@ -120,6 +127,25 @@ struct tile_config {
120 uint32_t num_ranks; 127 uint32_t num_ranks;
121}; 128};
122 129
130
131/*
132 * Allocation flag domains
133 */
134#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
135#define ALLOC_MEM_FLAGS_GTT (1 << 1)
136#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */
137#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */
138
139/*
140 * Allocation flags attributes/access options.
141 */
142#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
143#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
144#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
145#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */
146#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
147#define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */
148
123/** 149/**
124 * struct kfd2kgd_calls 150 * struct kfd2kgd_calls
125 * 151 *
@@ -179,6 +205,45 @@ struct tile_config {
179 * 205 *
180 * @get_vram_usage: Returns current VRAM usage 206 * @get_vram_usage: Returns current VRAM usage
181 * 207 *
208 * @create_process_vm: Create a VM address space for a given process and GPU
209 *
210 * @destroy_process_vm: Destroy a VM
211 *
212 * @get_process_page_dir: Get physical address of a VM page directory
213 *
214 * @set_vm_context_page_table_base: Program page table base for a VMID
215 *
216 * @alloc_memory_of_gpu: Allocate GPUVM memory
217 *
218 * @free_memory_of_gpu: Free GPUVM memory
219 *
220 * @map_memory_to_gpu: Map GPUVM memory into a specific VM address
221 * space. Allocates and updates page tables and page directories as
222 * needed. This function may return before all page table updates have
223 * completed. This allows multiple map operations (on multiple GPUs)
224 * to happen concurrently. Use sync_memory to synchronize with all
225 * pending updates.
226 *
227 * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space
228 *
229 * @sync_memory: Wait for pending page table updates to complete
230 *
231 * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access
232 * Pins the BO, maps it to kernel address space. Such BOs are never evicted.
233 * The kernel virtual address remains valid until the BO is freed.
234 *
235 * @restore_process_bos: Restore all BOs that belong to the
236 * process. This is intended for restoring memory mappings after a TTM
237 * eviction.
238 *
239 * @invalidate_tlbs: Invalidate TLBs for a specific PASID
240 *
241 * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID
242 *
243 * @submit_ib: Submits an IB to the engine specified by inserting the
244 * IB to the corresponding ring (ring type). The IB is executed with the
245 * specified VMID in a user mode context.
246 *
182 * This structure contains function pointers to services that the kgd driver 247 * This structure contains function pointers to services that the kgd driver
183 * provides to amdkfd driver. 248 * provides to amdkfd driver.
184 * 249 *
@@ -258,8 +323,6 @@ struct kfd2kgd_calls {
258 uint16_t (*get_atc_vmid_pasid_mapping_pasid)( 323 uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
259 struct kgd_dev *kgd, 324 struct kgd_dev *kgd,
260 uint8_t vmid); 325 uint8_t vmid);
261 void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
262 uint8_t vmid);
263 326
264 uint16_t (*get_fw_version)(struct kgd_dev *kgd, 327 uint16_t (*get_fw_version)(struct kgd_dev *kgd,
265 enum kgd_engine_type type); 328 enum kgd_engine_type type);
@@ -270,6 +333,33 @@ struct kfd2kgd_calls {
270 void (*get_cu_info)(struct kgd_dev *kgd, 333 void (*get_cu_info)(struct kgd_dev *kgd,
271 struct kfd_cu_info *cu_info); 334 struct kfd_cu_info *cu_info);
272 uint64_t (*get_vram_usage)(struct kgd_dev *kgd); 335 uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
336
337 int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
338 void **process_info, struct dma_fence **ef);
339 void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
340 uint32_t (*get_process_page_dir)(void *vm);
341 void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
342 uint32_t vmid, uint32_t page_table_base);
343 int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
344 uint64_t size, void *vm,
345 struct kgd_mem **mem, uint64_t *offset,
346 uint32_t flags);
347 int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem);
348 int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
349 void *vm);
350 int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
351 void *vm);
352 int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
353 int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem,
354 void **kptr, uint64_t *size);
355 int (*restore_process_bos)(void *process_info, struct dma_fence **ef);
356
357 int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
358 int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
359
360 int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
361 uint32_t vmid, uint64_t gpu_addr,
362 uint32_t *ib_cmd, uint32_t ib_len);
273}; 363};
274 364
275/** 365/**
@@ -288,6 +378,9 @@ struct kfd2kgd_calls {
288 * 378 *
289 * @resume: Notifies amdkfd about a resume action done to a kgd device 379 * @resume: Notifies amdkfd about a resume action done to a kgd device
290 * 380 *
381 * @schedule_evict_and_restore_process: Schedules work queue that will prepare
382 * for safe eviction of KFD BOs that belong to the specified process.
383 *
291 * This structure contains function callback pointers so the kgd driver 384 * This structure contains function callback pointers so the kgd driver
292 * will notify to the amdkfd about certain status changes. 385 * will notify to the amdkfd about certain status changes.
293 * 386 *
@@ -302,6 +395,8 @@ struct kgd2kfd_calls {
302 void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); 395 void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
303 void (*suspend)(struct kfd_dev *kfd); 396 void (*suspend)(struct kfd_dev *kfd);
304 int (*resume)(struct kfd_dev *kfd); 397 int (*resume)(struct kfd_dev *kfd);
398 int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
399 struct dma_fence *fence);
305}; 400};
306 401
307int kgd2kfd_init(unsigned interface_version, 402int kgd2kfd_init(unsigned interface_version,
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f4cab5b3ba9a..111d73ba2d96 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
263}; 263};
264 264
265struct kfd_ioctl_set_trap_handler_args { 265struct kfd_ioctl_set_trap_handler_args {
266 uint64_t tba_addr; /* to KFD */ 266 __u64 tba_addr; /* to KFD */
267 uint64_t tma_addr; /* to KFD */ 267 __u64 tma_addr; /* to KFD */
268 uint32_t gpu_id; /* to KFD */ 268 __u32 gpu_id; /* to KFD */
269 uint32_t pad; 269 __u32 pad;
270}; 270};
271 271
272#define AMDKFD_IOCTL_BASE 'K' 272#define AMDKFD_IOCTL_BASE 'K'