path: root/drivers/gpu/nvgpu/common/mm/vm.c
author     Alex Waterman <alexw@nvidia.com>                      2017-10-17 13:55:00 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2017-11-01 22:06:23 -0400
commit     d13c256d5ee11da1664377481543005142d9bd30 (patch)
tree       e00ef697d05a20428619c8920ddfcb645fc095d8 /drivers/gpu/nvgpu/common/mm/vm.c
parent     a37cec19f0cf5212cbd472cd8d94acaa1e1cff6d (diff)
gpu: nvgpu: VM unmap refactoring
Re-organize the unmap code to be better split between OS specific requirements and common core requirements. The new code flow works as follows:

nvgpu_vm_unmap() is the primary entrance to the unmap path. It takes a VM and a GPU virtual address to unmap. There's also an optional batch mapping struct. This function is responsible for making sure there is a real buffer and that, if it's being called on a fixed mapping, the mapping will definitely be freed (since buffers are ref-counted). Then this function decrements the ref-count and returns.

If the ref-count hits zero then __nvgpu_vm_unmap_ref() is called, which just calls __nvgpu_vm_unmap() with the relevant batch struct if present. This is where the real work is done: __nvgpu_vm_unmap() clears the GMMU mapping, removes the mapped buffer from the various lists and trees it may be in, and then calls nvgpu_vm_unmap_system(). That function handles any OS specific work and must be defined by all VM OS implementations.

There's also a shortcut used by some other core VM code to free mappings without going through nvgpu_vm_unmap(): such code simply decrements the mapping ref-count directly, which in turn calls __nvgpu_vm_unmap_ref() if the ref-count hits zero.

JIRA NVGPU-30
JIRA NVGPU-71

Change-Id: Ic626d37ab936819841bab45214f027b40ffa4e5a
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583982
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
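To make the new flow concrete, here is a minimal caller-side sketch (not part of this change; it assumes the existing vm_gk20a_mapping_batch start/finish helpers and uses hypothetical addresses map_addr_a/map_addr_b):

    /* Unmap two buffers, collecting the GMMU/TLB maintenance into one batch finish. */
    struct vm_gk20a_mapping_batch batch;

    nvgpu_vm_mapping_batch_start(&batch);
    nvgpu_vm_unmap(vm, map_addr_a, &batch); /* drops the mapping ref; frees it if the ref hits zero */
    nvgpu_vm_unmap(vm, map_addr_b, &batch);
    nvgpu_vm_mapping_batch_finish(vm, &batch);

Passing NULL as the batch is also valid; each unmap then performs its own maintenance immediately.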
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--   drivers/gpu/nvgpu/common/mm/vm.c   118
1 file changed, 106 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 88af6456..3d10ff48 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -36,6 +36,9 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
+			     struct vm_gk20a_mapping_batch *batch);
+
 int vm_aspace_id(struct vm_gk20a *vm)
 {
 	return vm->as_share ? vm->as_share->id : -1;
@@ -538,7 +541,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
 		mapped_buffer = mapped_buffer_from_rbtree_node(node);
-		nvgpu_vm_unmap_locked(mapped_buffer, NULL);
+		__nvgpu_vm_unmap(mapped_buffer, NULL);
 		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	}
 
@@ -702,8 +705,7 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 	vm->kref_put_batch = &batch;
 
 	for (i = 0; i < num_buffers; ++i)
-		nvgpu_ref_put(&mapped_buffers[i]->ref,
-			      nvgpu_vm_unmap_locked_ref);
+		nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);
 
 	vm->kref_put_batch = NULL;
 	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
@@ -712,26 +714,118 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }
 
-void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref)
+/*
+ * Really unmap. This does the real GMMU unmap and removes the mapping from the
+ * VM map tracking tree (and vm_area list if necessary).
+ */
+static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
+			     struct vm_gk20a_mapping_batch *batch)
+{
+	struct vm_gk20a *vm = mapped_buffer->vm;
+	struct gk20a *g = vm->mm->g;
+
+	vm->num_user_mapped_buffers--;
+
+	g->ops.mm.gmmu_unmap(vm,
+			     mapped_buffer->addr,
+			     mapped_buffer->size,
+			     mapped_buffer->pgsz_idx,
+			     mapped_buffer->va_allocated,
+			     gk20a_mem_flag_none,
+			     mapped_buffer->vm_area ?
+				     mapped_buffer->vm_area->sparse : false,
+			     batch);
+
+	/*
+	 * Remove from mapped buffer tree. Then delete the buffer from the
+	 * linked list of mapped buffers; though note: not all mapped buffers
+	 * are part of a vm_area.
+	 */
+	nvgpu_remove_mapped_buf(vm, mapped_buffer);
+	nvgpu_list_del(&mapped_buffer->buffer_list);
+
+	/*
+	 * OS specific freeing. This is after the generic freeing incase the
+	 * generic freeing relies on some component of the OS specific
+	 * nvgpu_mapped_buf in some abstraction or the like.
+	 */
+	nvgpu_vm_unmap_system(mapped_buffer);
+
+	nvgpu_kfree(g, mapped_buffer);
+}
+
+void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
 {
 	struct nvgpu_mapped_buf *mapped_buffer =
 		container_of(ref, struct nvgpu_mapped_buf, ref);
-	nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
+
+	__nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
 }
 
-void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
+/*
+ * For fixed-offset buffers we must sync the buffer. That means we wait for the
+ * buffer to hit a ref-count of 1 before proceeding.
+ *
+ * Note: this requires the update_gmmu_lock to be held since we release it and
+ * re-aquire it in this function.
+ */
+static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
+				      struct nvgpu_mapped_buf *mapped_buffer)
+{
+	struct nvgpu_timeout timeout;
+	int ret = 0;
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	/*
+	 * 500ms second timer.
+	 */
+	nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);
+
+	do {
+		if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
+			break;
+		nvgpu_msleep(10);
+	} while (!nvgpu_timeout_expired_msg(&timeout,
+					    "sync-unmap failed on 0x%llx"));
+
+	if (nvgpu_timeout_expired(&timeout))
+		ret = -ETIMEDOUT;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	return ret;
+}
+
+void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
+		    struct vm_gk20a_mapping_batch *batch)
 {
-	struct gk20a *g = vm->mm->g;
 	struct nvgpu_mapped_buf *mapped_buffer;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
 	mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
-	if (!mapped_buffer) {
-		nvgpu_mutex_release(&vm->update_gmmu_lock);
-		nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
-		return;
+	if (!mapped_buffer)
+		goto done;
+
+	if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
+			/*
+			 * Looks like we have failed... Better not continue in
+			 * case the buffer is in use.
+			 */
+			goto done;
 	}
 
-	nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
+	/*
+	 * Make sure we have access to the batch if we end up calling through to
+	 * the unmap_ref function.
+	 */
+	vm->kref_put_batch = batch;
+	nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
+	vm->kref_put_batch = NULL;
+
+done:
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return;
 }
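The one piece each OS layer must now supply is nvgpu_vm_unmap_system(), called above from __nvgpu_vm_unmap() after the common teardown. A minimal sketch of its expected shape, with the signature inferred from the call site and a placeholder body (the real Linux implementation lives in the OS-specific VM code, not in this diff):

    void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
    {
    	/*
    	 * Release whatever OS-specific state was attached to the mapping at
    	 * map time (for Linux, e.g., unpinning the backing memory and
    	 * dropping the dma-buf reference). This runs with the VM's
    	 * update_gmmu_lock held, just before the mapped_buffer is freed.
    	 */
    }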