author      Alex Waterman <alexw@nvidia.com>    2017-10-20 13:26:22 -0400
committer   mobile promotions <svcmobile_promotions@nvidia.com>    2017-11-10 18:47:01 -0500
commit      01c98eb68055f0b18d4f5b9dc4aa78601a00bc1e (patch)
tree        535c341ede5f52165c074a860f8e4c81247e34c5 /drivers/gpu/nvgpu/common/mm/vm.c
parent      8428c82c816f361ce7bbb1fe4804f350b8cbea2f (diff)
gpu: nvgpu: VM map path refactoring
Final VM mapping refactoring. Move most of the logic in the VM map path
to the common/mm/vm.c code and use the generic APIs previously
implemented to deal with comptags and map caching. This also updates
the mapped_buffer struct to finally be free of the Linux dma_buf and
scatter gather table pointers; these are replaced with the
nvgpu_os_buffer struct.

JIRA NVGPU-30
JIRA NVGPU-71
JIRA NVGPU-224

Change-Id: If5b32886221c3e5af2f3d7ddd4fa51dd487bb981
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583987
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
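To make the new common entry point concrete, below is a minimal caller-side sketch. It is not part of the patch: the wrapper name, the use of APERTURE_SYSMEM, passing gk20a_mem_flag_none for the rw argument, and the IS_ERR() check are illustrative assumptions; only the nvgpu_vm_map() signature, its flags, and its ERR_PTR return convention are taken from the diff below.

#include <nvgpu/vm.h>	/* nvgpu_vm_map() and the VM/mapped_buf types */

/*
 * Hypothetical OS-layer helper (illustrative only). The OS-specific code
 * is assumed to have already wrapped its native buffer (e.g. a dma_buf on
 * Linux) in an nvgpu_os_buffer and built an nvgpu_sgt for it.
 */
static u64 example_os_map_buffer(struct vm_gk20a *vm,
                                 struct nvgpu_os_buffer *os_buf,
                                 struct nvgpu_sgt *sgt,
                                 u64 map_addr, u32 flags,
                                 s16 compr_kind, s16 incompr_kind,
                                 struct vm_gk20a_mapping_batch *batch)
{
        struct nvgpu_mapped_buf *mapped_buffer;

        /*
         * map_size == 0 lets the common code derive the size from the
         * buffer; phys_offset == 0 maps from the start of the buffer. A
         * non-zero map_addr is only honored together with the
         * NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET flag.
         */
        mapped_buffer = nvgpu_vm_map(vm, os_buf, sgt,
                                     map_addr,
                                     0ULL,              /* map_size */
                                     0ULL,              /* phys_offset */
                                     gk20a_mem_flag_none, /* rw (assumed) */
                                     flags,
                                     compr_kind, incompr_kind,
                                     batch,
                                     APERTURE_SYSMEM);  /* aperture (assumed) */
        if (IS_ERR(mapped_buffer))
                return 0ULL;    /* caller treats 0 as "no mapping" */

        /* GPU virtual address chosen (or validated) by the common code. */
        return mapped_buffer->addr;
}

Note that mapping the same os_buf again on a non-userspace-managed VM returns the cached nvgpu_mapped_buf with an additional reference instead of creating a second GMMU mapping.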
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--    drivers/gpu/nvgpu/common/mm/vm.c    244
1 file changed, 244 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 46783e4e..97c6d4ca 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <nvgpu/bug.h>
 #include <nvgpu/log.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
@@ -712,6 +713,249 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
         nvgpu_big_free(vm->mm->g, mapped_buffers);
 }
 
+struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
+                                      struct nvgpu_os_buffer *os_buf,
+                                      struct nvgpu_sgt *sgt,
+                                      u64 map_addr,
+                                      u64 map_size,
+                                      u64 phys_offset,
+                                      int rw,
+                                      u32 flags,
+                                      s16 compr_kind,
+                                      s16 incompr_kind,
+                                      struct vm_gk20a_mapping_batch *batch,
+                                      enum nvgpu_aperture aperture)
+{
+        struct gk20a *g = gk20a_from_vm(vm);
+        struct nvgpu_mapped_buf *mapped_buffer = NULL;
+        struct nvgpu_ctag_buffer_info binfo = { 0 };
+        struct gk20a_comptags comptags;
+        struct nvgpu_vm_area *vm_area = NULL;
+        int err = 0;
+        u64 align;
+        u32 ctag_offset;
+        bool clear_ctags = false;
+        bool va_allocated = true;
+
+        /*
+         * The kind used as part of the key for map caching. HW may
+         * actually be programmed with the fallback kind in case the
+         * key kind is compressible but we're out of comptags.
+         */
+        s16 map_key_kind;
+
+        if (vm->userspace_managed &&
+            !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+                nvgpu_err(g,
+                          "non-fixed-offset mapping not available on "
+                          "userspace managed address spaces");
+                return ERR_PTR(-EINVAL);
+        }
+
+        binfo.flags = flags;
+        binfo.size = nvgpu_os_buf_get_size(os_buf);
+        binfo.compr_kind = compr_kind;
+        binfo.incompr_kind = incompr_kind;
+
+        if (compr_kind != NV_KIND_INVALID)
+                map_key_kind = compr_kind;
+        else
+                map_key_kind = incompr_kind;
+
+        /*
+         * Check if this buffer is already mapped.
+         */
+        if (!vm->userspace_managed) {
+                nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+                mapped_buffer = nvgpu_vm_find_mapping(vm,
+                                                      os_buf,
+                                                      map_addr,
+                                                      flags,
+                                                      map_key_kind);
+                nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+                if (mapped_buffer) {
+                        nvgpu_ref_get(&mapped_buffer->ref);
+                        return mapped_buffer;
+                }
+        }
+
+        /*
+         * Generate a new mapping!
+         */
+        mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+        if (!mapped_buffer) {
+                nvgpu_warn(g, "oom allocating tracking buffer");
+                return ERR_PTR(-ENOMEM);
+        }
+
+        align = nvgpu_sgt_alignment(g, sgt);
+        if (g->mm.disable_bigpage)
+                binfo.pgsz_idx = gmmu_page_size_small;
+        else
+                binfo.pgsz_idx = __get_pte_size(vm, map_addr,
+                                                min_t(u64, binfo.size, align));
+        map_size = map_size ? map_size : binfo.size;
+        map_size = ALIGN(map_size, SZ_4K);
+
+        if ((map_size > binfo.size) ||
+            (phys_offset > (binfo.size - map_size))) {
+                err = -EINVAL;
+                goto clean_up_nolock;
+        }
+
+        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+        /*
+         * Check if we should use a fixed offset for mapping this buffer.
+         */
+        if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+                err = nvgpu_vm_area_validate_buffer(vm,
+                                                    map_addr,
+                                                    map_size,
+                                                    binfo.pgsz_idx,
+                                                    &vm_area);
+                if (err)
+                        goto clean_up;
+
+                va_allocated = false;
+        }
+
+        err = nvgpu_vm_compute_compression(vm, &binfo);
+        if (err) {
+                nvgpu_err(g, "failure setting up compression");
+                goto clean_up;
+        }
+
+        /*
+         * bar1 and pmu VMs don't need ctags.
+         */
+        if (!vm->enable_ctag)
+                binfo.ctag_lines = 0;
+
+        gk20a_get_comptags(os_buf, &comptags);
+
+        if (binfo.ctag_lines && !comptags.lines) {
+                /*
+                 * Allocate compression resources if needed.
+                 */
+                if (gk20a_alloc_comptags(g,
+                                         os_buf,
+                                         &g->gr.comp_tags,
+                                         binfo.ctag_lines)) {
+
+                        /*
+                         * Prevent compression...
+                         */
+                        binfo.compr_kind = NV_KIND_INVALID;
+
+                        /*
+                         * ... And make sure we have a fallback.
+                         */
+                        if (binfo.incompr_kind == NV_KIND_INVALID) {
+                                nvgpu_err(g, "comptag alloc failed and no "
+                                          "fallback kind specified");
+                                err = -ENOMEM;
+
+                                /*
+                                 * Any alloced comptags are cleaned up when the
+                                 * dmabuf is freed.
+                                 */
+                                goto clean_up;
+                        }
+                } else {
+                        gk20a_get_comptags(os_buf, &comptags);
+
+                        if (g->ops.ltc.cbc_ctrl)
+                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                                                    comptags.offset,
+                                                    comptags.offset +
+                                                    comptags.allocated_lines - 1);
+                        else
+                                clear_ctags = true;
+                }
+        }
+
+        /*
+         * Calculate comptag index for this mapping. Differs in case of partial
+         * mapping.
+         */
+        ctag_offset = comptags.offset;
+        if (ctag_offset)
+                ctag_offset += phys_offset >>
+                               ilog2(g->ops.fb.compression_page_size(g));
+
+        map_addr = g->ops.mm.gmmu_map(vm,
+                                      map_addr,
+                                      sgt,
+                                      phys_offset,
+                                      map_size,
+                                      binfo.pgsz_idx,
+                                      binfo.compr_kind != NV_KIND_INVALID ?
+                                      binfo.compr_kind : binfo.incompr_kind,
+                                      ctag_offset,
+                                      flags,
+                                      rw,
+                                      clear_ctags,
+                                      false,
+                                      false,
+                                      batch,
+                                      aperture);
+        if (!map_addr) {
+                err = -ENOMEM;
+                goto clean_up;
+        }
+
+        nvgpu_init_list_node(&mapped_buffer->buffer_list);
+        nvgpu_ref_init(&mapped_buffer->ref);
+        mapped_buffer->addr = map_addr;
+        mapped_buffer->size = map_size;
+        mapped_buffer->pgsz_idx = binfo.pgsz_idx;
+        mapped_buffer->ctag_offset = ctag_offset;
+        mapped_buffer->ctag_lines = binfo.ctag_lines;
+        mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
+        mapped_buffer->vm = vm;
+        mapped_buffer->flags = flags;
+        mapped_buffer->kind = map_key_kind;
+        mapped_buffer->va_allocated = va_allocated;
+        mapped_buffer->vm_area = vm_area;
+
+        err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+        if (err) {
+                nvgpu_err(g, "failed to insert into mapped buffer tree");
+                goto clean_up;
+        }
+
+        vm->num_user_mapped_buffers++;
+
+        if (vm_area) {
+                nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+                                    &vm_area->buffer_list_head);
+                mapped_buffer->vm_area = vm_area;
+        }
+
+        nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+        return mapped_buffer;
+
+clean_up:
+        if (mapped_buffer->addr)
+                g->ops.mm.gmmu_unmap(vm,
+                                     mapped_buffer->addr,
+                                     mapped_buffer->size,
+                                     mapped_buffer->pgsz_idx,
+                                     mapped_buffer->va_allocated,
+                                     gk20a_mem_flag_none,
+                                     mapped_buffer->vm_area ?
+                                     mapped_buffer->vm_area->sparse : false,
+                                     NULL);
+        nvgpu_mutex_release(&vm->update_gmmu_lock);
+clean_up_nolock:
+        nvgpu_kfree(g, mapped_buffer);
+
+        return ERR_PTR(err);
+}
+
 /*
  * Really unmap. This does the real GMMU unmap and removes the mapping from the
  * VM map tracking tree (and vm_area list if necessary).