author     Terje Bergstrom <tbergstrom@nvidia.com>    2017-03-15 17:08:32 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>    2017-03-17 11:44:03 -0400
commit     ca762e42205febba72ce063417e1ac598610941d
tree       caff32adafb4c09ed335756da3e8beb7b87fe47b /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     0742f4e7032aabf44a31792330fcd4f55f4540f7
gpu: nvgpu: Move all FB programming to FB HAL
Move all programming of FB to fb_*.c files, and remove the inclusion
of FB hardware headers from other files.
The TLB invalidate function previously took a pointer to a VM; the new
API takes only a PDB mem_desc, because FB does not need to know about
the higher-level VM.
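
In HAL terms the change looks roughly like the following sketch, based on
the call sites in the diff below (the exact mem_desc type name and struct
layout are assumed):

/* Before: the MM HAL op needed the whole VM. */
void (*tlb_invalidate)(struct vm_gk20a *vm);                   /* gops->mm */

/* After: the FB HAL op sees only the page directory's memory. */
void (*tlb_invalidate)(struct gk20a *g, struct mem_desc *pdb); /* gops->fb */

Callers that previously passed vm now pass &vm->pdb.mem, as the hunks
below show.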
The GPC MMU is programmed from the same function as the FB MMU, so a
dependency on the GR hardware header was added to FB.
GP106 ACR was also triggering a VPR fetch, but that is not applicable
to dGPU, so that call was removed.
Change-Id: I4eb69377ac3745da205907626cf60948b7c5392a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1321516
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  103
1 file changed, 4 insertions(+), 99 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index dd62ab42..32d1f32f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -42,7 +42,6 @@
 #include "kind_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_bus_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
@@ -1084,9 +1083,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	mm->use_full_comp_tag_line =
 		g->ops.fb.set_use_full_comp_tag_line(g);
 
-	gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
-		g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
-		>> 8);
+	g->ops.fb.init_hw(g);
 
 	if (g->ops.mm.bar1_bind)
 		g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
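
The new g->ops.fb.init_hw(g) hook presumably carries the removed write
above into fb_gk20a.c. A minimal sketch, with the function name assumed:

void fb_gk20a_init_hw(struct gk20a *g)	/* name assumed */
{
	/* Same programming as the removed lines above: tell FB where
	 * the sysmem flush buffer lives, in 256-byte units (hence the
	 * shift right by 8). */
	gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
		g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
		>> 8);
}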
@@ -1538,7 +1535,7 @@ void gk20a_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.mm.tlb_invalidate(vm);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
 	}
 }
 
@@ -1959,7 +1956,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	if (!batch)
-		g->ops.mm.tlb_invalidate(vm);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
 	else
 		batch->need_tlb_invalidate = true;
 
@@ -2018,7 +2015,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 
 	if (!batch) {
 		gk20a_mm_l2_flush(g, true);
-		g->ops.mm.tlb_invalidate(vm);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
 	} else {
 		if (!batch->gpu_l2_flushed) {
 			gk20a_mm_l2_flush(g, true);
@@ -5344,70 +5341,6 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 	return 0;
 }
 
-void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_timeout timeout;
-	u32 addr_lo;
-	u32 data;
-
-	gk20a_dbg_fn("");
-
-	/* pagetables are considered sw states which are preserved after
-	   prepare_poweroff. When gk20a deinit releases those pagetables,
-	   common code in vm unmap path calls tlb invalidate that touches
-	   hw. Use the power_on flag to skip tlb invalidation when gpu
-	   power is turned off */
-
-	if (!g->power_on)
-		return;
-
-	addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12);
-
-	nvgpu_mutex_acquire(&g->mm.tlb_lock);
-
-	trace_gk20a_mm_tlb_invalidate(dev_name(g->dev));
-
-	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
-
-	do {
-		data = gk20a_readl(g, fb_mmu_ctrl_r());
-		if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
-			break;
-		udelay(2);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-					    "wait mmu fifo space"));
-
-	if (nvgpu_timeout_peek_expired(&timeout))
-		goto out;
-
-	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
-
-	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
-		fb_mmu_invalidate_pdb_addr_f(addr_lo) |
-		gk20a_aperture_mask(g, &vm->pdb.mem,
-			fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
-			fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
-
-	gk20a_writel(g, fb_mmu_invalidate_r(),
-		fb_mmu_invalidate_all_va_true_f() |
-		fb_mmu_invalidate_trigger_true_f());
-
-	do {
-		data = gk20a_readl(g, fb_mmu_ctrl_r());
-		if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
-			fb_mmu_ctrl_pri_fifo_empty_false_f())
-			break;
-		udelay(2);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-					    "wait mmu invalidate"));
-
-	trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev));
-
-out:
-	nvgpu_mutex_release(&g->mm.tlb_lock);
-}
-
 int gk20a_mm_suspend(struct gk20a *g)
 {
 	gk20a_dbg_fn("");
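
The removed gk20a_mm_tlb_invalidate() body presumably reappears in
fb_gk20a.c behind the new FB HAL API. A sketch of the adapted entry
point (name assumed); only the PDB-address computation changes, since
the VM pointer is gone:

void gk20a_fb_tlb_invalidate(struct gk20a *g, struct mem_desc *pdb)
{
	/* The PDB address now comes straight from the mem_desc argument
	 * instead of vm->pdb.mem. */
	u32 addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, pdb, 0) >> 12);

	/* ... same FIFO-space poll, PDB/invalidate register writes, and
	 * completion poll as the removed function above ... */
}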
@@ -5423,31 +5356,6 @@ int gk20a_mm_suspend(struct gk20a *g)
 	return 0;
 }
 
-bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
-{
-	u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
-	return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
-		fb_mmu_debug_ctrl_debug_enabled_v();
-}
-
-static void gk20a_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
-{
-	u32 reg_val, debug_ctrl;
-
-	reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
-	if (enable) {
-		debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
-		g->mmu_debug_ctrl = true;
-	} else {
-		debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
-		g->mmu_debug_ctrl = false;
-	}
-
-	reg_val = set_field(reg_val,
-			fb_mmu_debug_ctrl_debug_m(), debug_ctrl);
-	gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
-}
-
 u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
 {
 	return 34;
@@ -5510,8 +5418,6 @@ void gk20a_mm_debugfs_init(struct device *dev)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
-	gops->mm.set_debug_mode = gk20a_mm_mmu_set_debug_mode;
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
 	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
 	gops->mm.vm_remove = gk20a_vm_remove_support;
@@ -5521,7 +5427,6 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.cbc_clean = gk20a_mm_cbc_clean;
-	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.get_iova_addr = gk20a_mm_iova_addr;
 	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
 	gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;
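
The counterpart of the removed gops->mm assignments is presumably a
gk20a_init_fb() in fb_gk20a.c that wires the relocated hooks into the
FB HAL. A sketch, with all function names assumed:

void gk20a_init_fb(struct gpu_ops *gops)
{
	gops->fb.init_hw = fb_gk20a_init_hw;
	gops->fb.tlb_invalidate = gk20a_fb_tlb_invalidate;
	/* The MMU debug-mode hooks removed from mm_gk20a.c above would
	 * move here as well, since they touch fb_mmu_debug_ctrl_r(). */
	gops->fb.is_debug_mode_enabled = gk20a_fb_debug_mode_enabled;
	gops->fb.set_debug_mode = gk20a_fb_set_debug_mode;
}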