diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 102 |
1 files changed, 93 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index eb4f01e0..ec946fb6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include "hw_fb_gk20a.h" | 37 | #include "hw_fb_gk20a.h" |
38 | #include "hw_bus_gk20a.h" | 38 | #include "hw_bus_gk20a.h" |
39 | #include "hw_ram_gk20a.h" | 39 | #include "hw_ram_gk20a.h" |
40 | #include "hw_pram_gk20a.h" | ||
40 | #include "hw_mc_gk20a.h" | 41 | #include "hw_mc_gk20a.h" |
41 | #include "hw_flush_gk20a.h" | 42 | #include "hw_flush_gk20a.h" |
42 | #include "hw_ltc_gk20a.h" | 43 | #include "hw_ltc_gk20a.h" |
@@ -44,10 +45,20 @@ | |||
44 | #include "kind_gk20a.h" | 45 | #include "kind_gk20a.h" |
45 | #include "semaphore_gk20a.h" | 46 | #include "semaphore_gk20a.h" |
46 | 47 | ||
48 | /* | ||
49 | * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the | ||
50 | * boot, even for buffers that would work via cpu_va. In runtime, the flag is | ||
51 | * in debugfs, called "force_pramin". | ||
52 | */ | ||
53 | #define GK20A_FORCE_PRAMIN_DEFAULT false | ||
54 | |||
47 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 55 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
48 | { | 56 | { |
49 | void *cpu_va; | 57 | void *cpu_va; |
50 | 58 | ||
59 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
60 | return 0; | ||
61 | |||
51 | if (WARN_ON(mem->cpu_va)) { | 62 | if (WARN_ON(mem->cpu_va)) { |
52 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | 63 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); |
53 | return -EBUSY; | 64 | return -EBUSY; |
@@ -66,20 +77,66 @@ int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | |||
66 | 77 | ||
67 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | 78 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) |
68 | { | 79 | { |
80 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
81 | return; | ||
82 | |||
69 | vunmap(mem->cpu_va); | 83 | vunmap(mem->cpu_va); |
70 | mem->cpu_va = NULL; | 84 | mem->cpu_va = NULL; |
71 | } | 85 | } |
72 | 86 | ||
87 | /* WARNING: returns with pramin_base_lock held; release it with gk20a_pramin_exit() */ | ||
88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
89 | { | ||
90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
91 | u64 addr = bufbase + w * sizeof(u32); | ||
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | ||
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | ||
94 | u32 lo = (addr & 0xfffff); | ||
95 | |||
96 | gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); | ||
97 | |||
98 | WARN_ON(!bufbase); | ||
99 | spin_lock(&g->mm.pramin_base_lock); | ||
100 | if (g->mm.pramin_base != hi) { | ||
101 | gk20a_writel(g, bus_bar0_window_r(), | ||
102 | (g->mm.vidmem_is_vidmem | ||
103 | && mem->aperture == APERTURE_SYSMEM ? | ||
104 | bus_bar0_window_target_sys_mem_noncoherent_f() : | ||
105 | bus_bar0_window_target_vid_mem_f()) | | ||
106 | bus_bar0_window_base_f(hi)); | ||
107 | gk20a_readl(g, bus_bar0_window_r()); | ||
108 | g->mm.pramin_base = hi; | ||
109 | } | ||
110 | |||
111 | return lo; | ||
112 | } | ||
113 | |||
114 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) | ||
115 | { | ||
116 | gk20a_dbg(gpu_dbg_mem, "end for %p", mem); | ||
117 | spin_unlock(&g->mm.pramin_base_lock); | ||
118 | } | ||
119 | |||
73 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | 120 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) |
74 | { | 121 | { |
75 | u32 *ptr = mem->cpu_va; | 122 | u32 data = 0; |
76 | u32 data; | 123 | |
124 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
125 | u32 *ptr = mem->cpu_va; | ||
77 | 126 | ||
78 | WARN_ON(!ptr); | 127 | WARN_ON(!ptr); |
79 | data = ptr[w]; | 128 | data = ptr[w]; |
80 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 129 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
81 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 130 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
82 | #endif | 131 | #endif |
132 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
133 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
134 | data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
135 | gk20a_pramin_exit(g, mem); | ||
136 | } else { | ||
137 | WARN_ON("Accessing unallocated mem_desc"); | ||
138 | } | ||
139 | |||
83 | return data; | 140 | return data; |
84 | } | 141 | } |
85 | 142 | ||
@@ -106,13 +163,23 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | |||
106 | 163 | ||
107 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | 164 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) |
108 | { | 165 | { |
109 | u32 *ptr = mem->cpu_va; | 166 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { |
167 | u32 *ptr = mem->cpu_va; | ||
110 | 168 | ||
111 | WARN_ON(!ptr); | 169 | WARN_ON(!ptr); |
112 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 170 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
113 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 171 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
114 | #endif | 172 | #endif |
115 | ptr[w] = data; | 173 | ptr[w] = data; |
174 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
175 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
176 | gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); | ||
177 | /* read back to synchronize accesses */ | ||
178 | gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
179 | gk20a_pramin_exit(g, mem); | ||
180 | } else { | ||
181 | WARN_ON("Accessing unallocated mem_desc"); | ||
182 | } | ||
116 | } | 183 | } |
117 | 184 | ||
118 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | 185 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) |
@@ -535,6 +602,13 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) | |||
535 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); | 602 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); |
536 | } | 603 | } |
537 | 604 | ||
605 | static void gk20a_init_pramin(struct mm_gk20a *mm) | ||
606 | { | ||
607 | mm->pramin_base = 0; | ||
608 | spin_lock_init(&mm->pramin_base_lock); | ||
609 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | ||
610 | } | ||
611 | |||
538 | int gk20a_init_mm_setup_sw(struct gk20a *g) | 612 | int gk20a_init_mm_setup_sw(struct gk20a *g) |
539 | { | 613 | { |
540 | struct mm_gk20a *mm = &g->mm; | 614 | struct mm_gk20a *mm = &g->mm; |
@@ -558,6 +632,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
558 | (int)(mm->channel.user_size >> 20), | 632 | (int)(mm->channel.user_size >> 20), |
559 | (int)(mm->channel.kernel_size >> 20)); | 633 | (int)(mm->channel.kernel_size >> 20)); |
560 | 634 | ||
635 | gk20a_init_pramin(mm); | ||
636 | |||
561 | err = gk20a_alloc_sysmem_flush(g); | 637 | err = gk20a_alloc_sysmem_flush(g); |
562 | if (err) | 638 | if (err) |
563 | return err; | 639 | return err; |
@@ -586,6 +662,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
586 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | 662 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ |
587 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | 663 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; |
588 | mm->remove_support = gk20a_remove_mm_support; | 664 | mm->remove_support = gk20a_remove_mm_support; |
665 | |||
589 | mm->sw_ready = true; | 666 | mm->sw_ready = true; |
590 | 667 | ||
591 | gk20a_dbg_fn("done"); | 668 | gk20a_dbg_fn("done"); |
@@ -690,6 +767,7 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | |||
690 | entry->mem.cpu_va = page_address(pages); | 767 | entry->mem.cpu_va = page_address(pages); |
691 | memset(entry->mem.cpu_va, 0, len); | 768 | memset(entry->mem.cpu_va, 0, len); |
692 | entry->mem.size = len; | 769 | entry->mem.size = len; |
770 | entry->mem.aperture = APERTURE_SYSMEM; | ||
693 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); | 771 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); |
694 | 772 | ||
695 | return 0; | 773 | return 0; |
@@ -716,6 +794,7 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm, | |||
716 | kfree(entry->mem.sgt); | 794 | kfree(entry->mem.sgt); |
717 | entry->mem.sgt = NULL; | 795 | entry->mem.sgt = NULL; |
718 | entry->mem.size = 0; | 796 | entry->mem.size = 0; |
797 | entry->mem.aperture = APERTURE_INVALID; | ||
719 | } | 798 | } |
720 | 799 | ||
721 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | 800 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) |
@@ -2164,6 +2243,7 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, stru | |||
2164 | goto fail_free; | 2243 | goto fail_free; |
2165 | 2244 | ||
2166 | mem->size = size; | 2245 | mem->size = size; |
2246 | mem->aperture = APERTURE_SYSMEM; | ||
2167 | 2247 | ||
2168 | gk20a_dbg_fn("done"); | 2248 | gk20a_dbg_fn("done"); |
2169 | 2249 | ||
@@ -2210,6 +2290,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | |||
2210 | gk20a_free_sgtable(&mem->sgt); | 2290 | gk20a_free_sgtable(&mem->sgt); |
2211 | 2291 | ||
2212 | mem->size = 0; | 2292 | mem->size = 0; |
2293 | mem->aperture = APERTURE_INVALID; | ||
2213 | } | 2294 | } |
2214 | 2295 | ||
2215 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | 2296 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) |
@@ -4015,6 +4096,9 @@ void gk20a_mm_debugfs_init(struct device *dev) | |||
4015 | 4096 | ||
4016 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, | 4097 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, |
4017 | &g->separate_fixed_allocs); | 4098 | &g->separate_fixed_allocs); |
4099 | |||
4100 | debugfs_create_bool("force_pramin", 0664, gpu_root, | ||
4101 | &g->mm.force_pramin); | ||
4018 | } | 4102 | } |
4019 | 4103 | ||
4020 | void gk20a_init_mm(struct gpu_ops *gops) | 4104 | void gk20a_init_mm(struct gpu_ops *gops) |