summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2016-05-16 04:33:38 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-05-24 15:39:06 -0400
commit3e431e26c5c3aba6da8a6555ec3d7b7df53f534a (patch)
treede7baabb5bbc4a5d27af36d62c00827b7bad3f54 /drivers/gpu/nvgpu/gk20a
parentdc7af18bf8056c213165d4cd1c55ea0fba9f1341 (diff)
gpu: nvgpu: add PRAMIN support for mem accessors
To support vidmem, implement a way to access buffers via the PRAMIN window instead of just kernel-mapped sysmem buffers for iGPU as of now. Depending on the buffer aperture, choose between the two access types in the buffer memory accessor functions. vmap()/vunmap() pairs are no-ops for buffers that can't be cpu-mapped. Two uses of DMA_ATTR_READ_ONLY are removed in the ucode loading path to support writing to them too via the indirection in addition to cpu. JIRA DNVGPU-23 Change-Id: I282dba6741c6b8224bc12e69c1fb3936bde7e6ed Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1141314 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h24
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h57
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c102
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h15
-rw-r--r--drivers/gpu/nvgpu/gk20a/pmu_gk20a.c3
6 files changed, 191 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index a5caf048..076ddd12 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2036,8 +2036,7 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2036 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), 2036 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
2037 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); 2037 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
2038 2038
2039 err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size, 2039 err = gk20a_gmmu_alloc(g, ucode_size, &ucode_info->surface_desc);
2040 &ucode_info->surface_desc);
2041 if (err) 2040 if (err)
2042 goto clean_up; 2041 goto clean_up;
2043 2042
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
index 8a69c573..2c902f52 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h
@@ -50,6 +50,30 @@
50#ifndef _hw_bus_gk20a_h_ 50#ifndef _hw_bus_gk20a_h_
51#define _hw_bus_gk20a_h_ 51#define _hw_bus_gk20a_h_
52 52
53static inline u32 bus_bar0_window_r(void)
54{
55 return 0x00001700;
56}
57static inline u32 bus_bar0_window_base_f(u32 v)
58{
59 return (v & 0xffffff) << 0;
60}
61static inline u32 bus_bar0_window_target_vid_mem_f(void)
62{
63 return 0x0;
64}
65static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void)
66{
67 return 0x2000000;
68}
69static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void)
70{
71 return 0x3000000;
72}
73static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void)
74{
75 return 0x00000010;
76}
53static inline u32 bus_bar1_block_r(void) 77static inline u32 bus_bar1_block_r(void)
54{ 78{
55 return 0x00001704; 79 return 0x00001704;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h
new file mode 100644
index 00000000..918dad9a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h
@@ -0,0 +1,57 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16/*
17 * Function naming determines intended use:
18 *
19 * <x>_r(void) : Returns the offset for register <x>.
20 *
21 * <x>_o(void) : Returns the offset for element <x>.
22 *
23 * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
24 *
25 * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
26 *
27 * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
28 * and masked to place it at field <y> of register <x>. This value
29 * can be |'d with others to produce a full register value for
30 * register <x>.
31 *
32 * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
33 * value can be ~'d and then &'d to clear the value of field <y> for
34 * register <x>.
35 *
36 * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
37 * to place it at field <y> of register <x>. This value can be |'d
38 * with others to produce a full register value for <x>.
39 *
40 * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
41 * <x> value 'r' after being shifted to place its LSB at bit 0.
42 * This value is suitable for direct comparison with other unshifted
43 * values appropriate for use in field <y> of register <x>.
44 *
45 * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
46 * field <y> of register <x>. This value is suitable for direct
47 * comparison with unshifted values appropriate for use in field <y>
48 * of register <x>.
49 */
50#ifndef _hw_pram_gk20a_h_
51#define _hw_pram_gk20a_h_
52
53static inline u32 pram_data032_r(u32 i)
54{
55 return 0x00700000 + i*4;
56}
57#endif
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index eb4f01e0..ec946fb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -37,6 +37,7 @@
37#include "hw_fb_gk20a.h" 37#include "hw_fb_gk20a.h"
38#include "hw_bus_gk20a.h" 38#include "hw_bus_gk20a.h"
39#include "hw_ram_gk20a.h" 39#include "hw_ram_gk20a.h"
40#include "hw_pram_gk20a.h"
40#include "hw_mc_gk20a.h" 41#include "hw_mc_gk20a.h"
41#include "hw_flush_gk20a.h" 42#include "hw_flush_gk20a.h"
42#include "hw_ltc_gk20a.h" 43#include "hw_ltc_gk20a.h"
@@ -44,10 +45,20 @@
44#include "kind_gk20a.h" 45#include "kind_gk20a.h"
45#include "semaphore_gk20a.h" 46#include "semaphore_gk20a.h"
46 47
48/*
49 * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the
50 * boot, even for buffers that would work via cpu_va. In runtime, the flag is
51 * in debugfs, called "force_pramin".
52 */
53#define GK20A_FORCE_PRAMIN_DEFAULT false
54
47int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) 55int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
48{ 56{
49 void *cpu_va; 57 void *cpu_va;
50 58
59 if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
60 return 0;
61
51 if (WARN_ON(mem->cpu_va)) { 62 if (WARN_ON(mem->cpu_va)) {
52 gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); 63 gk20a_warn(dev_from_gk20a(g), "nested %s", __func__);
53 return -EBUSY; 64 return -EBUSY;
@@ -66,20 +77,66 @@ int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
66 77
67void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) 78void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
68{ 79{
80 if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
81 return;
82
69 vunmap(mem->cpu_va); 83 vunmap(mem->cpu_va);
70 mem->cpu_va = NULL; 84 mem->cpu_va = NULL;
71} 85}
72 86
87/* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */
88static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
89{
90 u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
91 u64 addr = bufbase + w * sizeof(u32);
92 u32 hi = (u32)((addr & ~(u64)0xfffff)
93 >> bus_bar0_window_target_bar0_window_base_shift_v());
94 u32 lo = (addr & 0xfffff);
95
96 gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem);
97
98 WARN_ON(!bufbase);
99 spin_lock(&g->mm.pramin_base_lock);
100 if (g->mm.pramin_base != hi) {
101 gk20a_writel(g, bus_bar0_window_r(),
102 (g->mm.vidmem_is_vidmem
103 && mem->aperture == APERTURE_SYSMEM ?
104 bus_bar0_window_target_sys_mem_noncoherent_f() :
105 bus_bar0_window_target_vid_mem_f()) |
106 bus_bar0_window_base_f(hi));
107 gk20a_readl(g, bus_bar0_window_r());
108 g->mm.pramin_base = hi;
109 }
110
111 return lo;
112}
113
114static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
115{
116 gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
117 spin_unlock(&g->mm.pramin_base_lock);
118}
119
73u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) 120u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
74{ 121{
75 u32 *ptr = mem->cpu_va; 122 u32 data = 0;
76 u32 data; 123
124 if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
125 u32 *ptr = mem->cpu_va;
77 126
78 WARN_ON(!ptr); 127 WARN_ON(!ptr);
79 data = ptr[w]; 128 data = ptr[w];
80#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM 129#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
81 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); 130 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
82#endif 131#endif
132 } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
133 u32 addr = gk20a_pramin_enter(g, mem, w);
134 data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
135 gk20a_pramin_exit(g, mem);
136 } else {
137 WARN_ON("Accessing unallocated mem_desc");
138 }
139
83 return data; 140 return data;
84} 141}
85 142
@@ -106,13 +163,23 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
106 163
107void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) 164void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
108{ 165{
109 u32 *ptr = mem->cpu_va; 166 if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
167 u32 *ptr = mem->cpu_va;
110 168
111 WARN_ON(!ptr); 169 WARN_ON(!ptr);
112#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM 170#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
113 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); 171 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
114#endif 172#endif
115 ptr[w] = data; 173 ptr[w] = data;
174 } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
175 u32 addr = gk20a_pramin_enter(g, mem, w);
176 gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
177 /* read back to synchronize accesses*/
178 gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
179 gk20a_pramin_exit(g, mem);
180 } else {
181 WARN_ON("Accessing unallocated mem_desc");
182 }
116} 183}
117 184
118void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) 185void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data)
@@ -535,6 +602,13 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g)
535 return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); 602 return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush);
536} 603}
537 604
605static void gk20a_init_pramin(struct mm_gk20a *mm)
606{
607 mm->pramin_base = 0;
608 spin_lock_init(&mm->pramin_base_lock);
609 mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT;
610}
611
538int gk20a_init_mm_setup_sw(struct gk20a *g) 612int gk20a_init_mm_setup_sw(struct gk20a *g)
539{ 613{
540 struct mm_gk20a *mm = &g->mm; 614 struct mm_gk20a *mm = &g->mm;
@@ -558,6 +632,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
558 (int)(mm->channel.user_size >> 20), 632 (int)(mm->channel.user_size >> 20),
559 (int)(mm->channel.kernel_size >> 20)); 633 (int)(mm->channel.kernel_size >> 20));
560 634
635 gk20a_init_pramin(mm);
636
561 err = gk20a_alloc_sysmem_flush(g); 637 err = gk20a_alloc_sysmem_flush(g);
562 if (err) 638 if (err)
563 return err; 639 return err;
@@ -586,6 +662,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
586 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ 662 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
587 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; 663 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
588 mm->remove_support = gk20a_remove_mm_support; 664 mm->remove_support = gk20a_remove_mm_support;
665
589 mm->sw_ready = true; 666 mm->sw_ready = true;
590 667
591 gk20a_dbg_fn("done"); 668 gk20a_dbg_fn("done");
@@ -690,6 +767,7 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
690 entry->mem.cpu_va = page_address(pages); 767 entry->mem.cpu_va = page_address(pages);
691 memset(entry->mem.cpu_va, 0, len); 768 memset(entry->mem.cpu_va, 0, len);
692 entry->mem.size = len; 769 entry->mem.size = len;
770 entry->mem.aperture = APERTURE_SYSMEM;
693 FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); 771 FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len);
694 772
695 return 0; 773 return 0;
@@ -716,6 +794,7 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm,
716 kfree(entry->mem.sgt); 794 kfree(entry->mem.sgt);
717 entry->mem.sgt = NULL; 795 entry->mem.sgt = NULL;
718 entry->mem.size = 0; 796 entry->mem.size = 0;
797 entry->mem.aperture = APERTURE_INVALID;
719} 798}
720 799
721static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) 800static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry)
@@ -2164,6 +2243,7 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, stru
2164 goto fail_free; 2243 goto fail_free;
2165 2244
2166 mem->size = size; 2245 mem->size = size;
2246 mem->aperture = APERTURE_SYSMEM;
2167 2247
2168 gk20a_dbg_fn("done"); 2248 gk20a_dbg_fn("done");
2169 2249
@@ -2210,6 +2290,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
2210 gk20a_free_sgtable(&mem->sgt); 2290 gk20a_free_sgtable(&mem->sgt);
2211 2291
2212 mem->size = 0; 2292 mem->size = 0;
2293 mem->aperture = APERTURE_INVALID;
2213} 2294}
2214 2295
2215void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) 2296void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
@@ -4015,6 +4096,9 @@ void gk20a_mm_debugfs_init(struct device *dev)
4015 4096
4016 debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, 4097 debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root,
4017 &g->separate_fixed_allocs); 4098 &g->separate_fixed_allocs);
4099
4100 debugfs_create_bool("force_pramin", 0664, gpu_root,
4101 &g->mm.force_pramin);
4018} 4102}
4019 4103
4020void gk20a_init_mm(struct gpu_ops *gops) 4104void gk20a_init_mm(struct gpu_ops *gops)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d943b231..c58a4fec 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -40,10 +40,17 @@
40 outer_flush_range(pa, pa + (size_t)(size)); \ 40 outer_flush_range(pa, pa + (size_t)(size)); \
41 } while (0) 41 } while (0)
42 42
43enum gk20a_aperture {
44 APERTURE_INVALID, /* e.g., unallocated */
45 APERTURE_SYSMEM,
46 APERTURE_VIDMEM
47};
48
43struct mem_desc { 49struct mem_desc {
44 void *cpu_va; 50 void *cpu_va;
45 struct page **pages; 51 struct page **pages;
46 struct sg_table *sgt; 52 struct sg_table *sgt;
53 enum gk20a_aperture aperture;
47 size_t size; 54 size_t size;
48 u64 gpu_va; 55 u64 gpu_va;
49}; 56};
@@ -357,6 +364,14 @@ struct mm_gk20a {
357 bool vidmem_is_vidmem; 364 bool vidmem_is_vidmem;
358 365
359 struct mem_desc sysmem_flush; 366 struct mem_desc sysmem_flush;
367
368 u32 pramin_base;
369 spinlock_t pramin_base_lock;
370#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0)
371 u32 force_pramin; /* via debugfs */
372#else
373 bool force_pramin; /* via debugfs */
374#endif
360}; 375};
361 376
362int gk20a_mm_init(struct mm_gk20a *mm); 377int gk20a_mm_init(struct mm_gk20a *mm);
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 8bf382fd..08ef7738 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2443,8 +2443,7 @@ static int gk20a_prepare_ucode(struct gk20a *g)
2443 pmu->ucode_image = (u32 *)((u8 *)pmu->desc + 2443 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
2444 pmu->desc->descriptor_size); 2444 pmu->desc->descriptor_size);
2445 2445
2446 err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, 2446 err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode);
2447 GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode);
2448 if (err) 2447 if (err)
2449 goto err_release_fw; 2448 goto err_release_fw;
2450 2449