diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-16 04:33:38 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-24 15:39:06 -0400 |
commit | 3e431e26c5c3aba6da8a6555ec3d7b7df53f534a (patch) | |
tree | de7baabb5bbc4a5d27af36d62c00827b7bad3f54 /drivers | |
parent | dc7af18bf8056c213165d4cd1c55ea0fba9f1341 (diff) |
gpu: nvgpu: add PRAMIN support for mem accessors
To support vidmem, implement a way to access buffers via the PRAMIN
window, rather than only through kernel-mapped sysmem buffers as is done
for iGPU today.
Depending on the buffer aperture, choose between the two access types in
the buffer memory accessor functions.
vmap()/vunmap() pairs are no-ops for buffers that can't be cpu-mapped.
Two uses of DMA_ATTR_READ_ONLY are removed in the ucode loading path so
that those buffers can also be written via the PRAMIN indirection, in
addition to the cpu.
JIRA DNVGPU-23
Change-Id: I282dba6741c6b8224bc12e69c1fb3936bde7e6ed
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1141314
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h | 57 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 102 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h | 57 |
8 files changed, 272 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index a5caf048..076ddd12 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -2036,8 +2036,7 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2036 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), | 2036 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), |
2037 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); | 2037 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); |
2038 | 2038 | ||
2039 | err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size, | 2039 | err = gk20a_gmmu_alloc(g, ucode_size, &ucode_info->surface_desc); |
2040 | &ucode_info->surface_desc); | ||
2041 | if (err) | 2040 | if (err) |
2042 | goto clean_up; | 2041 | goto clean_up; |
2043 | 2042 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h index 8a69c573..2c902f52 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | |||
@@ -50,6 +50,30 @@ | |||
50 | #ifndef _hw_bus_gk20a_h_ | 50 | #ifndef _hw_bus_gk20a_h_ |
51 | #define _hw_bus_gk20a_h_ | 51 | #define _hw_bus_gk20a_h_ |
52 | 52 | ||
53 | static inline u32 bus_bar0_window_r(void) | ||
54 | { | ||
55 | return 0x00001700; | ||
56 | } | ||
57 | static inline u32 bus_bar0_window_base_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xffffff) << 0; | ||
60 | } | ||
61 | static inline u32 bus_bar0_window_target_vid_mem_f(void) | ||
62 | { | ||
63 | return 0x0; | ||
64 | } | ||
65 | static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void) | ||
66 | { | ||
67 | return 0x2000000; | ||
68 | } | ||
69 | static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void) | ||
70 | { | ||
71 | return 0x3000000; | ||
72 | } | ||
73 | static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void) | ||
74 | { | ||
75 | return 0x00000010; | ||
76 | } | ||
53 | static inline u32 bus_bar1_block_r(void) | 77 | static inline u32 bus_bar1_block_r(void) |
54 | { | 78 | { |
55 | return 0x00001704; | 79 | return 0x00001704; |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h new file mode 100644 index 00000000..918dad9a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pram_gk20a_h_ | ||
51 | #define _hw_pram_gk20a_h_ | ||
52 | |||
53 | static inline u32 pram_data032_r(u32 i) | ||
54 | { | ||
55 | return 0x00700000 + i*4; | ||
56 | } | ||
57 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index eb4f01e0..ec946fb6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include "hw_fb_gk20a.h" | 37 | #include "hw_fb_gk20a.h" |
38 | #include "hw_bus_gk20a.h" | 38 | #include "hw_bus_gk20a.h" |
39 | #include "hw_ram_gk20a.h" | 39 | #include "hw_ram_gk20a.h" |
40 | #include "hw_pram_gk20a.h" | ||
40 | #include "hw_mc_gk20a.h" | 41 | #include "hw_mc_gk20a.h" |
41 | #include "hw_flush_gk20a.h" | 42 | #include "hw_flush_gk20a.h" |
42 | #include "hw_ltc_gk20a.h" | 43 | #include "hw_ltc_gk20a.h" |
@@ -44,10 +45,20 @@ | |||
44 | #include "kind_gk20a.h" | 45 | #include "kind_gk20a.h" |
45 | #include "semaphore_gk20a.h" | 46 | #include "semaphore_gk20a.h" |
46 | 47 | ||
48 | /* | ||
49 | * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the | ||
50 | * boot, even for buffers that would work via cpu_va. In runtime, the flag is | ||
51 | * in debugfs, called "force_pramin". | ||
52 | */ | ||
53 | #define GK20A_FORCE_PRAMIN_DEFAULT false | ||
54 | |||
47 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 55 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
48 | { | 56 | { |
49 | void *cpu_va; | 57 | void *cpu_va; |
50 | 58 | ||
59 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
60 | return 0; | ||
61 | |||
51 | if (WARN_ON(mem->cpu_va)) { | 62 | if (WARN_ON(mem->cpu_va)) { |
52 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | 63 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); |
53 | return -EBUSY; | 64 | return -EBUSY; |
@@ -66,20 +77,66 @@ int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | |||
66 | 77 | ||
67 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | 78 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) |
68 | { | 79 | { |
80 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
81 | return; | ||
82 | |||
69 | vunmap(mem->cpu_va); | 83 | vunmap(mem->cpu_va); |
70 | mem->cpu_va = NULL; | 84 | mem->cpu_va = NULL; |
71 | } | 85 | } |
72 | 86 | ||
87 | /* WARNING: returns pramin_base_lock taken, complement with pramin_exit() */ | ||
88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
89 | { | ||
90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
91 | u64 addr = bufbase + w * sizeof(u32); | ||
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | ||
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | ||
94 | u32 lo = (addr & 0xfffff); | ||
95 | |||
96 | gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); | ||
97 | |||
98 | WARN_ON(!bufbase); | ||
99 | spin_lock(&g->mm.pramin_base_lock); | ||
100 | if (g->mm.pramin_base != hi) { | ||
101 | gk20a_writel(g, bus_bar0_window_r(), | ||
102 | (g->mm.vidmem_is_vidmem | ||
103 | && mem->aperture == APERTURE_SYSMEM ? | ||
104 | bus_bar0_window_target_sys_mem_noncoherent_f() : | ||
105 | bus_bar0_window_target_vid_mem_f()) | | ||
106 | bus_bar0_window_base_f(hi)); | ||
107 | gk20a_readl(g, bus_bar0_window_r()); | ||
108 | g->mm.pramin_base = hi; | ||
109 | } | ||
110 | |||
111 | return lo; | ||
112 | } | ||
113 | |||
114 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) | ||
115 | { | ||
116 | gk20a_dbg(gpu_dbg_mem, "end for %p", mem); | ||
117 | spin_unlock(&g->mm.pramin_base_lock); | ||
118 | } | ||
119 | |||
73 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | 120 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) |
74 | { | 121 | { |
75 | u32 *ptr = mem->cpu_va; | 122 | u32 data = 0; |
76 | u32 data; | 123 | |
124 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
125 | u32 *ptr = mem->cpu_va; | ||
77 | 126 | ||
78 | WARN_ON(!ptr); | 127 | WARN_ON(!ptr); |
79 | data = ptr[w]; | 128 | data = ptr[w]; |
80 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 129 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
81 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 130 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
82 | #endif | 131 | #endif |
132 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
133 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
134 | data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
135 | gk20a_pramin_exit(g, mem); | ||
136 | } else { | ||
137 | WARN_ON("Accessing unallocated mem_desc"); | ||
138 | } | ||
139 | |||
83 | return data; | 140 | return data; |
84 | } | 141 | } |
85 | 142 | ||
@@ -106,13 +163,23 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | |||
106 | 163 | ||
107 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | 164 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) |
108 | { | 165 | { |
109 | u32 *ptr = mem->cpu_va; | 166 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { |
167 | u32 *ptr = mem->cpu_va; | ||
110 | 168 | ||
111 | WARN_ON(!ptr); | 169 | WARN_ON(!ptr); |
112 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 170 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
113 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 171 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
114 | #endif | 172 | #endif |
115 | ptr[w] = data; | 173 | ptr[w] = data; |
174 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
175 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
176 | gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); | ||
177 | /* read back to synchronize accesses*/ | ||
178 | gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
179 | gk20a_pramin_exit(g, mem); | ||
180 | } else { | ||
181 | WARN_ON("Accessing unallocated mem_desc"); | ||
182 | } | ||
116 | } | 183 | } |
117 | 184 | ||
118 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | 185 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) |
@@ -535,6 +602,13 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) | |||
535 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); | 602 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); |
536 | } | 603 | } |
537 | 604 | ||
605 | static void gk20a_init_pramin(struct mm_gk20a *mm) | ||
606 | { | ||
607 | mm->pramin_base = 0; | ||
608 | spin_lock_init(&mm->pramin_base_lock); | ||
609 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | ||
610 | } | ||
611 | |||
538 | int gk20a_init_mm_setup_sw(struct gk20a *g) | 612 | int gk20a_init_mm_setup_sw(struct gk20a *g) |
539 | { | 613 | { |
540 | struct mm_gk20a *mm = &g->mm; | 614 | struct mm_gk20a *mm = &g->mm; |
@@ -558,6 +632,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
558 | (int)(mm->channel.user_size >> 20), | 632 | (int)(mm->channel.user_size >> 20), |
559 | (int)(mm->channel.kernel_size >> 20)); | 633 | (int)(mm->channel.kernel_size >> 20)); |
560 | 634 | ||
635 | gk20a_init_pramin(mm); | ||
636 | |||
561 | err = gk20a_alloc_sysmem_flush(g); | 637 | err = gk20a_alloc_sysmem_flush(g); |
562 | if (err) | 638 | if (err) |
563 | return err; | 639 | return err; |
@@ -586,6 +662,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
586 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | 662 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ |
587 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | 663 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; |
588 | mm->remove_support = gk20a_remove_mm_support; | 664 | mm->remove_support = gk20a_remove_mm_support; |
665 | |||
589 | mm->sw_ready = true; | 666 | mm->sw_ready = true; |
590 | 667 | ||
591 | gk20a_dbg_fn("done"); | 668 | gk20a_dbg_fn("done"); |
@@ -690,6 +767,7 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | |||
690 | entry->mem.cpu_va = page_address(pages); | 767 | entry->mem.cpu_va = page_address(pages); |
691 | memset(entry->mem.cpu_va, 0, len); | 768 | memset(entry->mem.cpu_va, 0, len); |
692 | entry->mem.size = len; | 769 | entry->mem.size = len; |
770 | entry->mem.aperture = APERTURE_SYSMEM; | ||
693 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); | 771 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); |
694 | 772 | ||
695 | return 0; | 773 | return 0; |
@@ -716,6 +794,7 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm, | |||
716 | kfree(entry->mem.sgt); | 794 | kfree(entry->mem.sgt); |
717 | entry->mem.sgt = NULL; | 795 | entry->mem.sgt = NULL; |
718 | entry->mem.size = 0; | 796 | entry->mem.size = 0; |
797 | entry->mem.aperture = APERTURE_INVALID; | ||
719 | } | 798 | } |
720 | 799 | ||
721 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | 800 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) |
@@ -2164,6 +2243,7 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, stru | |||
2164 | goto fail_free; | 2243 | goto fail_free; |
2165 | 2244 | ||
2166 | mem->size = size; | 2245 | mem->size = size; |
2246 | mem->aperture = APERTURE_SYSMEM; | ||
2167 | 2247 | ||
2168 | gk20a_dbg_fn("done"); | 2248 | gk20a_dbg_fn("done"); |
2169 | 2249 | ||
@@ -2210,6 +2290,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | |||
2210 | gk20a_free_sgtable(&mem->sgt); | 2290 | gk20a_free_sgtable(&mem->sgt); |
2211 | 2291 | ||
2212 | mem->size = 0; | 2292 | mem->size = 0; |
2293 | mem->aperture = APERTURE_INVALID; | ||
2213 | } | 2294 | } |
2214 | 2295 | ||
2215 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | 2296 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) |
@@ -4015,6 +4096,9 @@ void gk20a_mm_debugfs_init(struct device *dev) | |||
4015 | 4096 | ||
4016 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, | 4097 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, |
4017 | &g->separate_fixed_allocs); | 4098 | &g->separate_fixed_allocs); |
4099 | |||
4100 | debugfs_create_bool("force_pramin", 0664, gpu_root, | ||
4101 | &g->mm.force_pramin); | ||
4018 | } | 4102 | } |
4019 | 4103 | ||
4020 | void gk20a_init_mm(struct gpu_ops *gops) | 4104 | void gk20a_init_mm(struct gpu_ops *gops) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d943b231..c58a4fec 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -40,10 +40,17 @@ | |||
40 | outer_flush_range(pa, pa + (size_t)(size)); \ | 40 | outer_flush_range(pa, pa + (size_t)(size)); \ |
41 | } while (0) | 41 | } while (0) |
42 | 42 | ||
43 | enum gk20a_aperture { | ||
44 | APERTURE_INVALID, /* e.g., unallocated */ | ||
45 | APERTURE_SYSMEM, | ||
46 | APERTURE_VIDMEM | ||
47 | }; | ||
48 | |||
43 | struct mem_desc { | 49 | struct mem_desc { |
44 | void *cpu_va; | 50 | void *cpu_va; |
45 | struct page **pages; | 51 | struct page **pages; |
46 | struct sg_table *sgt; | 52 | struct sg_table *sgt; |
53 | enum gk20a_aperture aperture; | ||
47 | size_t size; | 54 | size_t size; |
48 | u64 gpu_va; | 55 | u64 gpu_va; |
49 | }; | 56 | }; |
@@ -357,6 +364,14 @@ struct mm_gk20a { | |||
357 | bool vidmem_is_vidmem; | 364 | bool vidmem_is_vidmem; |
358 | 365 | ||
359 | struct mem_desc sysmem_flush; | 366 | struct mem_desc sysmem_flush; |
367 | |||
368 | u32 pramin_base; | ||
369 | spinlock_t pramin_base_lock; | ||
370 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) | ||
371 | u32 force_pramin; /* via debugfs */ | ||
372 | #else | ||
373 | bool force_pramin; /* via debugfs */ | ||
374 | #endif | ||
360 | }; | 375 | }; |
361 | 376 | ||
362 | int gk20a_mm_init(struct mm_gk20a *mm); | 377 | int gk20a_mm_init(struct mm_gk20a *mm); |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 8bf382fd..08ef7738 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -2443,8 +2443,7 @@ static int gk20a_prepare_ucode(struct gk20a *g) | |||
2443 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + | 2443 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + |
2444 | pmu->desc->descriptor_size); | 2444 | pmu->desc->descriptor_size); |
2445 | 2445 | ||
2446 | err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, | 2446 | err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); |
2447 | GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); | ||
2448 | if (err) | 2447 | if (err) |
2449 | goto err_release_fw; | 2448 | goto err_release_fw; |
2450 | 2449 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h index e69275e0..0b4eefe0 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h | |||
@@ -50,6 +50,30 @@ | |||
50 | #ifndef _hw_bus_gm20b_h_ | 50 | #ifndef _hw_bus_gm20b_h_ |
51 | #define _hw_bus_gm20b_h_ | 51 | #define _hw_bus_gm20b_h_ |
52 | 52 | ||
53 | static inline u32 bus_bar0_window_r(void) | ||
54 | { | ||
55 | return 0x00001700; | ||
56 | } | ||
57 | static inline u32 bus_bar0_window_base_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xffffff) << 0; | ||
60 | } | ||
61 | static inline u32 bus_bar0_window_target_vid_mem_f(void) | ||
62 | { | ||
63 | return 0x0; | ||
64 | } | ||
65 | static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void) | ||
66 | { | ||
67 | return 0x2000000; | ||
68 | } | ||
69 | static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void) | ||
70 | { | ||
71 | return 0x3000000; | ||
72 | } | ||
73 | static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void) | ||
74 | { | ||
75 | return 0x00000010; | ||
76 | } | ||
53 | static inline u32 bus_bar1_block_r(void) | 77 | static inline u32 bus_bar1_block_r(void) |
54 | { | 78 | { |
55 | return 0x00001704; | 79 | return 0x00001704; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h new file mode 100644 index 00000000..f9c6f3d4 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pram_gm20b_h_ | ||
51 | #define _hw_pram_gm20b_h_ | ||
52 | |||
53 | static inline u32 pram_data032_r(u32 i) | ||
54 | { | ||
55 | return 0x00700000 + i*4; | ||
56 | } | ||
57 | #endif | ||