diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile.nvgpu | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pramin.c | 129 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 178 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 71 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pramin_gk20a.h | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/pramin.h | 46 |
11 files changed, 301 insertions, 168 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 9cf5195b..143224c8 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -30,11 +30,13 @@ nvgpu-y := \ | |||
30 | common/mm/buddy_allocator.o \ | 30 | common/mm/buddy_allocator.o \ |
31 | common/mm/page_allocator.o \ | 31 | common/mm/page_allocator.o \ |
32 | common/mm/lockless_allocator.o \ | 32 | common/mm/lockless_allocator.o \ |
33 | common/pramin.o \ | ||
33 | common/nvgpu_common.o \ | 34 | common/nvgpu_common.o \ |
34 | common/semaphore.o \ | 35 | common/semaphore.o \ |
35 | common/vbios/bios.o \ | 36 | common/vbios/bios.o \ |
36 | gk20a/gk20a.o \ | 37 | gk20a/gk20a.o \ |
37 | gk20a/bus_gk20a.o \ | 38 | gk20a/bus_gk20a.o \ |
39 | gk20a/pramin_gk20a.o \ | ||
38 | gk20a/sched_gk20a.o \ | 40 | gk20a/sched_gk20a.o \ |
39 | gk20a/as_gk20a.o \ | 41 | gk20a/as_gk20a.o \ |
40 | gk20a/ctrl_gk20a.o \ | 42 | gk20a/ctrl_gk20a.o \ |
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c new file mode 100644 index 00000000..b9216309 --- /dev/null +++ b/drivers/gpu/nvgpu/common/pramin.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/pramin.h> | ||
18 | #include <nvgpu/page_allocator.h> | ||
19 | |||
20 | #include "gk20a/gk20a.h" | ||
21 | |||
22 | /* | ||
23 | * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the | ||
24 | * boot, even for buffers that would work via cpu_va. In runtime, the flag is | ||
25 | * in debugfs, called "force_pramin". | ||
26 | */ | ||
27 | #define GK20A_FORCE_PRAMIN_DEFAULT false | ||
28 | |||
29 | void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
30 | { | ||
31 | u32 r = start, *dest_u32 = *arg; | ||
32 | |||
33 | if (!g->regs) { | ||
34 | __gk20a_warn_on_no_regs(); | ||
35 | return; | ||
36 | } | ||
37 | |||
38 | while (words--) { | ||
39 | *dest_u32++ = gk20a_readl(g, r); | ||
40 | r += sizeof(u32); | ||
41 | } | ||
42 | |||
43 | *arg = dest_u32; | ||
44 | } | ||
45 | |||
46 | void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
47 | { | ||
48 | u32 r = start, *src_u32 = *arg; | ||
49 | |||
50 | if (!g->regs) { | ||
51 | __gk20a_warn_on_no_regs(); | ||
52 | return; | ||
53 | } | ||
54 | |||
55 | while (words--) { | ||
56 | writel_relaxed(*src_u32++, g->regs + r); | ||
57 | r += sizeof(u32); | ||
58 | } | ||
59 | |||
60 | *arg = src_u32; | ||
61 | } | ||
62 | |||
63 | void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
64 | { | ||
65 | u32 r = start, repeat = **arg; | ||
66 | |||
67 | if (!g->regs) { | ||
68 | __gk20a_warn_on_no_regs(); | ||
69 | return; | ||
70 | } | ||
71 | |||
72 | while (words--) { | ||
73 | writel_relaxed(repeat, g->regs + r); | ||
74 | r += sizeof(u32); | ||
75 | } | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that. | ||
80 | * This same loop is used for read/write/memset. Offset and size in bytes. | ||
81 | * One call to "loop" is done per range, with "arg" supplied. | ||
82 | */ | ||
83 | void nvgpu_pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | ||
84 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | ||
85 | { | ||
86 | struct nvgpu_page_alloc *alloc = NULL; | ||
87 | struct page_alloc_chunk *chunk = NULL; | ||
88 | u32 byteoff, start_reg, until_end, n; | ||
89 | |||
90 | alloc = get_vidmem_page_alloc(mem->sgt->sgl); | ||
91 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
92 | if (offset >= chunk->length) | ||
93 | offset -= chunk->length; | ||
94 | else | ||
95 | break; | ||
96 | } | ||
97 | |||
98 | offset /= sizeof(u32); | ||
99 | |||
100 | while (size) { | ||
101 | byteoff = g->ops.pramin.enter(g, mem, chunk, offset); | ||
102 | start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); | ||
103 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); | ||
104 | |||
105 | n = min3(size, until_end, (u32)(chunk->length - offset)); | ||
106 | |||
107 | loop(g, start_reg, n / sizeof(u32), arg); | ||
108 | |||
109 | /* read back to synchronize accesses */ | ||
110 | gk20a_readl(g, start_reg); | ||
111 | g->ops.pramin.exit(g, mem, chunk); | ||
112 | |||
113 | size -= n; | ||
114 | |||
115 | if (n == (chunk->length - offset)) { | ||
116 | chunk = list_next_entry(chunk, list_entry); | ||
117 | offset = 0; | ||
118 | } else { | ||
119 | offset += n / sizeof(u32); | ||
120 | } | ||
121 | } | ||
122 | } | ||
123 | |||
124 | void nvgpu_init_pramin(struct mm_gk20a *mm) | ||
125 | { | ||
126 | mm->pramin_window = 0; | ||
127 | nvgpu_spinlock_init(&mm->pramin_window_lock); | ||
128 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | ||
129 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 88acc3ec..451e32ca 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -40,6 +40,7 @@ struct dbg_profiler_object_data; | |||
40 | 40 | ||
41 | #include "../../../arch/arm/mach-tegra/iomap.h" | 41 | #include "../../../arch/arm/mach-tegra/iomap.h" |
42 | 42 | ||
43 | #include <nvgpu/pramin.h> | ||
43 | #include <nvgpu/acr/nvgpu_acr.h> | 44 | #include <nvgpu/acr/nvgpu_acr.h> |
44 | 45 | ||
45 | #include "as_gk20a.h" | 46 | #include "as_gk20a.h" |
@@ -70,6 +71,8 @@ struct dbg_profiler_object_data; | |||
70 | x = val | 71 | x = val |
71 | #endif | 72 | #endif |
72 | 73 | ||
74 | struct page_alloc_chunk; | ||
75 | |||
73 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. | 76 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. |
74 | 32 ns is the resolution of ptimer. */ | 77 | 32 ns is the resolution of ptimer. */ |
75 | #define PTIMER_REF_FREQ_HZ 31250000 | 78 | #define PTIMER_REF_FREQ_HZ 31250000 |
@@ -648,6 +651,13 @@ struct gpu_ops { | |||
648 | bool (*mmu_fault_pending)(struct gk20a *g); | 651 | bool (*mmu_fault_pending)(struct gk20a *g); |
649 | } mm; | 652 | } mm; |
650 | struct { | 653 | struct { |
654 | u32 (*enter)(struct gk20a *g, struct mem_desc *mem, | ||
655 | struct page_alloc_chunk *chunk, u32 w); | ||
656 | void (*exit)(struct gk20a *g, struct mem_desc *mem, | ||
657 | struct page_alloc_chunk *chunk); | ||
658 | u32 (*data032_r)(u32 i); | ||
659 | } pramin; | ||
660 | struct { | ||
651 | int (*init_therm_setup_hw)(struct gk20a *g); | 661 | int (*init_therm_setup_hw)(struct gk20a *g); |
652 | int (*elcg_init_idle_filters)(struct gk20a *g); | 662 | int (*elcg_init_idle_filters)(struct gk20a *g); |
653 | void (*therm_debugfs_init)(struct gk20a *g); | 663 | void (*therm_debugfs_init)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index dee8a19f..7a13ed9c 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include "tsg_gk20a.h" | 33 | #include "tsg_gk20a.h" |
34 | #include "dbg_gpu_gk20a.h" | 34 | #include "dbg_gpu_gk20a.h" |
35 | #include "css_gr_gk20a.h" | 35 | #include "css_gr_gk20a.h" |
36 | #include "pramin_gk20a.h" | ||
36 | 37 | ||
37 | #include <nvgpu/hw/gk20a/hw_proj_gk20a.h> | 38 | #include <nvgpu/hw/gk20a/hw_proj_gk20a.h> |
38 | 39 | ||
@@ -165,6 +166,7 @@ int gk20a_init_hal(struct gk20a *g) | |||
165 | gk20a_init_dbg_session_ops(gops); | 166 | gk20a_init_dbg_session_ops(gops); |
166 | gk20a_init_therm_ops(gops); | 167 | gk20a_init_therm_ops(gops); |
167 | gk20a_init_tsg_ops(gops); | 168 | gk20a_init_tsg_ops(gops); |
169 | gk20a_init_pramin_ops(gops); | ||
168 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 170 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
169 | gk20a_init_css_ops(gops); | 171 | gk20a_init_css_ops(gops); |
170 | #endif | 172 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 05535412..94d31273 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -32,6 +32,7 @@ | |||
32 | 32 | ||
33 | #include <nvgpu/kmem.h> | 33 | #include <nvgpu/kmem.h> |
34 | #include <nvgpu/timers.h> | 34 | #include <nvgpu/timers.h> |
35 | #include <nvgpu/pramin.h> | ||
35 | #include <nvgpu/allocator.h> | 36 | #include <nvgpu/allocator.h> |
36 | #include <nvgpu/semaphore.h> | 37 | #include <nvgpu/semaphore.h> |
37 | #include <nvgpu/page_allocator.h> | 38 | #include <nvgpu/page_allocator.h> |
@@ -50,13 +51,6 @@ | |||
50 | #include <nvgpu/hw/gk20a/hw_flush_gk20a.h> | 51 | #include <nvgpu/hw/gk20a/hw_flush_gk20a.h> |
51 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> | 52 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> |
52 | 53 | ||
53 | /* | ||
54 | * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the | ||
55 | * boot, even for buffers that would work via cpu_va. In runtime, the flag is | ||
56 | * in debugfs, called "force_pramin". | ||
57 | */ | ||
58 | #define GK20A_FORCE_PRAMIN_DEFAULT false | ||
59 | |||
60 | #if defined(CONFIG_GK20A_VIDMEM) | 54 | #if defined(CONFIG_GK20A_VIDMEM) |
61 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); | 55 | static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); |
62 | #endif | 56 | #endif |
@@ -74,7 +68,7 @@ is_vidmem_page_alloc(u64 addr) | |||
74 | return !!(addr & 1ULL); | 68 | return !!(addr & 1ULL); |
75 | } | 69 | } |
76 | 70 | ||
77 | static inline struct nvgpu_page_alloc * | 71 | struct nvgpu_page_alloc * |
78 | get_vidmem_page_alloc(struct scatterlist *sgl) | 72 | get_vidmem_page_alloc(struct scatterlist *sgl) |
79 | { | 73 | { |
80 | u64 addr; | 74 | u64 addr; |
@@ -121,151 +115,6 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | |||
121 | mem->cpu_va = NULL; | 115 | mem->cpu_va = NULL; |
122 | } | 116 | } |
123 | 117 | ||
124 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ | ||
125 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, | ||
126 | struct page_alloc_chunk *chunk, u32 w) | ||
127 | { | ||
128 | u64 bufbase = chunk->base; | ||
129 | u64 addr = bufbase + w * sizeof(u32); | ||
130 | u32 hi = (u32)((addr & ~(u64)0xfffff) | ||
131 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | ||
132 | u32 lo = (u32)(addr & 0xfffff); | ||
133 | u32 win = gk20a_aperture_mask(g, mem, | ||
134 | bus_bar0_window_target_sys_mem_noncoherent_f(), | ||
135 | bus_bar0_window_target_vid_mem_f()) | | ||
136 | bus_bar0_window_base_f(hi); | ||
137 | |||
138 | gk20a_dbg(gpu_dbg_mem, | ||
139 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", | ||
140 | hi, lo, mem, chunk, bufbase, | ||
141 | bufbase + chunk->length, chunk->length); | ||
142 | |||
143 | WARN_ON(!bufbase); | ||
144 | |||
145 | nvgpu_spinlock_acquire(&g->mm.pramin_window_lock); | ||
146 | |||
147 | if (g->mm.pramin_window != win) { | ||
148 | gk20a_writel(g, bus_bar0_window_r(), win); | ||
149 | gk20a_readl(g, bus_bar0_window_r()); | ||
150 | g->mm.pramin_window = win; | ||
151 | } | ||
152 | |||
153 | return lo; | ||
154 | } | ||
155 | |||
156 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem, | ||
157 | struct page_alloc_chunk *chunk) | ||
158 | { | ||
159 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); | ||
160 | |||
161 | nvgpu_spinlock_release(&g->mm.pramin_window_lock); | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * Batch innerloop for the function below once per each PRAMIN range (some | ||
166 | * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}. | ||
167 | */ | ||
168 | typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, | ||
169 | u32 **arg); | ||
170 | |||
171 | /* | ||
172 | * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that. | ||
173 | * This same loop is used for read/write/memset. Offset and size in bytes. | ||
174 | * One call to "loop" is done per range, with "arg" supplied. | ||
175 | */ | ||
176 | static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | ||
177 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | ||
178 | { | ||
179 | struct nvgpu_page_alloc *alloc = NULL; | ||
180 | struct page_alloc_chunk *chunk = NULL; | ||
181 | u32 byteoff, start_reg, until_end, n; | ||
182 | |||
183 | alloc = get_vidmem_page_alloc(mem->sgt->sgl); | ||
184 | list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { | ||
185 | if (offset >= chunk->length) | ||
186 | offset -= chunk->length; | ||
187 | else | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | offset /= sizeof(u32); | ||
192 | |||
193 | while (size) { | ||
194 | byteoff = gk20a_pramin_enter(g, mem, chunk, offset); | ||
195 | start_reg = pram_data032_r(byteoff / sizeof(u32)); | ||
196 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); | ||
197 | |||
198 | n = min3(size, until_end, (u32)(chunk->length - offset)); | ||
199 | |||
200 | loop(g, start_reg, n / sizeof(u32), arg); | ||
201 | |||
202 | /* read back to synchronize accesses */ | ||
203 | gk20a_readl(g, start_reg); | ||
204 | gk20a_pramin_exit(g, mem, chunk); | ||
205 | |||
206 | size -= n; | ||
207 | |||
208 | if (n == (chunk->length - offset)) { | ||
209 | chunk = list_next_entry(chunk, list_entry); | ||
210 | offset = 0; | ||
211 | } else { | ||
212 | offset += n / sizeof(u32); | ||
213 | } | ||
214 | } | ||
215 | } | ||
216 | |||
217 | static inline void pramin_access_batch_rd_n(struct gk20a *g, u32 start, | ||
218 | u32 words, u32 **arg) | ||
219 | { | ||
220 | u32 r = start, *dest_u32 = *arg; | ||
221 | |||
222 | if (!g->regs) { | ||
223 | __gk20a_warn_on_no_regs(); | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | while (words--) { | ||
228 | *dest_u32++ = gk20a_readl(g, r); | ||
229 | r += sizeof(u32); | ||
230 | } | ||
231 | |||
232 | *arg = dest_u32; | ||
233 | } | ||
234 | |||
235 | static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start, | ||
236 | u32 words, u32 **arg) | ||
237 | { | ||
238 | u32 r = start, *src_u32 = *arg; | ||
239 | |||
240 | if (!g->regs) { | ||
241 | __gk20a_warn_on_no_regs(); | ||
242 | return; | ||
243 | } | ||
244 | |||
245 | while (words--) { | ||
246 | writel_relaxed(*src_u32++, g->regs + r); | ||
247 | r += sizeof(u32); | ||
248 | } | ||
249 | |||
250 | *arg = src_u32; | ||
251 | } | ||
252 | |||
253 | static inline void pramin_access_batch_set(struct gk20a *g, u32 start, | ||
254 | u32 words, u32 **arg) | ||
255 | { | ||
256 | u32 r = start, repeat = **arg; | ||
257 | |||
258 | if (!g->regs) { | ||
259 | __gk20a_warn_on_no_regs(); | ||
260 | return; | ||
261 | } | ||
262 | |||
263 | while (words--) { | ||
264 | writel_relaxed(repeat, g->regs + r); | ||
265 | r += sizeof(u32); | ||
266 | } | ||
267 | } | ||
268 | |||
269 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | 118 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) |
270 | { | 119 | { |
271 | u32 data = 0; | 120 | u32 data = 0; |
@@ -282,8 +131,8 @@ u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | |||
282 | u32 value; | 131 | u32 value; |
283 | u32 *p = &value; | 132 | u32 *p = &value; |
284 | 133 | ||
285 | pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32), | 134 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), |
286 | pramin_access_batch_rd_n, &p); | 135 | sizeof(u32), pramin_access_batch_rd_n, &p); |
287 | 136 | ||
288 | data = value; | 137 | data = value; |
289 | 138 | ||
@@ -319,7 +168,7 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | |||
319 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | 168 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { |
320 | u32 *dest_u32 = dest; | 169 | u32 *dest_u32 = dest; |
321 | 170 | ||
322 | pramin_access_batched(g, mem, offset, size, | 171 | nvgpu_pramin_access_batched(g, mem, offset, size, |
323 | pramin_access_batch_rd_n, &dest_u32); | 172 | pramin_access_batch_rd_n, &dest_u32); |
324 | } else { | 173 | } else { |
325 | WARN_ON("Accessing unallocated mem_desc"); | 174 | WARN_ON("Accessing unallocated mem_desc"); |
@@ -340,8 +189,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | |||
340 | u32 value = data; | 189 | u32 value = data; |
341 | u32 *p = &value; | 190 | u32 *p = &value; |
342 | 191 | ||
343 | pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32), | 192 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), |
344 | pramin_access_batch_wr_n, &p); | 193 | sizeof(u32), pramin_access_batch_wr_n, &p); |
345 | if (!mem->skip_wmb) | 194 | if (!mem->skip_wmb) |
346 | wmb(); | 195 | wmb(); |
347 | } else { | 196 | } else { |
@@ -374,7 +223,7 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | |||
374 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | 223 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { |
375 | u32 *src_u32 = src; | 224 | u32 *src_u32 = src; |
376 | 225 | ||
377 | pramin_access_batched(g, mem, offset, size, | 226 | nvgpu_pramin_access_batched(g, mem, offset, size, |
378 | pramin_access_batch_wr_n, &src_u32); | 227 | pramin_access_batch_wr_n, &src_u32); |
379 | if (!mem->skip_wmb) | 228 | if (!mem->skip_wmb) |
380 | wmb(); | 229 | wmb(); |
@@ -406,7 +255,7 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | |||
406 | u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); | 255 | u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); |
407 | u32 *p = &repeat_value; | 256 | u32 *p = &repeat_value; |
408 | 257 | ||
409 | pramin_access_batched(g, mem, offset, size, | 258 | nvgpu_pramin_access_batched(g, mem, offset, size, |
410 | pramin_access_batch_set, &p); | 259 | pramin_access_batch_set, &p); |
411 | if (!mem->skip_wmb) | 260 | if (!mem->skip_wmb) |
412 | wmb(); | 261 | wmb(); |
@@ -844,13 +693,6 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) | |||
844 | return gk20a_gmmu_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); | 693 | return gk20a_gmmu_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); |
845 | } | 694 | } |
846 | 695 | ||
847 | static void gk20a_init_pramin(struct mm_gk20a *mm) | ||
848 | { | ||
849 | mm->pramin_window = 0; | ||
850 | nvgpu_spinlock_init(&mm->pramin_window_lock); | ||
851 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | ||
852 | } | ||
853 | |||
854 | #if defined(CONFIG_GK20A_VIDMEM) | 696 | #if defined(CONFIG_GK20A_VIDMEM) |
855 | static int gk20a_vidmem_clear_all(struct gk20a *g) | 697 | static int gk20a_vidmem_clear_all(struct gk20a *g) |
856 | { | 698 | { |
@@ -1013,7 +855,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
1013 | (int)(mm->channel.user_size >> 20), | 855 | (int)(mm->channel.user_size >> 20), |
1014 | (int)(mm->channel.kernel_size >> 20)); | 856 | (int)(mm->channel.kernel_size >> 20)); |
1015 | 857 | ||
1016 | gk20a_init_pramin(mm); | 858 | nvgpu_init_pramin(mm); |
1017 | 859 | ||
1018 | mm->vidmem.ce_ctx_id = (u32)~0; | 860 | mm->vidmem.ce_ctx_id = (u32)~0; |
1019 | 861 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index be5ba477..2c8eb16d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -460,6 +460,9 @@ enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | |||
460 | u64 base, u64 size); | 460 | u64 base, u64 size); |
461 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); | 461 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); |
462 | 462 | ||
463 | struct nvgpu_page_alloc * | ||
464 | get_vidmem_page_alloc(struct scatterlist *sgl); | ||
465 | |||
463 | /* | 466 | /* |
464 | * Buffer accessors - wrap between begin() and end() if there is no permanent | 467 | * Buffer accessors - wrap between begin() and end() if there is no permanent |
465 | * kernel mapping for this buffer. | 468 | * kernel mapping for this buffer. |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c new file mode 100644 index 00000000..bed2e9b5 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/page_allocator.h> | ||
18 | |||
19 | #include "gk20a/gk20a.h" | ||
20 | #include "gk20a/mm_gk20a.h" | ||
21 | #include "gk20a/pramin_gk20a.h" | ||
22 | |||
23 | #include <nvgpu/hw/gk20a/hw_bus_gk20a.h> | ||
24 | #include <nvgpu/hw/gk20a/hw_pram_gk20a.h> | ||
25 | |||
26 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ | ||
27 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, | ||
28 | struct page_alloc_chunk *chunk, u32 w) | ||
29 | { | ||
30 | u64 bufbase = chunk->base; | ||
31 | u64 addr = bufbase + w * sizeof(u32); | ||
32 | u32 hi = (u32)((addr & ~(u64)0xfffff) | ||
33 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | ||
34 | u32 lo = (u32)(addr & 0xfffff); | ||
35 | u32 win = gk20a_aperture_mask(g, mem, | ||
36 | bus_bar0_window_target_sys_mem_noncoherent_f(), | ||
37 | bus_bar0_window_target_vid_mem_f()) | | ||
38 | bus_bar0_window_base_f(hi); | ||
39 | |||
40 | gk20a_dbg(gpu_dbg_mem, | ||
41 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", | ||
42 | hi, lo, mem, chunk, bufbase, | ||
43 | bufbase + chunk->length, chunk->length); | ||
44 | |||
45 | WARN_ON(!bufbase); | ||
46 | |||
47 | nvgpu_spinlock_acquire(&g->mm.pramin_window_lock); | ||
48 | |||
49 | if (g->mm.pramin_window != win) { | ||
50 | gk20a_writel(g, bus_bar0_window_r(), win); | ||
51 | gk20a_readl(g, bus_bar0_window_r()); | ||
52 | g->mm.pramin_window = win; | ||
53 | } | ||
54 | |||
55 | return lo; | ||
56 | } | ||
57 | |||
58 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem, | ||
59 | struct page_alloc_chunk *chunk) | ||
60 | { | ||
61 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); | ||
62 | |||
63 | nvgpu_spinlock_release(&g->mm.pramin_window_lock); | ||
64 | } | ||
65 | |||
66 | void gk20a_init_pramin_ops(struct gpu_ops *gops) | ||
67 | { | ||
68 | gops->pramin.enter = gk20a_pramin_enter; | ||
69 | gops->pramin.exit = gk20a_pramin_exit; | ||
70 | gops->pramin.data032_r = pram_data032_r; | ||
71 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h new file mode 100644 index 00000000..93d1cc75 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __PRAMIN_GK20A_H__ | ||
18 | #define __PRAMIN_GK20A_H__ | ||
19 | |||
20 | struct gpu_ops; | ||
21 | |||
22 | void gk20a_init_pramin_ops(struct gpu_ops *ops); | ||
23 | |||
24 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index e049a9f9..80ec8525 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a/dbg_gpu_gk20a.h" | 22 | #include "gk20a/dbg_gpu_gk20a.h" |
23 | #include "gk20a/css_gr_gk20a.h" | 23 | #include "gk20a/css_gr_gk20a.h" |
24 | #include "gk20a/bus_gk20a.h" | 24 | #include "gk20a/bus_gk20a.h" |
25 | #include "gk20a/pramin_gk20a.h" | ||
25 | 26 | ||
26 | #include "gp10b/gr_gp10b.h" | 27 | #include "gp10b/gr_gp10b.h" |
27 | #include "gp10b/fecs_trace_gp10b.h" | 28 | #include "gp10b/fecs_trace_gp10b.h" |
@@ -242,6 +243,7 @@ int gp106_init_hal(struct gk20a *g) | |||
242 | gp106_init_regops(gops); | 243 | gp106_init_regops(gops); |
243 | gp10b_init_cde_ops(gops); | 244 | gp10b_init_cde_ops(gops); |
244 | gk20a_init_tsg_ops(gops); | 245 | gk20a_init_tsg_ops(gops); |
246 | gk20a_init_pramin_ops(gops); | ||
245 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 247 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
246 | gk20a_init_css_ops(gops); | 248 | gk20a_init_css_ops(gops); |
247 | #endif | 249 | #endif |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index e9272a55..fc1168f5 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a/dbg_gpu_gk20a.h" | 22 | #include "gk20a/dbg_gpu_gk20a.h" |
23 | #include "gk20a/css_gr_gk20a.h" | 23 | #include "gk20a/css_gr_gk20a.h" |
24 | #include "gk20a/bus_gk20a.h" | 24 | #include "gk20a/bus_gk20a.h" |
25 | #include "gk20a/pramin_gk20a.h" | ||
25 | 26 | ||
26 | #include "gp10b/gr_gp10b.h" | 27 | #include "gp10b/gr_gp10b.h" |
27 | #include "gp10b/fecs_trace_gp10b.h" | 28 | #include "gp10b/fecs_trace_gp10b.h" |
@@ -250,6 +251,7 @@ int gp10b_init_hal(struct gk20a *g) | |||
250 | gp10b_init_cde_ops(gops); | 251 | gp10b_init_cde_ops(gops); |
251 | gp10b_init_therm_ops(gops); | 252 | gp10b_init_therm_ops(gops); |
252 | gk20a_init_tsg_ops(gops); | 253 | gk20a_init_tsg_ops(gops); |
254 | gk20a_init_pramin_ops(gops); | ||
253 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 255 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
254 | gk20a_init_css_ops(gops); | 256 | gk20a_init_css_ops(gops); |
255 | #endif | 257 | #endif |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pramin.h b/drivers/gpu/nvgpu/include/nvgpu/pramin.h new file mode 100644 index 00000000..7e0df06b --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/pramin.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_PRAMIN_H__ | ||
18 | #define __NVGPU_PRAMIN_H__ | ||
19 | |||
20 | #include <linux/types.h> | ||
21 | |||
22 | struct gk20a; | ||
23 | struct mm_gk20a; | ||
24 | struct mem_desc; | ||
25 | |||
26 | /* | ||
27 | * This typedef is for functions that get called during the access_batched() | ||
28 | * operation. | ||
29 | */ | ||
30 | typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, | ||
31 | u32 **arg); | ||
32 | |||
33 | /* | ||
34 | * Generally useful batch functions. | ||
35 | */ | ||
36 | void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg); | ||
37 | void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg); | ||
38 | void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg); | ||
39 | |||
40 | void nvgpu_pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | ||
41 | u32 offset, u32 size, | ||
42 | pramin_access_batch_fn loop, u32 **arg); | ||
43 | |||
44 | void nvgpu_init_pramin(struct mm_gk20a *mm); | ||
45 | |||
46 | #endif | ||