diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2018-04-18 15:59:00 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-15 20:47:31 -0400 |
commit | 2a2c16af5f9f1ccfc93a13e820d5381e5c881e92 (patch) | |
tree | 2e5d7b042270a649978e5bb540857012c85fb5b5 /drivers/gpu/nvgpu/os/linux | |
parent | 98d996f4ffb0137d119b5849cae46d7b7e5693e1 (diff) |
gpu: nvgpu: Move Linux files away from common
Move all Linux source code files to drivers/gpu/nvgpu/os/linux from
drivers/gpu/nvgpu/common/linux. This changes the meaning of common
to be OS independent.
JIRA NVGPU-598
JIRA NVGPU-601
Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux')
117 files changed, 30032 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c new file mode 100644 index 00000000..32b333f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.c | |||
@@ -0,0 +1,1786 @@ | |||
1 | /* | ||
2 | * Color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include <trace/events/gk20a.h> | ||
25 | |||
26 | #include <nvgpu/dma.h> | ||
27 | #include <nvgpu/gmmu.h> | ||
28 | #include <nvgpu/timers.h> | ||
29 | #include <nvgpu/nvgpu_common.h> | ||
30 | #include <nvgpu/kmem.h> | ||
31 | #include <nvgpu/log.h> | ||
32 | #include <nvgpu/bug.h> | ||
33 | #include <nvgpu/firmware.h> | ||
34 | #include <nvgpu/os_sched.h> | ||
35 | |||
36 | #include <nvgpu/linux/vm.h> | ||
37 | |||
38 | #include "gk20a/gk20a.h" | ||
39 | #include "gk20a/channel_gk20a.h" | ||
40 | #include "gk20a/mm_gk20a.h" | ||
41 | #include "gk20a/fence_gk20a.h" | ||
42 | #include "gk20a/gr_gk20a.h" | ||
43 | |||
44 | #include "cde.h" | ||
45 | #include "os_linux.h" | ||
46 | #include "dmabuf.h" | ||
47 | #include "channel.h" | ||
48 | #include "cde_gm20b.h" | ||
49 | #include "cde_gp10b.h" | ||
50 | |||
51 | #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> | ||
52 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
53 | |||
54 | static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); | ||
55 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); | ||
56 | |||
57 | #define CTX_DELETE_TIME 1000 | ||
58 | |||
59 | #define MAX_CTX_USE_COUNT 42 | ||
60 | #define MAX_CTX_RETRY_TIME 2000 | ||
61 | |||
62 | static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | ||
63 | { | ||
64 | struct nvgpu_mapped_buf *buffer; | ||
65 | dma_addr_t addr = 0; | ||
66 | struct gk20a *g = gk20a_from_vm(vm); | ||
67 | |||
68 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
69 | buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); | ||
70 | if (buffer) | ||
71 | addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl); | ||
72 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
73 | |||
74 | return addr; | ||
75 | } | ||
76 | |||
77 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | ||
78 | { | ||
79 | unsigned int i; | ||
80 | |||
81 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
82 | struct nvgpu_mem *mem = cde_ctx->mem + i; | ||
83 | nvgpu_dma_unmap_free(cde_ctx->vm, mem); | ||
84 | } | ||
85 | |||
86 | nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd); | ||
87 | |||
88 | cde_ctx->convert_cmd = NULL; | ||
89 | cde_ctx->init_convert_cmd = NULL; | ||
90 | cde_ctx->num_bufs = 0; | ||
91 | cde_ctx->num_params = 0; | ||
92 | cde_ctx->init_cmd_num_entries = 0; | ||
93 | cde_ctx->convert_cmd_num_entries = 0; | ||
94 | cde_ctx->init_cmd_executed = false; | ||
95 | } | ||
96 | |||
/*
 * Tear down one CDE context: free its firmware buffers, unmap the
 * compbit backing store from its VM, close the channel (which also
 * unbinds it from the TSG), drop the TSG reference, and finally remove
 * the context from the app's list and free it.
 *
 * Caller holds cde_app->mutex; callers cancel any pending deleter work
 * before calling this (see gk20a_cde_remove_contexts()).
 */
static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
			cde_ctx->backing_store_vaddr);

	/*
	 * free the channel
	 * gk20a_channel_close() will also unbind the channel from TSG
	 */
	gk20a_channel_close(ch);
	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);

	/* housekeeping on app */
	nvgpu_list_del(&cde_ctx->list);
	l->cde_app.ctx_count--;
	nvgpu_kfree(g, cde_ctx);
}
124 | |||
/*
 * Cancel a temporary context's pending delayed-deletion work.
 *
 * When @wait_finish is set, the app mutex is dropped around
 * cancel_delayed_work_sync() because the deleter work itself acquires
 * cde_app->mutex (see gk20a_cde_ctx_deleter_fn()); holding it across a
 * synchronous cancel would deadlock. The mutex is reacquired before
 * returning. Without @wait_finish the cancel is asynchronous and the
 * mutex is kept held throughout.
 */
static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

	/* permanent contexts do not have deleter works */
	if (!cde_ctx->is_temporary)
		return;

	if (wait_finish) {
		nvgpu_mutex_release(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		nvgpu_mutex_acquire(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}
144 | |||
/*
 * Remove every CDE context on both the free and used lists. Deleter
 * works are cancelled synchronously before each removal; dropping the
 * mutex inside gk20a_cde_cancel_deleter() is safe here because the app
 * is already deinitialised (see comment below).
 */
static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* safe to go off the mutex in cancel_deleter since app is
	 * deinitialised; no new jobs are started. deleter works may be only at
	 * waiting for the mutex or before, going to abort */

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}
167 | |||
/*
 * Stop the CDE app: clear the initialised flag first so no new
 * conversions start and deleter works abort, then remove all contexts.
 * Caller holds cde_app->mutex.
 */
static void gk20a_cde_stop(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	/* prevent further conversions and delayed works from working */
	cde_app->initialised = false;
	/* free all data, empty the list */
	gk20a_cde_remove_contexts(l);
}
178 | |||
/*
 * Final teardown of the CDE app: stop it under the mutex, then destroy
 * the mutex itself. Returns immediately if the app was never
 * initialised.
 */
void gk20a_cde_destroy(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);
	gk20a_cde_stop(l);
	nvgpu_mutex_release(&cde_app->mutex);

	nvgpu_mutex_destroy(&cde_app->mutex);
}
194 | |||
/*
 * Suspend the CDE app: cancel the pending deleter work of every context
 * on both lists. The cancels are asynchronous (wait_finish == false) so
 * the app mutex stays held throughout. Contexts themselves are kept;
 * nothing is freed here.
 */
void gk20a_cde_suspend(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_mutex_release(&cde_app->mutex);

}
220 | |||
221 | static int gk20a_cde_create_context(struct nvgpu_os_linux *l) | ||
222 | __must_hold(&l->cde_app->mutex) | ||
223 | { | ||
224 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
225 | struct gk20a_cde_ctx *cde_ctx; | ||
226 | |||
227 | cde_ctx = gk20a_cde_allocate_context(l); | ||
228 | if (IS_ERR(cde_ctx)) | ||
229 | return PTR_ERR(cde_ctx); | ||
230 | |||
231 | nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); | ||
232 | cde_app->ctx_count++; | ||
233 | if (cde_app->ctx_count > cde_app->ctx_count_top) | ||
234 | cde_app->ctx_count_top = cde_app->ctx_count; | ||
235 | |||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) | ||
240 | __must_hold(&l->cde_app->mutex) | ||
241 | { | ||
242 | int err; | ||
243 | int i; | ||
244 | |||
245 | for (i = 0; i < NUM_CDE_CONTEXTS; i++) { | ||
246 | err = gk20a_cde_create_context(l); | ||
247 | if (err) | ||
248 | goto out; | ||
249 | } | ||
250 | |||
251 | return 0; | ||
252 | out: | ||
253 | gk20a_cde_remove_contexts(l); | ||
254 | return err; | ||
255 | } | ||
256 | |||
257 | static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | ||
258 | struct nvgpu_firmware *img, | ||
259 | struct gk20a_cde_hdr_buf *buf) | ||
260 | { | ||
261 | struct nvgpu_mem *mem; | ||
262 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
263 | struct gk20a *g = &l->g; | ||
264 | int err; | ||
265 | |||
266 | /* check that the file can hold the buf */ | ||
267 | if (buf->data_byte_offset != 0 && | ||
268 | buf->data_byte_offset + buf->num_bytes > img->size) { | ||
269 | nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", | ||
270 | cde_ctx->num_bufs); | ||
271 | return -EINVAL; | ||
272 | } | ||
273 | |||
274 | /* check that we have enough buf elems available */ | ||
275 | if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { | ||
276 | nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", | ||
277 | cde_ctx->num_bufs); | ||
278 | return -ENOMEM; | ||
279 | } | ||
280 | |||
281 | /* allocate buf */ | ||
282 | mem = cde_ctx->mem + cde_ctx->num_bufs; | ||
283 | err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); | ||
284 | if (err) { | ||
285 | nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d", | ||
286 | cde_ctx->num_bufs); | ||
287 | return -ENOMEM; | ||
288 | } | ||
289 | |||
290 | /* copy the content */ | ||
291 | if (buf->data_byte_offset != 0) | ||
292 | memcpy(mem->cpu_va, img->data + buf->data_byte_offset, | ||
293 | buf->num_bytes); | ||
294 | |||
295 | cde_ctx->num_bufs++; | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
/*
 * Masked read-modify-write of a 32-bit or 64-bit word at @target.
 *
 * @value is shifted (left for shift >= 0, right by -shift otherwise)
 * and masked, then merged with the existing contents under ~mask. The
 * existing word is only read when the mask does not cover the whole
 * word, since a full-mask write overwrites everything anyway. For
 * TYPE_PARAM_TYPE_U64_BIG the 32-bit halves are swapped on both read
 * and write.
 *
 * NOTE(review): the U64_LITTLE full-mask test compares against ~0ul,
 * which is only 32 bits wide on a 32-bit kernel; harmless (it merely
 * skips the read-avoidance optimisation) but confirm it is intended.
 *
 * Returns 0 on success, -EINVAL for an unknown @type.
 */
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
			      int type, s32 shift, u64 mask, u64 value)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr = target;
	u64 *target_mem_ptr_u64 = target;
	u64 current_value, new_value;

	value = (shift >= 0) ? value << shift : value >> -shift;
	value &= mask;

	/* read current data from the location */
	current_value = 0;
	if (type == TYPE_PARAM_TYPE_U32) {
		if (mask != 0xfffffffful)
			current_value = *target_mem_ptr;
	} else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
		if (mask != ~0ul)
			current_value = *target_mem_ptr_u64;
	} else if (type == TYPE_PARAM_TYPE_U64_BIG) {
		/* stored big-endian-by-halves: swap to host order for the merge */
		current_value = *target_mem_ptr_u64;
		current_value = (u64)(current_value >> 32) |
			(u64)(current_value << 32);
	} else {
		nvgpu_warn(g, "cde: unknown type. type=%d",
				type);
		return -EINVAL;
	}

	current_value &= ~mask;
	new_value = current_value | value;

	/* store the element data back */
	if (type == TYPE_PARAM_TYPE_U32)
		*target_mem_ptr = (u32)new_value;
	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
		*target_mem_ptr_u64 = new_value;
	else {
		/* swap the halves back before storing (U64_BIG) */
		new_value = (u64)(new_value >> 32) |
				(u64)(new_value << 32);
		*target_mem_ptr_u64 = new_value;
	}

	return 0;
}
346 | |||
/*
 * Apply one "replace" element from the firmware image: compute the GPU
 * virtual address of source_buf + source_byte_offset and patch it into
 * target_buf at target_byte_offset via gk20a_replace_data() using the
 * element's type/shift/mask.
 *
 * NOTE(review): the bounds checks use offset + 3, which only guarantees
 * 3 bytes past the offset are in range; a 4- or 8-byte patch needs
 * offset + 4 (or + 8). Looks one byte short — confirm against intent.
 *
 * Returns 0 on success, -EINVAL for out-of-range indices/offsets or a
 * failed patch.
 */
static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
				  struct nvgpu_firmware *img,
				  struct gk20a_cde_hdr_replace *replace)
{
	struct nvgpu_mem *source_mem;
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	source_mem = cde_ctx->mem + replace->source_buf;
	target_mem = cde_ctx->mem + replace->target_buf;
	target_mem_ptr = target_mem->cpu_va;

	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size,
			   target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}
399 | |||
/*
 * Resolve the value of every parameter recorded from the firmware image
 * and patch it into its target buffer via gk20a_replace_data().
 *
 * Reserved parameter ids are computed from current GPU/context state;
 * any other id indexes user_param_values[] after subtracting
 * NUM_RESERVED_PARAMS. Out-of-range user ids are silently skipped.
 * param->data_offset is added to the resolved value before patching.
 *
 * Returns 0 on success; -EINVAL when an SMMU address lookup fails or a
 * patch operation fails.
 */
static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_mem *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, err;
	unsigned int i;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.mem.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			/* 0 from the lookup means the surface is not mapped */
			new_data = gpuva_to_iova_base(cde_ctx->vm,
						      cde_ctx->surf_vaddr);
			if (new_data == 0) {
				nvgpu_warn(g, "cde: failed to find 0x%llx",
						cde_ctx->surf_vaddr);
				return -EINVAL;
			}
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		case TYPE_PARAM_SCATTERBUFFER:
			new_data = cde_ctx->scatterbuffer_vaddr;
			break;
		case TYPE_PARAM_SCATTERBUFFER_SIZE:
			new_data = cde_ctx->scatterbuffer_size;
			break;
		default:
			/* non-reserved ids come from userspace-supplied values */
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);

		if (err) {
			nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}
491 | |||
492 | static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | ||
493 | struct nvgpu_firmware *img, | ||
494 | struct gk20a_cde_hdr_param *param) | ||
495 | { | ||
496 | struct nvgpu_mem *target_mem; | ||
497 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
498 | struct gk20a *g = &l->g; | ||
499 | |||
500 | if (param->target_buf >= cde_ctx->num_bufs) { | ||
501 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | ||
502 | cde_ctx->num_params, param->target_buf, | ||
503 | cde_ctx->num_bufs); | ||
504 | return -EINVAL; | ||
505 | } | ||
506 | |||
507 | target_mem = cde_ctx->mem + param->target_buf; | ||
508 | if (target_mem->size < (param->target_byte_offset + 3)) { | ||
509 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | ||
510 | cde_ctx->num_params, param->target_byte_offset, | ||
511 | target_mem->size); | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | /* does this parameter fit into our parameter structure */ | ||
516 | if (cde_ctx->num_params >= MAX_CDE_PARAMS) { | ||
517 | nvgpu_warn(g, "cde: no room for new parameters param idx = %d", | ||
518 | cde_ctx->num_params); | ||
519 | return -ENOMEM; | ||
520 | } | ||
521 | |||
522 | /* is the given id valid? */ | ||
523 | if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { | ||
524 | nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", | ||
525 | param->id, cde_ctx->num_params, | ||
526 | NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); | ||
527 | return -EINVAL; | ||
528 | } | ||
529 | |||
530 | cde_ctx->params[cde_ctx->num_params] = *param; | ||
531 | cde_ctx->num_params++; | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | ||
537 | struct nvgpu_firmware *img, | ||
538 | u32 required_class) | ||
539 | { | ||
540 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
541 | struct gk20a *g = &l->g; | ||
542 | int err; | ||
543 | |||
544 | /* CDE enabled */ | ||
545 | cde_ctx->ch->cde = true; | ||
546 | |||
547 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); | ||
548 | if (err) { | ||
549 | nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", | ||
550 | err); | ||
551 | return err; | ||
552 | } | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | ||
558 | struct nvgpu_firmware *img, | ||
559 | u32 op, | ||
560 | struct gk20a_cde_cmd_elem *cmd_elem, | ||
561 | u32 num_elems) | ||
562 | { | ||
563 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
564 | struct gk20a *g = &l->g; | ||
565 | struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; | ||
566 | u32 *num_entries; | ||
567 | unsigned int i; | ||
568 | |||
569 | /* check command type */ | ||
570 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
571 | gpfifo = &cde_ctx->init_convert_cmd; | ||
572 | num_entries = &cde_ctx->init_cmd_num_entries; | ||
573 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
574 | gpfifo = &cde_ctx->convert_cmd; | ||
575 | num_entries = &cde_ctx->convert_cmd_num_entries; | ||
576 | } else { | ||
577 | nvgpu_warn(g, "cde: unknown command. op=%u", | ||
578 | op); | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | |||
582 | /* allocate gpfifo entries to be pushed */ | ||
583 | *gpfifo = nvgpu_kzalloc(g, | ||
584 | sizeof(struct nvgpu_gpfifo_entry) * num_elems); | ||
585 | if (!*gpfifo) { | ||
586 | nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); | ||
587 | return -ENOMEM; | ||
588 | } | ||
589 | |||
590 | gpfifo_elem = *gpfifo; | ||
591 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | ||
592 | struct nvgpu_mem *target_mem; | ||
593 | |||
594 | /* validate the current entry */ | ||
595 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | ||
596 | nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", | ||
597 | cmd_elem->target_buf, cde_ctx->num_bufs); | ||
598 | return -EINVAL; | ||
599 | } | ||
600 | |||
601 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | ||
602 | if (target_mem->size< | ||
603 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | ||
604 | nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | ||
605 | target_mem->size, | ||
606 | cmd_elem->target_byte_offset, | ||
607 | cmd_elem->num_bytes); | ||
608 | return -EINVAL; | ||
609 | } | ||
610 | |||
611 | /* store the element into gpfifo */ | ||
612 | gpfifo_elem->entry0 = | ||
613 | u64_lo32(target_mem->gpu_va + | ||
614 | cmd_elem->target_byte_offset); | ||
615 | gpfifo_elem->entry1 = | ||
616 | u64_hi32(target_mem->gpu_va + | ||
617 | cmd_elem->target_byte_offset) | | ||
618 | pbdma_gp_entry1_length_f(cmd_elem->num_bytes / | ||
619 | sizeof(u32)); | ||
620 | } | ||
621 | |||
622 | *num_entries = num_elems; | ||
623 | return 0; | ||
624 | } | ||
625 | |||
626 | static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) | ||
627 | { | ||
628 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
629 | struct gk20a *g = &l->g; | ||
630 | unsigned long init_bytes = cde_ctx->init_cmd_num_entries * | ||
631 | sizeof(struct nvgpu_gpfifo_entry); | ||
632 | unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * | ||
633 | sizeof(struct nvgpu_gpfifo_entry); | ||
634 | unsigned long total_bytes = init_bytes + conv_bytes; | ||
635 | struct nvgpu_gpfifo_entry *combined_cmd; | ||
636 | |||
637 | /* allocate buffer that has space for both */ | ||
638 | combined_cmd = nvgpu_kzalloc(g, total_bytes); | ||
639 | if (!combined_cmd) { | ||
640 | nvgpu_warn(g, | ||
641 | "cde: could not allocate memory for gpfifo entries"); | ||
642 | return -ENOMEM; | ||
643 | } | ||
644 | |||
645 | /* move the original init here and append convert */ | ||
646 | memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); | ||
647 | memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, | ||
648 | cde_ctx->convert_cmd, conv_bytes); | ||
649 | |||
650 | nvgpu_kfree(g, cde_ctx->init_convert_cmd); | ||
651 | nvgpu_kfree(g, cde_ctx->convert_cmd); | ||
652 | |||
653 | cde_ctx->init_convert_cmd = combined_cmd; | ||
654 | cde_ctx->convert_cmd = combined_cmd | ||
655 | + cde_ctx->init_cmd_num_entries; | ||
656 | |||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, | ||
661 | struct nvgpu_firmware *img) | ||
662 | { | ||
663 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
664 | struct gk20a *g = &l->g; | ||
665 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
666 | u32 *data = (u32 *)img->data; | ||
667 | u32 num_of_elems; | ||
668 | struct gk20a_cde_hdr_elem *elem; | ||
669 | u32 min_size = 0; | ||
670 | int err = 0; | ||
671 | unsigned int i; | ||
672 | |||
673 | min_size += 2 * sizeof(u32); | ||
674 | if (img->size < min_size) { | ||
675 | nvgpu_warn(g, "cde: invalid image header"); | ||
676 | return -EINVAL; | ||
677 | } | ||
678 | |||
679 | cde_app->firmware_version = data[0]; | ||
680 | num_of_elems = data[1]; | ||
681 | |||
682 | min_size += num_of_elems * sizeof(*elem); | ||
683 | if (img->size < min_size) { | ||
684 | nvgpu_warn(g, "cde: bad image"); | ||
685 | return -EINVAL; | ||
686 | } | ||
687 | |||
688 | elem = (struct gk20a_cde_hdr_elem *)&data[2]; | ||
689 | for (i = 0; i < num_of_elems; i++) { | ||
690 | int err = 0; | ||
691 | switch (elem->type) { | ||
692 | case TYPE_BUF: | ||
693 | err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); | ||
694 | break; | ||
695 | case TYPE_REPLACE: | ||
696 | err = gk20a_init_cde_replace(cde_ctx, img, | ||
697 | &elem->replace); | ||
698 | break; | ||
699 | case TYPE_PARAM: | ||
700 | err = gk20a_init_cde_param(cde_ctx, img, &elem->param); | ||
701 | break; | ||
702 | case TYPE_REQUIRED_CLASS: | ||
703 | err = gk20a_init_cde_required_class(cde_ctx, img, | ||
704 | elem->required_class); | ||
705 | break; | ||
706 | case TYPE_COMMAND: | ||
707 | { | ||
708 | struct gk20a_cde_cmd_elem *cmd = (void *) | ||
709 | &img->data[elem->command.data_byte_offset]; | ||
710 | err = gk20a_init_cde_command(cde_ctx, img, | ||
711 | elem->command.op, cmd, | ||
712 | elem->command.num_entries); | ||
713 | break; | ||
714 | } | ||
715 | case TYPE_ARRAY: | ||
716 | memcpy(&cde_app->arrays[elem->array.id][0], | ||
717 | elem->array.data, | ||
718 | MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); | ||
719 | break; | ||
720 | default: | ||
721 | nvgpu_warn(g, "cde: unknown header element"); | ||
722 | err = -EINVAL; | ||
723 | } | ||
724 | |||
725 | if (err) | ||
726 | goto deinit_image; | ||
727 | |||
728 | elem++; | ||
729 | } | ||
730 | |||
731 | if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { | ||
732 | nvgpu_warn(g, "cde: convert command not defined"); | ||
733 | err = -EINVAL; | ||
734 | goto deinit_image; | ||
735 | } | ||
736 | |||
737 | if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { | ||
738 | nvgpu_warn(g, "cde: convert command not defined"); | ||
739 | err = -EINVAL; | ||
740 | goto deinit_image; | ||
741 | } | ||
742 | |||
743 | err = gk20a_cde_pack_cmdbufs(cde_ctx); | ||
744 | if (err) | ||
745 | goto deinit_image; | ||
746 | |||
747 | return 0; | ||
748 | |||
749 | deinit_image: | ||
750 | gk20a_deinit_cde_img(cde_ctx); | ||
751 | return err; | ||
752 | } | ||
753 | |||
754 | static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | ||
755 | u32 op, struct nvgpu_channel_fence *fence, | ||
756 | u32 flags, struct gk20a_fence **fence_out) | ||
757 | { | ||
758 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
759 | struct gk20a *g = &l->g; | ||
760 | struct nvgpu_gpfifo_entry *gpfifo = NULL; | ||
761 | int num_entries = 0; | ||
762 | |||
763 | /* check command type */ | ||
764 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
765 | /* both init and convert combined */ | ||
766 | gpfifo = cde_ctx->init_convert_cmd; | ||
767 | num_entries = cde_ctx->init_cmd_num_entries | ||
768 | + cde_ctx->convert_cmd_num_entries; | ||
769 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
770 | gpfifo = cde_ctx->convert_cmd; | ||
771 | num_entries = cde_ctx->convert_cmd_num_entries; | ||
772 | } else if (op == TYPE_BUF_COMMAND_NOOP) { | ||
773 | /* Any non-null gpfifo will suffice with 0 num_entries */ | ||
774 | gpfifo = cde_ctx->init_convert_cmd; | ||
775 | num_entries = 0; | ||
776 | } else { | ||
777 | nvgpu_warn(g, "cde: unknown buffer"); | ||
778 | return -EINVAL; | ||
779 | } | ||
780 | |||
781 | if (gpfifo == NULL) { | ||
782 | nvgpu_warn(g, "cde: buffer not available"); | ||
783 | return -ENOSYS; | ||
784 | } | ||
785 | |||
786 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, | ||
787 | num_entries, flags, fence, fence_out, | ||
788 | NULL); | ||
789 | } | ||
790 | |||
791 | static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) | ||
792 | __acquires(&cde_app->mutex) | ||
793 | __releases(&cde_app->mutex) | ||
794 | { | ||
795 | struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; | ||
796 | struct gk20a *g = &cde_ctx->l->g; | ||
797 | |||
798 | nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); | ||
799 | trace_gk20a_cde_release(cde_ctx); | ||
800 | |||
801 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
802 | |||
803 | if (cde_ctx->in_use) { | ||
804 | cde_ctx->in_use = false; | ||
805 | nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); | ||
806 | cde_app->ctx_usecount--; | ||
807 | } else { | ||
808 | nvgpu_log_info(g, "double release cde context %p", cde_ctx); | ||
809 | } | ||
810 | |||
811 | nvgpu_mutex_release(&cde_app->mutex); | ||
812 | } | ||
813 | |||
/*
 * Delayed-work handler that frees a temporary CDE context after its
 * idle delay has expired.
 *
 * in_use/initialised are tested once without the lock as a cheap early
 * out and re-tested under cde_app->mutex before the context is actually
 * removed, so the unlocked check is only an optimisation against the
 * race with a new user taking the context.
 */
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: attempting to delete temporary %p", cde_ctx);

	/* a power reference is needed: removal touches channel/GMMU state */
	err = gk20a_busy(g);
	if (err) {
		/* this context would find new use anyway later, so not freeing
		 * here does not leak anything */
		nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
				" temp ctx deletion");
		return;
	}

	nvgpu_mutex_acquire(&cde_app->mutex);
	if (cde_ctx->in_use || !cde_app->initialised) {
		nvgpu_log(g, gpu_dbg_cde_ctx,
				"cde: context use raced, not deleting %p",
				cde_ctx);
		goto out;
	}

	/* we are running, so no deleter work should still be pending */
	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
			"double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: destroyed %p count=%d use=%d max=%d",
			cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
			cde_app->ctx_count_top);

out:
	nvgpu_mutex_release(&cde_app->mutex);
	gk20a_idle(g);
}
863 | |||
/*
 * Pick a CDE context for use; caller must hold cde_app->mutex.
 *
 * Returns a context marked in_use on the used_contexts list, or
 * ERR_PTR(-EAGAIN) when the global use-count cap is reached, or the
 * ERR_PTR from a failed temporary-context allocation.
 */
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	/* exhausted? */

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!nvgpu_list_empty(&cde_app->free_contexts)) {
		cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
				gk20a_cde_ctx, list);
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
				"cde: got free %p count=%d use=%d max=%d",
				cde_ctx, cde_app->ctx_count,
				cde_app->ctx_usecount,
				cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: no free contexts, count=%d",
			cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx)) {
		nvgpu_warn(g, "cde: cannot allocate context: %ld",
				PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	/* temporary contexts get scheduled for deletion on release */
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	/* track the high-water mark for debugging/statistics */
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}
922 | |||
923 | static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) | ||
924 | __releases(&cde_app->mutex) | ||
925 | __acquires(&cde_app->mutex) | ||
926 | { | ||
927 | struct gk20a *g = &l->g; | ||
928 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
929 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
930 | struct nvgpu_timeout timeout; | ||
931 | |||
932 | nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, | ||
933 | NVGPU_TIMER_CPU_TIMER); | ||
934 | |||
935 | do { | ||
936 | cde_ctx = gk20a_cde_do_get_context(l); | ||
937 | if (PTR_ERR(cde_ctx) != -EAGAIN) | ||
938 | break; | ||
939 | |||
940 | /* exhausted, retry */ | ||
941 | nvgpu_mutex_release(&cde_app->mutex); | ||
942 | cond_resched(); | ||
943 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
944 | } while (!nvgpu_timeout_expired(&timeout)); | ||
945 | |||
946 | return cde_ctx; | ||
947 | } | ||
948 | |||
949 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) | ||
950 | { | ||
951 | struct gk20a *g = &l->g; | ||
952 | struct gk20a_cde_ctx *cde_ctx; | ||
953 | int ret; | ||
954 | |||
955 | cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); | ||
956 | if (!cde_ctx) | ||
957 | return ERR_PTR(-ENOMEM); | ||
958 | |||
959 | cde_ctx->l = l; | ||
960 | cde_ctx->dev = dev_from_gk20a(g); | ||
961 | |||
962 | ret = gk20a_cde_load(cde_ctx); | ||
963 | if (ret) { | ||
964 | nvgpu_kfree(g, cde_ctx); | ||
965 | return ERR_PTR(ret); | ||
966 | } | ||
967 | |||
968 | nvgpu_init_list_node(&cde_ctx->list); | ||
969 | cde_ctx->is_temporary = false; | ||
970 | cde_ctx->in_use = false; | ||
971 | INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, | ||
972 | gk20a_cde_ctx_deleter_fn); | ||
973 | |||
974 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); | ||
975 | trace_gk20a_cde_allocate_context(cde_ctx); | ||
976 | return cde_ctx; | ||
977 | } | ||
978 | |||
979 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
980 | struct dma_buf *compbits_scatter_buf, | ||
981 | u64 compbits_byte_offset, | ||
982 | u64 scatterbuffer_byte_offset, | ||
983 | struct nvgpu_channel_fence *fence, | ||
984 | u32 __flags, struct gk20a_cde_param *params, | ||
985 | int num_params, struct gk20a_fence **fence_out) | ||
986 | __acquires(&l->cde_app->mutex) | ||
987 | __releases(&l->cde_app->mutex) | ||
988 | { | ||
989 | struct gk20a *g = &l->g; | ||
990 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
991 | struct gk20a_comptags comptags; | ||
992 | struct nvgpu_os_buffer os_buf = { | ||
993 | compbits_scatter_buf, | ||
994 | NULL, | ||
995 | dev_from_gk20a(g) | ||
996 | }; | ||
997 | u64 mapped_compbits_offset = 0; | ||
998 | u64 compbits_size = 0; | ||
999 | u64 mapped_scatterbuffer_offset = 0; | ||
1000 | u64 scatterbuffer_size = 0; | ||
1001 | u64 map_vaddr = 0; | ||
1002 | u64 map_offset = 0; | ||
1003 | u64 map_size = 0; | ||
1004 | u8 *surface = NULL; | ||
1005 | u64 big_page_mask = 0; | ||
1006 | u32 flags; | ||
1007 | int err, i; | ||
1008 | const s16 compbits_kind = 0; | ||
1009 | u32 submit_op; | ||
1010 | struct dma_buf_attachment *attachment; | ||
1011 | |||
1012 | nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
1013 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
1014 | |||
1015 | /* scatter buffer must be after compbits buffer */ | ||
1016 | if (scatterbuffer_byte_offset && | ||
1017 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
1018 | return -EINVAL; | ||
1019 | |||
1020 | err = gk20a_busy(g); | ||
1021 | if (err) | ||
1022 | return err; | ||
1023 | |||
1024 | nvgpu_mutex_acquire(&l->cde_app.mutex); | ||
1025 | cde_ctx = gk20a_cde_get_context(l); | ||
1026 | nvgpu_mutex_release(&l->cde_app.mutex); | ||
1027 | if (IS_ERR(cde_ctx)) { | ||
1028 | err = PTR_ERR(cde_ctx); | ||
1029 | goto exit_idle; | ||
1030 | } | ||
1031 | |||
1032 | /* First, map the buffer to local va */ | ||
1033 | |||
1034 | /* ensure that the compbits buffer has drvdata */ | ||
1035 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, | ||
1036 | dev_from_gk20a(g)); | ||
1037 | if (err) | ||
1038 | goto exit_idle; | ||
1039 | |||
1040 | /* compbits don't start at page aligned offset, so we need to align | ||
1041 | the region to be mapped */ | ||
1042 | big_page_mask = cde_ctx->vm->big_page_size - 1; | ||
1043 | map_offset = compbits_byte_offset & ~big_page_mask; | ||
1044 | map_size = compbits_scatter_buf->size - map_offset; | ||
1045 | |||
1046 | |||
1047 | /* compute compbit start offset from the beginning of the mapped | ||
1048 | area */ | ||
1049 | mapped_compbits_offset = compbits_byte_offset - map_offset; | ||
1050 | if (scatterbuffer_byte_offset) { | ||
1051 | compbits_size = scatterbuffer_byte_offset - | ||
1052 | compbits_byte_offset; | ||
1053 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - | ||
1054 | map_offset; | ||
1055 | scatterbuffer_size = compbits_scatter_buf->size - | ||
1056 | scatterbuffer_byte_offset; | ||
1057 | } else { | ||
1058 | compbits_size = compbits_scatter_buf->size - | ||
1059 | compbits_byte_offset; | ||
1060 | } | ||
1061 | |||
1062 | nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
1063 | map_offset, map_size); | ||
1064 | nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
1065 | mapped_compbits_offset, compbits_size); | ||
1066 | nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
1067 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
1068 | |||
1069 | |||
1070 | /* map the destination buffer */ | ||
1071 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ | ||
1072 | err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, | ||
1073 | NVGPU_VM_MAP_CACHEABLE | | ||
1074 | NVGPU_VM_MAP_DIRECT_KIND_CTRL, | ||
1075 | NVGPU_KIND_INVALID, | ||
1076 | compbits_kind, /* incompressible kind */ | ||
1077 | gk20a_mem_flag_none, | ||
1078 | map_offset, map_size, | ||
1079 | NULL, | ||
1080 | &map_vaddr); | ||
1081 | if (err) { | ||
1082 | dma_buf_put(compbits_scatter_buf); | ||
1083 | err = -EINVAL; | ||
1084 | goto exit_idle; | ||
1085 | } | ||
1086 | |||
1087 | if (scatterbuffer_byte_offset && | ||
1088 | l->ops.cde.need_scatter_buffer && | ||
1089 | l->ops.cde.need_scatter_buffer(g)) { | ||
1090 | struct sg_table *sgt; | ||
1091 | void *scatter_buffer; | ||
1092 | |||
1093 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1094 | if (IS_ERR(surface)) { | ||
1095 | nvgpu_warn(g, | ||
1096 | "dma_buf_vmap failed"); | ||
1097 | err = -EINVAL; | ||
1098 | goto exit_unmap_vaddr; | ||
1099 | } | ||
1100 | |||
1101 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1102 | |||
1103 | nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1104 | surface, scatter_buffer); | ||
1105 | sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1106 | &attachment); | ||
1107 | if (IS_ERR(sgt)) { | ||
1108 | nvgpu_warn(g, | ||
1109 | "mm_pin failed"); | ||
1110 | err = -EINVAL; | ||
1111 | goto exit_unmap_surface; | ||
1112 | } else { | ||
1113 | err = l->ops.cde.populate_scatter_buffer(g, sgt, | ||
1114 | compbits_byte_offset, scatter_buffer, | ||
1115 | scatterbuffer_size); | ||
1116 | WARN_ON(err); | ||
1117 | |||
1118 | gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1119 | attachment, sgt); | ||
1120 | if (err) | ||
1121 | goto exit_unmap_surface; | ||
1122 | } | ||
1123 | |||
1124 | __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); | ||
1125 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1126 | surface = NULL; | ||
1127 | } | ||
1128 | |||
1129 | /* store source buffer compression tags */ | ||
1130 | gk20a_get_comptags(&os_buf, &comptags); | ||
1131 | cde_ctx->surf_param_offset = comptags.offset; | ||
1132 | cde_ctx->surf_param_lines = comptags.lines; | ||
1133 | |||
1134 | /* store surface vaddr. This is actually compbit vaddr, but since | ||
1135 | compbits live in the same surface, and we can get the alloc base | ||
1136 | address by using gpuva_to_iova_base, this will do */ | ||
1137 | cde_ctx->surf_vaddr = map_vaddr; | ||
1138 | |||
1139 | /* store information about destination */ | ||
1140 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; | ||
1141 | cde_ctx->compbit_size = compbits_size; | ||
1142 | |||
1143 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1144 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1145 | |||
1146 | /* remove existing argument data */ | ||
1147 | memset(cde_ctx->user_param_values, 0, | ||
1148 | sizeof(cde_ctx->user_param_values)); | ||
1149 | |||
1150 | /* read user space arguments for the conversion */ | ||
1151 | for (i = 0; i < num_params; i++) { | ||
1152 | struct gk20a_cde_param *param = params + i; | ||
1153 | int id = param->id - NUM_RESERVED_PARAMS; | ||
1154 | |||
1155 | if (id < 0 || id >= MAX_CDE_USER_PARAMS) { | ||
1156 | nvgpu_warn(g, "cde: unknown user parameter"); | ||
1157 | err = -EINVAL; | ||
1158 | goto exit_unmap_surface; | ||
1159 | } | ||
1160 | cde_ctx->user_param_values[id] = param->value; | ||
1161 | } | ||
1162 | |||
1163 | /* patch data */ | ||
1164 | err = gk20a_cde_patch_params(cde_ctx); | ||
1165 | if (err) { | ||
1166 | nvgpu_warn(g, "cde: failed to patch parameters"); | ||
1167 | goto exit_unmap_surface; | ||
1168 | } | ||
1169 | |||
1170 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", | ||
1171 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | ||
1172 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | ||
1173 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | ||
1174 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1175 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1176 | |||
1177 | /* take always the postfence as it is needed for protecting the | ||
1178 | * cde context */ | ||
1179 | flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET; | ||
1180 | |||
1181 | /* gk20a_cde_execute_buffer() will grab a power reference of it's own */ | ||
1182 | gk20a_idle(g); | ||
1183 | |||
1184 | if (comptags.lines == 0) { | ||
1185 | /* | ||
1186 | * Nothing to do on the buffer, but do a null kickoff for | ||
1187 | * managing the pre and post fences. | ||
1188 | */ | ||
1189 | submit_op = TYPE_BUF_COMMAND_NOOP; | ||
1190 | } else if (!cde_ctx->init_cmd_executed) { | ||
1191 | /* | ||
1192 | * First time, so include the init pushbuf too in addition to | ||
1193 | * the conversion code. | ||
1194 | */ | ||
1195 | submit_op = TYPE_BUF_COMMAND_INIT; | ||
1196 | } else { | ||
1197 | /* | ||
1198 | * The usual condition: execute just the conversion. | ||
1199 | */ | ||
1200 | submit_op = TYPE_BUF_COMMAND_CONVERT; | ||
1201 | } | ||
1202 | err = gk20a_cde_execute_buffer(cde_ctx, submit_op, | ||
1203 | fence, flags, fence_out); | ||
1204 | |||
1205 | if (comptags.lines != 0 && !err) | ||
1206 | cde_ctx->init_cmd_executed = true; | ||
1207 | |||
1208 | /* unmap the buffers - channel holds references to them now */ | ||
1209 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); | ||
1210 | |||
1211 | return err; | ||
1212 | |||
1213 | exit_unmap_surface: | ||
1214 | if (surface) | ||
1215 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1216 | exit_unmap_vaddr: | ||
1217 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); | ||
1218 | exit_idle: | ||
1219 | gk20a_idle(g); | ||
1220 | return err; | ||
1221 | } | ||
1222 | |||
/*
 * Channel completion callback for a CDE context.
 *
 * Runs when a job on the context's channel finishes. If the channel is
 * idle, handles channel timeouts (replacing permanent contexts with a
 * fresh one), schedules delayed deletion for temporary contexts, and
 * releases the context back to the free list.
 */
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	bool channel_idle;

	/* only act once all jobs on the channel have drained */
	channel_gk20a_joblist_lock(ch);
	channel_idle = channel_gk20a_joblist_is_empty(ch);
	channel_gk20a_joblist_unlock(ch);

	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		nvgpu_log_info(g, "double finish cde context %p on channel %p",
				cde_ctx, ch);

	if (ch->has_timedout) {
		if (cde_ctx->is_temporary) {
			nvgpu_warn(g,
					"cde: channel had timed out"
					" (temporary channel)");
			/* going to be deleted anyway */
		} else {
			nvgpu_warn(g,
					"cde: channel had timed out"
					", reloading");
			/* mark it to be deleted, replace with a new one */
			nvgpu_mutex_acquire(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(l)) {
				nvgpu_err(g, "cde: can't replace context");
			}
			nvgpu_mutex_release(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
			msecs_to_jiffies(CTX_DELETE_TIME));
	}

	/* a timed-out context stays marked in_use so it cannot be reused;
	 * the deleter work scheduled above will dispose of it */
	if (!ch->has_timedout)
		gk20a_cde_ctx_release(cde_ctx);
}
1276 | |||
/*
 * Load and initialise all GPU-side state for one CDE context:
 * fetch the gpu2cde firmware image, open a TSG and a channel with the
 * CDE completion callback, bind the channel to the CDE VM, allocate the
 * gpfifo, map the compbit backing store read-only into the channel VM,
 * and initialise the firmware image.
 *
 * Returns 0 on success, negative errno on failure (with partial state
 * unwound via the goto ladder below).
 */
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_firmware *img;
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_gpfifo_args gpfifo_args;
	int err = 0;
	u64 vaddr;

	img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
	if (!img) {
		nvgpu_err(g, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!tsg) {
		nvgpu_err(g, "cde: could not create TSG");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
			cde_ctx,
			-1,
			false);
	if (!ch) {
		/* NOTE(review): tsg opened above does not appear to be
		 * released on this or later error paths — confirm whether
		 * TSG/channel teardown is handled elsewhere (refcounting). */
		nvgpu_warn(g, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	/* the CDE channel must never be recovered via timeout handling */
	ch->timeout.enabled = false;

	/* bind the channel to the vm */
	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
	if (err) {
		nvgpu_warn(g, "cde: could not bind vm");
		goto err_commit_va;
	}

	err = gk20a_tsg_bind_channel(tsg, ch);
	if (err) {
		nvgpu_err(g, "cde: unable to bind to tsg");
		goto err_alloc_gpfifo;
	}

	gpfifo_args.num_entries = 1024;
	gpfifo_args.num_inflight_jobs = 0;
	gpfifo_args.flags = 0;
	/* allocate gpfifo (1024 should be more than enough) */
	err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
	if (err) {
		nvgpu_warn(g, "cde: unable to allocate gpfifo");
		goto err_alloc_gpfifo;
	}

	/* map backing store to gpu virtual space */
	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
			       g->gr.compbit_store.mem.size,
			       NVGPU_VM_MAP_CACHEABLE,
			       gk20a_mem_flag_read_only,
			       false,
			       gr->compbit_store.mem.aperture);

	if (!vaddr) {
		nvgpu_warn(g, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->tsg = tsg;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		nvgpu_warn(g, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	nvgpu_release_firmware(g, img);

	return 0;

err_init_cde_img:
	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_alloc_gpfifo:
	nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	nvgpu_release_firmware(g, img);
	nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
	return err;
}
1380 | |||
1381 | int gk20a_cde_reload(struct nvgpu_os_linux *l) | ||
1382 | __acquires(&l->cde_app->mutex) | ||
1383 | __releases(&l->cde_app->mutex) | ||
1384 | { | ||
1385 | struct gk20a *g = &l->g; | ||
1386 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
1387 | int err; | ||
1388 | |||
1389 | if (!cde_app->initialised) | ||
1390 | return -ENOSYS; | ||
1391 | |||
1392 | err = gk20a_busy(g); | ||
1393 | if (err) | ||
1394 | return err; | ||
1395 | |||
1396 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1397 | |||
1398 | gk20a_cde_stop(l); | ||
1399 | |||
1400 | err = gk20a_cde_create_contexts(l); | ||
1401 | if (!err) | ||
1402 | cde_app->initialised = true; | ||
1403 | |||
1404 | nvgpu_mutex_release(&cde_app->mutex); | ||
1405 | |||
1406 | gk20a_idle(g); | ||
1407 | return err; | ||
1408 | } | ||
1409 | |||
1410 | int gk20a_init_cde_support(struct nvgpu_os_linux *l) | ||
1411 | __acquires(&cde_app->mutex) | ||
1412 | __releases(&cde_app->mutex) | ||
1413 | { | ||
1414 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
1415 | struct gk20a *g = &l->g; | ||
1416 | int err; | ||
1417 | |||
1418 | if (cde_app->initialised) | ||
1419 | return 0; | ||
1420 | |||
1421 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); | ||
1422 | |||
1423 | err = nvgpu_mutex_init(&cde_app->mutex); | ||
1424 | if (err) | ||
1425 | return err; | ||
1426 | |||
1427 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1428 | |||
1429 | nvgpu_init_list_node(&cde_app->free_contexts); | ||
1430 | nvgpu_init_list_node(&cde_app->used_contexts); | ||
1431 | cde_app->ctx_count = 0; | ||
1432 | cde_app->ctx_count_top = 0; | ||
1433 | cde_app->ctx_usecount = 0; | ||
1434 | |||
1435 | err = gk20a_cde_create_contexts(l); | ||
1436 | if (!err) | ||
1437 | cde_app->initialised = true; | ||
1438 | |||
1439 | nvgpu_mutex_release(&cde_app->mutex); | ||
1440 | nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err); | ||
1441 | |||
1442 | if (err) | ||
1443 | nvgpu_mutex_destroy(&cde_app->mutex); | ||
1444 | |||
1445 | return err; | ||
1446 | } | ||
1447 | |||
/*
 * Reserved parameter IDs patched into the CDE launch pushbuffer by
 * gk20a_buffer_convert_gpu_to_cde_v1(). H_/V_ prefixes distinguish the
 * horizontal and vertical conversion passes; IDs start at 1024 so they
 * never collide with user parameters (below NUM_RESERVED_PARAMS).
 */
enum cde_launch_patch_id {
	PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
	PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
	PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
	PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
	PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
	PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
	PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
	PATCH_USER_CONST_XBLOCKS_ID = 1041,
	PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
	PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
	PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
	PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
	PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
	PATCH_H_LAUNCH_WORD1_ID = 1049,
	PATCH_H_LAUNCH_WORD2_ID = 1050,
	PATCH_V_LAUNCH_WORD1_ID = 1051,
	PATCH_V_LAUNCH_WORD2_ID = 1052,
	PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
	PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
	PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
	PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
};

/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES		32
1486 | |||
/*
 * Build the launch parameter set for a v1 CDE firmware conversion and
 * submit it via gk20a_cde_convert().
 *
 * Computes the compute grid dimensions for the horizontal and vertical
 * passes from the surface size (8x8-pixel tiles, 8x8 workgroups, 4
 * compbit pairs per byte), patches QMD and launch words, then runs the
 * conversion. On success the buffer state's fence and valid_compbits
 * are updated to reflect the newly generated compbits.
 */
static int gk20a_buffer_convert_gpu_to_cde_v1(
		struct nvgpu_os_linux *l,
		struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_channel_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
	int param = 0;
	int err = 0;
	struct gk20a_fence *new_fence = NULL;
	const int wgx = 8;
	const int wgy = 8;
	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
	const int xalign = compbits_per_byte * wgx;
	const int yalign = wgy;

	/* Compute per launch parameters */
	const int xtiles = (width + 7) >> 3;
	const int ytiles = (height + 7) >> 3;
	const int gridw_h = roundup(xtiles, xalign) / xalign;
	const int gridh_h = roundup(ytiles, yalign) / yalign;
	const int gridw_v = roundup(ytiles, xalign) / xalign;
	const int gridh_v = roundup(xtiles, yalign) / yalign;
	const int xblocks = (xtiles + 1) >> 1;
	/* NOTE(review): u64 difference narrowed to int — assumes the v/h
	 * compbit regions are close enough not to truncate; confirm */
	const int voffset = compbits_voffset - compbits_hoffset;

	int hprog = -1;
	int vprog = -1;

	/* chip-specific hook selects the shader programs for both passes */
	if (l->ops.cde.get_program_numbers)
		l->ops.cde.get_program_numbers(g, block_height_log2,
					       l->cde_app.shader_parameter,
					       &hprog, &vprog);
	else {
		nvgpu_warn(g, "cde: chip not supported");
		return -ENOSYS;
	}

	if (hprog < 0 || vprog < 0) {
		nvgpu_warn(g, "cde: could not determine programs");
		return -ENOSYS;
	}

	if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
		nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
			   xtiles, ytiles);

	nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
		  width, height, block_height_log2,
		  compbits_hoffset, compbits_voffset, scatterbuffer_offset);
	nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
		  width, height, xtiles, ytiles);
	nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
		  wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
	nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
		  hprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
		  vprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* Write parameters */
#define WRITE_PATCH(NAME, VALUE) \
	params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
	WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
		block_height_log2);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
	WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
	WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* launch words 0/1 enable the pass; 2/3 are the disabled variant */
	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}

	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}
#undef WRITE_PATCH

	err = gk20a_cde_convert(l, dmabuf,
				compbits_hoffset,
				scatterbuffer_offset,
				fence_in, submit_flags,
				params, param, &new_fence);
	if (err)
		goto out;

	/* compbits generated, update state & fence */
	gk20a_fence_put(state->fence);
	state->fence = new_fence;
	state->valid_compbits |= consumer &
		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
out:
	return err;
}
1629 | |||
1630 | static int gk20a_buffer_convert_gpu_to_cde( | ||
1631 | struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, | ||
1632 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1633 | u64 scatterbuffer_offset, | ||
1634 | u32 width, u32 height, u32 block_height_log2, | ||
1635 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
1636 | struct gk20a_buffer_state *state) | ||
1637 | { | ||
1638 | struct gk20a *g = &l->g; | ||
1639 | int err = 0; | ||
1640 | |||
1641 | if (!l->cde_app.initialised) | ||
1642 | return -ENOSYS; | ||
1643 | |||
1644 | nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", | ||
1645 | l->cde_app.firmware_version); | ||
1646 | |||
1647 | if (l->cde_app.firmware_version == 1) { | ||
1648 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
1649 | l, dmabuf, consumer, offset, compbits_hoffset, | ||
1650 | compbits_voffset, scatterbuffer_offset, | ||
1651 | width, height, block_height_log2, | ||
1652 | submit_flags, fence_in, state); | ||
1653 | } else { | ||
1654 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
1655 | l->cde_app.firmware_version); | ||
1656 | err = -EINVAL; | ||
1657 | } | ||
1658 | |||
1659 | return err; | ||
1660 | } | ||
1661 | |||
1662 | int gk20a_prepare_compressible_read( | ||
1663 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
1664 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1665 | u64 scatterbuffer_offset, | ||
1666 | u32 width, u32 height, u32 block_height_log2, | ||
1667 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
1668 | u32 *valid_compbits, u32 *zbc_color, | ||
1669 | struct gk20a_fence **fence_out) | ||
1670 | { | ||
1671 | struct gk20a *g = &l->g; | ||
1672 | int err = 0; | ||
1673 | struct gk20a_buffer_state *state; | ||
1674 | struct dma_buf *dmabuf; | ||
1675 | u32 missing_bits; | ||
1676 | |||
1677 | dmabuf = dma_buf_get(buffer_fd); | ||
1678 | if (IS_ERR(dmabuf)) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1682 | if (err) { | ||
1683 | dma_buf_put(dmabuf); | ||
1684 | return err; | ||
1685 | } | ||
1686 | |||
1687 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1688 | |||
1689 | nvgpu_mutex_acquire(&state->lock); | ||
1690 | |||
1691 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
1692 | |||
1693 | gk20a_fence_put(state->fence); | ||
1694 | state->fence = NULL; | ||
1695 | /* state->fence = decompress(); | ||
1696 | state->valid_compbits = 0; */ | ||
1697 | err = -EINVAL; | ||
1698 | goto out; | ||
1699 | } else if (missing_bits) { | ||
1700 | u32 missing_cde_bits = missing_bits & | ||
1701 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1702 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
1703 | missing_cde_bits) { | ||
1704 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1705 | l, dmabuf, | ||
1706 | missing_cde_bits, | ||
1707 | offset, compbits_hoffset, | ||
1708 | compbits_voffset, scatterbuffer_offset, | ||
1709 | width, height, block_height_log2, | ||
1710 | submit_flags, fence, | ||
1711 | state); | ||
1712 | if (err) | ||
1713 | goto out; | ||
1714 | } | ||
1715 | } | ||
1716 | |||
1717 | if (state->fence && fence_out) | ||
1718 | *fence_out = gk20a_fence_get(state->fence); | ||
1719 | |||
1720 | if (valid_compbits) | ||
1721 | *valid_compbits = state->valid_compbits; | ||
1722 | |||
1723 | if (zbc_color) | ||
1724 | *zbc_color = state->zbc_color; | ||
1725 | |||
1726 | out: | ||
1727 | nvgpu_mutex_release(&state->lock); | ||
1728 | dma_buf_put(dmabuf); | ||
1729 | return err; | ||
1730 | } | ||
1731 | |||
1732 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1733 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
1734 | { | ||
1735 | int err; | ||
1736 | struct gk20a_buffer_state *state; | ||
1737 | struct dma_buf *dmabuf; | ||
1738 | |||
1739 | dmabuf = dma_buf_get(buffer_fd); | ||
1740 | if (IS_ERR(dmabuf)) { | ||
1741 | nvgpu_err(g, "invalid dmabuf"); | ||
1742 | return -EINVAL; | ||
1743 | } | ||
1744 | |||
1745 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1746 | if (err) { | ||
1747 | nvgpu_err(g, "could not get state from dmabuf"); | ||
1748 | dma_buf_put(dmabuf); | ||
1749 | return err; | ||
1750 | } | ||
1751 | |||
1752 | nvgpu_mutex_acquire(&state->lock); | ||
1753 | |||
1754 | /* Update the compbits state. */ | ||
1755 | state->valid_compbits = valid_compbits; | ||
1756 | state->zbc_color = zbc_color; | ||
1757 | |||
1758 | /* Discard previous compbit job fence. */ | ||
1759 | gk20a_fence_put(state->fence); | ||
1760 | state->fence = NULL; | ||
1761 | |||
1762 | nvgpu_mutex_release(&state->lock); | ||
1763 | dma_buf_put(dmabuf); | ||
1764 | return 0; | ||
1765 | } | ||
1766 | |||
1767 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) | ||
1768 | { | ||
1769 | struct gk20a *g = &l->g; | ||
1770 | u32 ver = g->params.gpu_arch + g->params.gpu_impl; | ||
1771 | |||
1772 | switch (ver) { | ||
1773 | case GK20A_GPUID_GM20B: | ||
1774 | case GK20A_GPUID_GM20B_B: | ||
1775 | l->ops.cde = gm20b_cde_ops.cde; | ||
1776 | break; | ||
1777 | case NVGPU_GPUID_GP10B: | ||
1778 | l->ops.cde = gp10b_cde_ops.cde; | ||
1779 | break; | ||
1780 | default: | ||
1781 | /* CDE is optional, so today ignoring unknown chip is fine */ | ||
1782 | break; | ||
1783 | } | ||
1784 | |||
1785 | return 0; | ||
1786 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h new file mode 100644 index 00000000..5928b624 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde.h | |||
@@ -0,0 +1,326 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include <nvgpu/nvgpu_mem.h> | ||
23 | #include <nvgpu/list.h> | ||
24 | #include <nvgpu/lock.h> | ||
25 | |||
26 | #include <linux/kobject.h> | ||
27 | #include <linux/workqueue.h> | ||
28 | |||
29 | #define MAX_CDE_BUFS 10 | ||
30 | #define MAX_CDE_PARAMS 64 | ||
31 | #define MAX_CDE_USER_PARAMS 40 | ||
32 | #define MAX_CDE_ARRAY_ENTRIES 9 | ||
33 | |||
34 | /* | ||
35 | * The size of the context ring buffer that is dedicated for handling cde | ||
 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
37 | * wait on the previous job to that channel, so increasing this value | ||
38 | * reduces the likelihood of stalls. | ||
39 | */ | ||
40 | #define NUM_CDE_CONTEXTS 4 | ||
41 | |||
42 | struct dma_buf; | ||
43 | struct device; | ||
44 | struct nvgpu_os_linux; | ||
45 | struct gk20a; | ||
46 | struct gk20a_fence; | ||
47 | struct nvgpu_channel_fence; | ||
48 | struct channel_gk20a; | ||
49 | struct vm_gk20a; | ||
50 | struct nvgpu_gpfifo_entry; | ||
51 | |||
52 | /* | ||
53 | * this element defines a buffer that is allocated and mapped into gpu address | ||
54 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
 * firmware. num_bytes defines how many bytes the firmware contains.
56 | * | ||
57 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
58 | */ | ||
59 | |||
struct gk20a_cde_hdr_buf {
	u64 data_byte_offset;	/* start of the data inside the firmware image; 0 = allocate an empty buffer */
	u64 num_bytes;		/* size of the buffer in bytes */
};
64 | |||
65 | /* | ||
66 | * this element defines a constant patching in buffers. It basically | ||
67 | * computes physical address to <source_buf>+source_byte_offset. The | ||
68 | * address is then modified into patch value as per: | ||
69 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
70 | * | ||
71 | * The type field defines the register size as: | ||
72 | * 0=u32, | ||
73 | * 1=u64 (little endian), | ||
74 | * 2=u64 (big endian) | ||
75 | */ | ||
76 | |||
struct gk20a_cde_hdr_replace {
	u32 target_buf;		/* index of the buffer being patched */
	u32 source_buf;		/* index of the buffer whose address is the patch source */
	s32 shift;		/* shift applied to the address before masking */
	u32 type;		/* TYPE_PARAM_TYPE_*: patched register width/endianness */
	u64 target_byte_offset;	/* patch location inside target_buf */
	u64 source_byte_offset;	/* offset added to source_buf's address */
	u64 mask;		/* bits of the target word that get replaced */
};

/* Width/endianness of a patched value ('type' in replace/param elements) */
enum {
	TYPE_PARAM_TYPE_U32 = 0,
	TYPE_PARAM_TYPE_U64_LITTLE,
	TYPE_PARAM_TYPE_U64_BIG
};
92 | |||
93 | /* | ||
94 | * this element defines a runtime patching in buffers. Parameters with id from | ||
95 | * 0 to 1024 are reserved for special usage as follows: | ||
96 | * 0 = comptags_per_cacheline, | ||
97 | * 1 = slices_per_fbp, | ||
98 | * 2 = num_fbps | ||
99 | * 3 = source buffer first page offset | ||
100 | * 4 = source buffer block height log2 | ||
101 | * 5 = backing store memory address | ||
102 | * 6 = destination memory address | ||
103 | * 7 = destination size (bytes) | ||
104 | * 8 = backing store size (bytes) | ||
105 | * 9 = cache line size | ||
106 | * | ||
 * Parameters above id 1024 are user-specified. I.e. they determine where
 * parameters from user space should be placed in buffers and what their
 * type is, etc.
110 | * | ||
111 | * Once the value is available, we add data_offset to the value. | ||
112 | * | ||
113 | * The value address is then modified into patch value as per: | ||
114 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
115 | * | ||
116 | * The type field defines the register size as: | ||
117 | * 0=u32, | ||
118 | * 1=u64 (little endian), | ||
119 | * 2=u64 (big endian) | ||
120 | */ | ||
121 | |||
struct gk20a_cde_hdr_param {
	u32 id;			/* TYPE_PARAM_* below, or >= NUM_RESERVED_PARAMS for user params */
	u32 target_buf;		/* index of the buffer being patched */
	s32 shift;		/* shift applied to the value before masking */
	u32 type;		/* TYPE_PARAM_TYPE_*: patched register width/endianness */
	s64 data_offset;	/* signed offset added to the resolved value */
	u64 target_byte_offset;	/* patch location inside target_buf */
	u64 mask;		/* bits of the target word that get replaced */
};

/* Reserved parameter ids (0..1023) resolved by the driver itself */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
	TYPE_PARAM_GPU_CONFIGURATION,
	TYPE_PARAM_FIRSTPAGEOFFSET,
	TYPE_PARAM_NUMPAGES,
	TYPE_PARAM_BACKINGSTORE,
	TYPE_PARAM_DESTINATION,
	TYPE_PARAM_DESTINATION_SIZE,
	TYPE_PARAM_BACKINGSTORE_SIZE,
	TYPE_PARAM_SOURCE_SMMU_ADDR,
	TYPE_PARAM_BACKINGSTORE_BASE_HW,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
	TYPE_PARAM_SCATTERBUFFER,
	TYPE_PARAM_SCATTERBUFFER_SIZE,
	NUM_RESERVED_PARAMS = 1024,	/* first id available to user-specified params */
};
148 | |||
149 | /* | ||
150 | * This header element defines a command. The op field determines whether the | ||
151 | * element is defining an init (0) or convert command (1). data_byte_offset | ||
152 | * denotes the beginning address of command elements in the file. | ||
153 | */ | ||
154 | |||
struct gk20a_cde_hdr_command {
	u32 op;			/* TYPE_BUF_COMMAND_*: init, convert or noop */
	u32 num_entries;	/* number of gk20a_cde_cmd_elem entries */
	u64 data_byte_offset;	/* start of the command elements in the firmware file */
};

/* Values of gk20a_cde_hdr_command::op */
enum {
	TYPE_BUF_COMMAND_INIT = 0,
	TYPE_BUF_COMMAND_CONVERT,
	TYPE_BUF_COMMAND_NOOP
};
166 | |||
167 | /* | ||
 * This command element defines one entry inside the push buffer. target_buf
169 | * defines the buffer including the pushbuffer entries, target_byte_offset the | ||
170 | * offset inside the buffer and num_bytes the number of words in the buffer. | ||
171 | */ | ||
172 | |||
struct gk20a_cde_cmd_elem {
	u32 target_buf;		/* buffer containing the pushbuffer entries */
	u32 padding;
	u64 target_byte_offset;	/* offset of the entry inside the buffer */
	u64 num_bytes;		/* size of the pushbuffer segment in bytes */
};

/*
 * This element is used for storing a small array of data.
 */

/* Well-known ids for gk20a_cde_hdr_array::id */
enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT,
	ARRAY_LAUNCH_COMMAND,
	NUM_CDE_ARRAYS
};

struct gk20a_cde_hdr_array {
	u32 id;				/* ARRAY_* index into gk20a_cde_app::arrays */
	u32 data[MAX_CDE_ARRAY_ENTRIES];
};
195 | |||
196 | /* | ||
197 | * Following defines a single header element. Each element has a type and | ||
198 | * some of the data structures. | ||
199 | */ | ||
200 | |||
struct gk20a_cde_hdr_elem {
	u32 type;	/* TYPE_* discriminant selecting the union member below */
	u32 padding;
	union {
		struct gk20a_cde_hdr_buf buf;
		struct gk20a_cde_hdr_replace replace;
		struct gk20a_cde_hdr_param param;
		u32 required_class;	/* GPU class object this firmware requires */
		struct gk20a_cde_hdr_command command;
		struct gk20a_cde_hdr_array array;
	};
};

/* Values of gk20a_cde_hdr_elem::type */
enum {
	TYPE_BUF = 0,
	TYPE_REPLACE,
	TYPE_PARAM,
	TYPE_REQUIRED_CLASS,
	TYPE_COMMAND,
	TYPE_ARRAY
};
222 | |||
/* A single user-supplied runtime parameter passed to gk20a_cde_convert() */
struct gk20a_cde_param {
	u32 id;		/* parameter id; user params start at NUM_RESERVED_PARAMS */
	u32 padding;
	u64 value;	/* raw value; interpreted per the matching hdr_param */
};
228 | |||
/*
 * One CDE conversion context: a dedicated channel together with the
 * firmware buffers, patch parameters and pre-built command buffers
 * needed to run compbit conversion jobs on it.
 */
struct gk20a_cde_ctx {
	struct nvgpu_os_linux *l;
	struct device *dev;

	/* channel related data */
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct vm_gk20a *vm;

	/* buf converter configuration */
	struct nvgpu_mem mem[MAX_CDE_BUFS];
	unsigned int num_bufs;

	/* buffer patching params (where should patching be done) */
	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	unsigned int num_params;

	/* storage for user space parameter values */
	u32 user_param_values[MAX_CDE_USER_PARAMS];

	u32 surf_param_offset;
	u32 surf_param_lines;
	u64 surf_vaddr;		/* NOTE(review): appears to be the GPU VA of the source surface — confirm in cde.c */

	u64 compbit_vaddr;
	u64 compbit_size;

	u64 scatterbuffer_vaddr;
	u64 scatterbuffer_size;

	u64 backing_store_vaddr;

	/* pre-built gpfifo segments: init runs once per ctx, convert per job */
	struct nvgpu_gpfifo_entry *init_convert_cmd;
	int init_cmd_num_entries;

	struct nvgpu_gpfifo_entry *convert_cmd;
	int convert_cmd_num_entries;

	struct kobj_attribute attr;

	bool init_cmd_executed;	/* true once the init command has been submitted */

	/* membership in gk20a_cde_app's free_contexts/used_contexts lists */
	struct nvgpu_list_node list;
	bool is_temporary;
	bool in_use;
	struct delayed_work ctx_deleter_work;	/* deferred teardown of idle contexts */
};
276 | |||
277 | static inline struct gk20a_cde_ctx * | ||
278 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
279 | { | ||
280 | return (struct gk20a_cde_ctx *) | ||
281 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
282 | }; | ||
283 | |||
/*
 * Top-level CDE state (embedded in nvgpu_os_linux): the pool of
 * conversion contexts plus data parsed from the CDE firmware image.
 */
struct gk20a_cde_app {
	bool initialised;
	struct nvgpu_mutex mutex;	/* NOTE(review): presumably guards the lists/counters below — confirm in cde.c */

	struct nvgpu_list_node free_contexts;
	struct nvgpu_list_node used_contexts;
	unsigned int ctx_count;
	unsigned int ctx_usecount;
	unsigned int ctx_count_top;	/* NOTE(review): looks like a high watermark of ctx_count — confirm */

	u32 firmware_version;

	/* small data arrays read from the firmware, indexed by ARRAY_* */
	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];

	u32 shader_parameter;
};
300 | |||
301 | void gk20a_cde_destroy(struct nvgpu_os_linux *l); | ||
302 | void gk20a_cde_suspend(struct nvgpu_os_linux *l); | ||
303 | int gk20a_init_cde_support(struct nvgpu_os_linux *l); | ||
304 | int gk20a_cde_reload(struct nvgpu_os_linux *l); | ||
305 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
306 | struct dma_buf *compbits_buf, | ||
307 | u64 compbits_byte_offset, | ||
308 | u64 scatterbuffer_byte_offset, | ||
309 | struct nvgpu_channel_fence *fence, | ||
310 | u32 __flags, struct gk20a_cde_param *params, | ||
311 | int num_params, struct gk20a_fence **fence_out); | ||
312 | |||
313 | int gk20a_prepare_compressible_read( | ||
314 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
315 | u64 compbits_hoffset, u64 compbits_voffset, | ||
316 | u64 scatterbuffer_offset, | ||
317 | u32 width, u32 height, u32 block_height_log2, | ||
318 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
319 | u32 *valid_compbits, u32 *zbc_color, | ||
320 | struct gk20a_fence **fence_out); | ||
321 | int gk20a_mark_compressible_write( | ||
322 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, | ||
323 | u32 zbc_color); | ||
324 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); | ||
325 | |||
326 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.c b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c new file mode 100644 index 00000000..1cd15c54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.c | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * GM20B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "cde_gm20b.h" | ||
27 | |||
28 | enum programs { | ||
29 | PROG_HPASS = 0, | ||
30 | PROG_VPASS_LARGE = 1, | ||
31 | PROG_VPASS_SMALL = 2, | ||
32 | PROG_HPASS_DEBUG = 3, | ||
33 | PROG_VPASS_LARGE_DEBUG = 4, | ||
34 | PROG_VPASS_SMALL_DEBUG = 5, | ||
35 | PROG_PASSTHROUGH = 6, | ||
36 | }; | ||
37 | |||
38 | static void gm20b_cde_get_program_numbers(struct gk20a *g, | ||
39 | u32 block_height_log2, | ||
40 | u32 shader_parameter, | ||
41 | int *hprog_out, int *vprog_out) | ||
42 | { | ||
43 | int hprog = PROG_HPASS; | ||
44 | int vprog = (block_height_log2 >= 2) ? | ||
45 | PROG_VPASS_LARGE : PROG_VPASS_SMALL; | ||
46 | if (shader_parameter == 1) { | ||
47 | hprog = PROG_PASSTHROUGH; | ||
48 | vprog = PROG_PASSTHROUGH; | ||
49 | } else if (shader_parameter == 2) { | ||
50 | hprog = PROG_HPASS_DEBUG; | ||
51 | vprog = (block_height_log2 >= 2) ? | ||
52 | PROG_VPASS_LARGE_DEBUG : | ||
53 | PROG_VPASS_SMALL_DEBUG; | ||
54 | } | ||
55 | |||
56 | *hprog_out = hprog; | ||
57 | *vprog_out = vprog; | ||
58 | } | ||
59 | |||
/* Linux CDE callbacks for GM20B: only program selection is installed;
 * no scatter-buffer hooks (cf. gp10b_cde_ops). */
struct nvgpu_os_linux_ops gm20b_cde_ops = {
	.cde = {
		.get_program_numbers = gm20b_cde_get_program_numbers,
	},
};
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gm20b.h b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h new file mode 100644 index 00000000..640d6ab6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gm20b.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * GM20B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GM20B_CDE | ||
26 | #define _NVHOST_GM20B_CDE | ||
27 | |||
28 | #include "os_linux.h" | ||
29 | |||
30 | extern struct nvgpu_os_linux_ops gm20b_cde_ops; | ||
31 | |||
32 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.c b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c new file mode 100644 index 00000000..5c0e79a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.c | |||
@@ -0,0 +1,161 @@ | |||
1 | /* | ||
2 | * GP10B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "cde_gp10b.h" | ||
27 | |||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/dma.h> | ||
30 | |||
/*
 * GP10B CDE firmware program slots. Even entries are the big-page
 * variants; the odd "_4K" entries are selected (by OR-ing 1 in
 * gp10b_cde_get_program_numbers()) when the GPU is not behind an IOMMU.
 */
enum gp10b_programs {
	GP10B_PROG_HPASS = 0,
	GP10B_PROG_HPASS_4K = 1,
	GP10B_PROG_VPASS = 2,
	GP10B_PROG_VPASS_4K = 3,
	GP10B_PROG_HPASS_DEBUG = 4,
	GP10B_PROG_HPASS_4K_DEBUG = 5,
	GP10B_PROG_VPASS_DEBUG = 6,
	GP10B_PROG_VPASS_4K_DEBUG = 7,
	GP10B_PROG_PASSTHROUGH = 8,
};
42 | |||
43 | void gp10b_cde_get_program_numbers(struct gk20a *g, | ||
44 | u32 block_height_log2, | ||
45 | u32 shader_parameter, | ||
46 | int *hprog_out, int *vprog_out) | ||
47 | { | ||
48 | int hprog, vprog; | ||
49 | |||
50 | if (shader_parameter == 1) { | ||
51 | hprog = GP10B_PROG_PASSTHROUGH; | ||
52 | vprog = GP10B_PROG_PASSTHROUGH; | ||
53 | } else { | ||
54 | hprog = GP10B_PROG_HPASS; | ||
55 | vprog = GP10B_PROG_VPASS; | ||
56 | if (shader_parameter == 2) { | ||
57 | hprog = GP10B_PROG_HPASS_DEBUG; | ||
58 | vprog = GP10B_PROG_VPASS_DEBUG; | ||
59 | } | ||
60 | if (!nvgpu_iommuable(g)) { | ||
61 | if (!g->mm.disable_bigpage) { | ||
62 | nvgpu_warn(g, | ||
63 | "When no IOMMU big pages cannot be used"); | ||
64 | } | ||
65 | hprog |= 1; | ||
66 | vprog |= 1; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | *hprog_out = hprog; | ||
71 | *vprog_out = vprog; | ||
72 | } | ||
73 | |||
/*
 * A scatter buffer is required whenever the GPU is not behind an IOMMU.
 * NOTE(review): rationale inferred from gp10b_populate_scatter_buffer
 * (per-page physical-address parity) — confirm against CDE firmware docs.
 */
bool gp10b_need_scatter_buffer(struct gk20a *g)
{
	return !nvgpu_iommuable(g);
}
78 | |||
79 | static u8 parity(u32 a) | ||
80 | { | ||
81 | a ^= a>>16u; | ||
82 | a ^= a>>8u; | ||
83 | a ^= a>>4u; | ||
84 | a &= 0xfu; | ||
85 | return (0x6996u >> a) & 1u; | ||
86 | } | ||
87 | |||
88 | int gp10b_populate_scatter_buffer(struct gk20a *g, | ||
89 | struct sg_table *sgt, | ||
90 | size_t surface_size, | ||
91 | void *scatter_buffer_ptr, | ||
92 | size_t scatter_buffer_size) | ||
93 | { | ||
94 | /* map scatter buffer to CPU VA and fill it */ | ||
95 | const u32 page_size_log2 = 12; | ||
96 | const u32 page_size = 1 << page_size_log2; | ||
97 | const u32 page_size_shift = page_size_log2 - 7u; | ||
98 | |||
99 | /* 0011 1111 1111 1111 1111 1110 0100 1000 */ | ||
100 | const u32 getSliceMaskGP10B = 0x3ffffe48; | ||
101 | u8 *scatter_buffer = scatter_buffer_ptr; | ||
102 | |||
103 | size_t i; | ||
104 | struct scatterlist *sg = NULL; | ||
105 | u8 d = 0; | ||
106 | size_t page = 0; | ||
107 | size_t pages_left; | ||
108 | |||
109 | surface_size = round_up(surface_size, page_size); | ||
110 | |||
111 | pages_left = surface_size >> page_size_log2; | ||
112 | if ((pages_left >> 3) > scatter_buffer_size) | ||
113 | return -ENOMEM; | ||
114 | |||
115 | for_each_sg(sgt->sgl, sg, sgt->nents, i) { | ||
116 | unsigned int j; | ||
117 | u64 surf_pa = sg_phys(sg); | ||
118 | unsigned int n = (int)(sg->length >> page_size_log2); | ||
119 | |||
120 | nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n); | ||
121 | |||
122 | for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) { | ||
123 | u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift; | ||
124 | u8 scatter_bit = parity(addr); | ||
125 | u8 bit = page & 7; | ||
126 | |||
127 | d |= scatter_bit << bit; | ||
128 | if (bit == 7) { | ||
129 | scatter_buffer[page >> 3] = d; | ||
130 | d = 0; | ||
131 | } | ||
132 | |||
133 | ++page; | ||
134 | --pages_left; | ||
135 | } | ||
136 | |||
137 | if (pages_left == 0) | ||
138 | break; | ||
139 | } | ||
140 | |||
141 | /* write the last byte in case the number of pages is not divisible by 8 */ | ||
142 | if ((page & 7) != 0) | ||
143 | scatter_buffer[page >> 3] = d; | ||
144 | |||
145 | if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) { | ||
146 | nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:"); | ||
147 | for (i = 0; i < page >> 3; i++) { | ||
148 | nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | return 0; | ||
153 | } | ||
154 | |||
/* Linux CDE callbacks for GP10B: program selection plus the scatter
 * buffer hooks used when the GPU is not behind an IOMMU. */
struct nvgpu_os_linux_ops gp10b_cde_ops = {
	.cde = {
		.get_program_numbers = gp10b_cde_get_program_numbers,
		.need_scatter_buffer = gp10b_need_scatter_buffer,
		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
	},
};
diff --git a/drivers/gpu/nvgpu/os/linux/cde_gp10b.h b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h new file mode 100644 index 00000000..52e9f292 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cde_gp10b.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * GP10B CDE | ||
3 | * | ||
4 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NVHOST_GP10B_CDE | ||
26 | #define _NVHOST_GP10B_CDE | ||
27 | |||
28 | #include "os_linux.h" | ||
29 | |||
30 | extern struct nvgpu_os_linux_ops gp10b_cde_ops; | ||
31 | |||
32 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c new file mode 100644 index 00000000..165f33db --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ce2.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/types.h> | ||
18 | |||
19 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
20 | |||
21 | #include "gk20a/ce2_gk20a.h" | ||
22 | #include "gk20a/gk20a.h" | ||
23 | #include "channel.h" | ||
24 | |||
25 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | ||
26 | { | ||
27 | /* there is no local memory available, | ||
28 | don't allow local memory related CE flags */ | ||
29 | if (!g->mm.vidmem.size) { | ||
30 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
31 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
32 | } | ||
33 | return launch_flags; | ||
34 | } | ||
35 | |||
/*
 * Submit one copy-engine operation on the CE context identified by
 * @ce_ctx_id: build the CE method stream into the context's command
 * buffer ring, push a single gpfifo entry referencing it, and record
 * the post-fence so the ring slot can be reused safely later.
 *
 * On success, optionally returns (via @gk20a_fence_out, with an extra
 * reference) the fence of the submitted job.
 *
 * Returns 0 on success; -EPERM when the CE app is not active (or on a
 * failed wait for the slot's previous fence), -EINVAL for an unknown
 * ctx id, -ENODEV when the context is not in the ALLOCATED state, or
 * -ENOMEM when the method stream cannot be built.
 */
int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 dma_copy_class;
	struct nvgpu_gpfifo_entry gpfifo;
	struct nvgpu_channel_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	/* look the context up under the app mutex */
	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	/* NOTE(review): ce_ctx is dereferenced after app_mutex is dropped
	 * and before gpu_ctx_mutex is taken — presumably the caller owns
	 * the context lifetime; confirm against the deletion path. */
	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	/* the command buffer is a ring of NVGPU_CE_MAX_INFLIGHT_JOBS slots */
	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;

	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	/* before reusing a slot, wait for (and drop) its previous job's fence */
	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
		struct gk20a_fence **prev_post_fence =
			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];

		ret = gk20a_fence_wait(g, *prev_post_fence,
				       gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(*prev_post_fence);
		*prev_post_fence = NULL;
		if (ret)
			goto noop;
	}

	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));

	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	/* build the CE method stream into the slot; 0 means it did not fit */
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class);

	if (methodSize) {
		/* store the element into gpfifo */
		gpfifo.entry0 =
			u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 =
			(u64_hi32(cmd_buf_gpu_va) |
			pbdma_gp_entry1_length_f(methodSize));

		/* take always the postfence as it is needed for protecting the ce context */
		submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;

		/* make the method words visible before the gpfifo kickoff */
		nvgpu_smp_wmb();

		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
				1, submit_flags, &fence,
				&ce_cmd_buf_fence_out, NULL);

		if (!ret) {
			ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
				ce_cmd_buf_fence_out;
			if (gk20a_fence_out) {
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* Next available command buffer queue Index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c new file mode 100644 index 00000000..7810bc21 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/channel.c | |||
@@ -0,0 +1,1021 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/enabled.h> | ||
18 | #include <nvgpu/debug.h> | ||
19 | #include <nvgpu/ltc.h> | ||
20 | #include <nvgpu/error_notifier.h> | ||
21 | #include <nvgpu/os_sched.h> | ||
22 | |||
23 | /* | ||
24 | * This is required for nvgpu_vm_find_buf() which is used in the tracing | ||
25 | * code. Once we can get and access userspace buffers without requiring | ||
26 | * direct dma_buf usage this can be removed. | ||
27 | */ | ||
28 | #include <nvgpu/linux/vm.h> | ||
29 | |||
30 | #include "gk20a/gk20a.h" | ||
31 | |||
32 | #include "channel.h" | ||
33 | #include "ioctl_channel.h" | ||
34 | #include "os_linux.h" | ||
35 | |||
36 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
37 | |||
38 | #include <linux/uaccess.h> | ||
39 | #include <linux/dma-buf.h> | ||
40 | #include <trace/events/gk20a.h> | ||
41 | #include <uapi/linux/nvgpu.h> | ||
42 | |||
43 | #include "sync_sema_android.h" | ||
44 | |||
45 | u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags) | ||
46 | { | ||
47 | u32 flags = 0; | ||
48 | |||
49 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) | ||
50 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT; | ||
51 | |||
52 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
53 | flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; | ||
54 | |||
55 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT) | ||
56 | flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT; | ||
57 | |||
58 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | ||
59 | flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE; | ||
60 | |||
61 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI) | ||
62 | flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI; | ||
63 | |||
64 | if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING) | ||
65 | flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING; | ||
66 | |||
67 | return flags; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * API to convert error_notifiers in common code and of the form | ||
72 | * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user | ||
73 | * space and of the form NVGPU_CHANNEL_* | ||
74 | */ | ||
75 | static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier) | ||
76 | { | ||
77 | switch (error_notifier) { | ||
78 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT: | ||
79 | return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT; | ||
80 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD: | ||
81 | return NVGPU_CHANNEL_GR_ERROR_SW_METHOD; | ||
82 | case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY: | ||
83 | return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY; | ||
84 | case NVGPU_ERR_NOTIFIER_GR_EXCEPTION: | ||
85 | return NVGPU_CHANNEL_GR_EXCEPTION; | ||
86 | case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT: | ||
87 | return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT; | ||
88 | case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY: | ||
89 | return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY; | ||
90 | case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT: | ||
91 | return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT; | ||
92 | case NVGPU_ERR_NOTIFIER_PBDMA_ERROR: | ||
93 | return NVGPU_CHANNEL_PBDMA_ERROR; | ||
94 | case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD: | ||
95 | return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD; | ||
96 | case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR: | ||
97 | return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR; | ||
98 | case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH: | ||
99 | return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH; | ||
100 | } | ||
101 | |||
102 | pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier); | ||
103 | |||
104 | return error_notifier; | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * nvgpu_set_error_notifier_locked() | ||
109 | * Should be called with ch->error_notifier_mutex held | ||
110 | * | ||
111 | * error should be of the form NVGPU_ERR_NOTIFIER_* | ||
112 | */ | ||
113 | void nvgpu_set_error_notifier_locked(struct channel_gk20a *ch, u32 error) | ||
114 | { | ||
115 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
116 | |||
117 | error = nvgpu_error_notifier_to_channel_notifier(error); | ||
118 | |||
119 | if (priv->error_notifier.dmabuf) { | ||
120 | struct nvgpu_notification *notification = | ||
121 | priv->error_notifier.notification; | ||
122 | struct timespec time_data; | ||
123 | u64 nsec; | ||
124 | |||
125 | getnstimeofday(&time_data); | ||
126 | nsec = ((u64)time_data.tv_sec) * 1000000000u + | ||
127 | (u64)time_data.tv_nsec; | ||
128 | notification->time_stamp.nanoseconds[0] = | ||
129 | (u32)nsec; | ||
130 | notification->time_stamp.nanoseconds[1] = | ||
131 | (u32)(nsec >> 32); | ||
132 | notification->info32 = error; | ||
133 | notification->status = 0xffff; | ||
134 | |||
135 | nvgpu_err(ch->g, | ||
136 | "error notifier set to %d for ch %d", error, ch->chid); | ||
137 | } | ||
138 | } | ||
139 | |||
140 | /* error should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
141 | void nvgpu_set_error_notifier(struct channel_gk20a *ch, u32 error) | ||
142 | { | ||
143 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
144 | |||
145 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
146 | nvgpu_set_error_notifier_locked(ch, error); | ||
147 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
148 | } | ||
149 | |||
150 | void nvgpu_set_error_notifier_if_empty(struct channel_gk20a *ch, u32 error) | ||
151 | { | ||
152 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
153 | |||
154 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
155 | if (priv->error_notifier.dmabuf) { | ||
156 | struct nvgpu_notification *notification = | ||
157 | priv->error_notifier.notification; | ||
158 | |||
159 | /* Don't overwrite error flag if it is already set */ | ||
160 | if (notification->status != 0xffff) | ||
161 | nvgpu_set_error_notifier_locked(ch, error); | ||
162 | } | ||
163 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
164 | } | ||
165 | |||
166 | /* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */ | ||
167 | bool nvgpu_is_error_notifier_set(struct channel_gk20a *ch, u32 error_notifier) | ||
168 | { | ||
169 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
170 | bool notifier_set = false; | ||
171 | |||
172 | error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier); | ||
173 | |||
174 | nvgpu_mutex_acquire(&priv->error_notifier.mutex); | ||
175 | if (priv->error_notifier.dmabuf) { | ||
176 | struct nvgpu_notification *notification = | ||
177 | priv->error_notifier.notification; | ||
178 | u32 err = notification->info32; | ||
179 | |||
180 | if (err == error_notifier) | ||
181 | notifier_set = true; | ||
182 | } | ||
183 | nvgpu_mutex_release(&priv->error_notifier.mutex); | ||
184 | |||
185 | return notifier_set; | ||
186 | } | ||
187 | |||
188 | static void gk20a_channel_update_runcb_fn(struct work_struct *work) | ||
189 | { | ||
190 | struct nvgpu_channel_completion_cb *completion_cb = | ||
191 | container_of(work, struct nvgpu_channel_completion_cb, work); | ||
192 | struct nvgpu_channel_linux *priv = | ||
193 | container_of(completion_cb, | ||
194 | struct nvgpu_channel_linux, completion_cb); | ||
195 | struct channel_gk20a *ch = priv->ch; | ||
196 | void (*fn)(struct channel_gk20a *, void *); | ||
197 | void *user_data; | ||
198 | |||
199 | nvgpu_spinlock_acquire(&completion_cb->lock); | ||
200 | fn = completion_cb->fn; | ||
201 | user_data = completion_cb->user_data; | ||
202 | nvgpu_spinlock_release(&completion_cb->lock); | ||
203 | |||
204 | if (fn) | ||
205 | fn(ch, user_data); | ||
206 | } | ||
207 | |||
208 | static void nvgpu_channel_work_completion_init(struct channel_gk20a *ch) | ||
209 | { | ||
210 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
211 | |||
212 | priv->completion_cb.fn = NULL; | ||
213 | priv->completion_cb.user_data = NULL; | ||
214 | nvgpu_spinlock_init(&priv->completion_cb.lock); | ||
215 | INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn); | ||
216 | } | ||
217 | |||
218 | static void nvgpu_channel_work_completion_clear(struct channel_gk20a *ch) | ||
219 | { | ||
220 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
221 | |||
222 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
223 | priv->completion_cb.fn = NULL; | ||
224 | priv->completion_cb.user_data = NULL; | ||
225 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
226 | cancel_work_sync(&priv->completion_cb.work); | ||
227 | } | ||
228 | |||
229 | static void nvgpu_channel_work_completion_signal(struct channel_gk20a *ch) | ||
230 | { | ||
231 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
232 | |||
233 | if (priv->completion_cb.fn) | ||
234 | schedule_work(&priv->completion_cb.work); | ||
235 | } | ||
236 | |||
237 | static void nvgpu_channel_work_completion_cancel_sync(struct channel_gk20a *ch) | ||
238 | { | ||
239 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
240 | |||
241 | if (priv->completion_cb.fn) | ||
242 | cancel_work_sync(&priv->completion_cb.work); | ||
243 | } | ||
244 | |||
245 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | ||
246 | void (*update_fn)(struct channel_gk20a *, void *), | ||
247 | void *update_fn_data, | ||
248 | int runlist_id, | ||
249 | bool is_privileged_channel) | ||
250 | { | ||
251 | struct channel_gk20a *ch; | ||
252 | struct nvgpu_channel_linux *priv; | ||
253 | |||
254 | ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel, | ||
255 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
256 | |||
257 | if (ch) { | ||
258 | priv = ch->os_priv; | ||
259 | nvgpu_spinlock_acquire(&priv->completion_cb.lock); | ||
260 | priv->completion_cb.fn = update_fn; | ||
261 | priv->completion_cb.user_data = update_fn_data; | ||
262 | nvgpu_spinlock_release(&priv->completion_cb.lock); | ||
263 | } | ||
264 | |||
265 | return ch; | ||
266 | } | ||
267 | |||
/* Linux hook for channel open: no OS-specific work is needed here. */
static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
{
}
271 | |||
/*
 * Linux hook for channel close: tear down the completion callback (and
 * wait for in-flight work) and, when cycle stats are configured in,
 * release the channel's cycle-stats buffers.
 */
static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
{
	nvgpu_channel_work_completion_clear(ch);

#if defined(CONFIG_GK20A_CYCLE_STATS)
	gk20a_channel_free_cycle_stats_buffer(ch);
	gk20a_channel_free_cycle_stats_snapshot(ch);
#endif
}
281 | |||
282 | static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
283 | { | ||
284 | struct nvgpu_channel_linux *priv; | ||
285 | int err; | ||
286 | |||
287 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
288 | if (!priv) | ||
289 | return -ENOMEM; | ||
290 | |||
291 | ch->os_priv = priv; | ||
292 | priv->ch = ch; | ||
293 | |||
294 | #ifdef CONFIG_SYNC | ||
295 | ch->has_os_fence_framework_support = true; | ||
296 | #endif | ||
297 | |||
298 | err = nvgpu_mutex_init(&priv->error_notifier.mutex); | ||
299 | if (err) { | ||
300 | nvgpu_kfree(g, priv); | ||
301 | return err; | ||
302 | } | ||
303 | |||
304 | nvgpu_channel_work_completion_init(ch); | ||
305 | |||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | static void nvgpu_channel_free_linux(struct gk20a *g, struct channel_gk20a *ch) | ||
310 | { | ||
311 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
312 | |||
313 | nvgpu_mutex_destroy(&priv->error_notifier.mutex); | ||
314 | nvgpu_kfree(g, priv); | ||
315 | |||
316 | ch->os_priv = NULL; | ||
317 | |||
318 | #ifdef CONFIG_SYNC | ||
319 | ch->has_os_fence_framework_support = false; | ||
320 | #endif | ||
321 | } | ||
322 | |||
323 | static int nvgpu_channel_init_os_fence_framework(struct channel_gk20a *ch, | ||
324 | const char *fmt, ...) | ||
325 | { | ||
326 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
327 | struct nvgpu_os_fence_framework *fence_framework; | ||
328 | char name[30]; | ||
329 | va_list args; | ||
330 | |||
331 | fence_framework = &priv->fence_framework; | ||
332 | |||
333 | va_start(args, fmt); | ||
334 | vsnprintf(name, sizeof(name), fmt, args); | ||
335 | va_end(args); | ||
336 | |||
337 | fence_framework->timeline = gk20a_sync_timeline_create(name); | ||
338 | |||
339 | if (!fence_framework->timeline) | ||
340 | return -EINVAL; | ||
341 | |||
342 | return 0; | ||
343 | } | ||
344 | static void nvgpu_channel_signal_os_fence_framework(struct channel_gk20a *ch) | ||
345 | { | ||
346 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
347 | struct nvgpu_os_fence_framework *fence_framework; | ||
348 | |||
349 | fence_framework = &priv->fence_framework; | ||
350 | |||
351 | gk20a_sync_timeline_signal(fence_framework->timeline); | ||
352 | } | ||
353 | |||
354 | static void nvgpu_channel_destroy_os_fence_framework(struct channel_gk20a *ch) | ||
355 | { | ||
356 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
357 | struct nvgpu_os_fence_framework *fence_framework; | ||
358 | |||
359 | fence_framework = &priv->fence_framework; | ||
360 | |||
361 | gk20a_sync_timeline_destroy(fence_framework->timeline); | ||
362 | fence_framework->timeline = NULL; | ||
363 | } | ||
364 | |||
365 | static bool nvgpu_channel_fence_framework_exists(struct channel_gk20a *ch) | ||
366 | { | ||
367 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
368 | struct nvgpu_os_fence_framework *fence_framework; | ||
369 | |||
370 | fence_framework = &priv->fence_framework; | ||
371 | |||
372 | return (fence_framework->timeline != NULL); | ||
373 | } | ||
374 | |||
375 | int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) | ||
376 | { | ||
377 | struct gk20a *g = &l->g; | ||
378 | struct fifo_gk20a *f = &g->fifo; | ||
379 | int chid; | ||
380 | int err; | ||
381 | |||
382 | for (chid = 0; chid < (int)f->num_channels; chid++) { | ||
383 | struct channel_gk20a *ch = &f->channel[chid]; | ||
384 | |||
385 | err = nvgpu_channel_alloc_linux(g, ch); | ||
386 | if (err) | ||
387 | goto err_clean; | ||
388 | } | ||
389 | |||
390 | g->os_channel.open = nvgpu_channel_open_linux; | ||
391 | g->os_channel.close = nvgpu_channel_close_linux; | ||
392 | g->os_channel.work_completion_signal = | ||
393 | nvgpu_channel_work_completion_signal; | ||
394 | g->os_channel.work_completion_cancel_sync = | ||
395 | nvgpu_channel_work_completion_cancel_sync; | ||
396 | |||
397 | g->os_channel.os_fence_framework_inst_exists = | ||
398 | nvgpu_channel_fence_framework_exists; | ||
399 | g->os_channel.init_os_fence_framework = | ||
400 | nvgpu_channel_init_os_fence_framework; | ||
401 | g->os_channel.signal_os_fence_framework = | ||
402 | nvgpu_channel_signal_os_fence_framework; | ||
403 | g->os_channel.destroy_os_fence_framework = | ||
404 | nvgpu_channel_destroy_os_fence_framework; | ||
405 | |||
406 | return 0; | ||
407 | |||
408 | err_clean: | ||
409 | for (; chid >= 0; chid--) { | ||
410 | struct channel_gk20a *ch = &f->channel[chid]; | ||
411 | |||
412 | nvgpu_channel_free_linux(g, ch); | ||
413 | } | ||
414 | return err; | ||
415 | } | ||
416 | |||
417 | void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) | ||
418 | { | ||
419 | struct gk20a *g = &l->g; | ||
420 | struct fifo_gk20a *f = &g->fifo; | ||
421 | unsigned int chid; | ||
422 | |||
423 | for (chid = 0; chid < f->num_channels; chid++) { | ||
424 | struct channel_gk20a *ch = &f->channel[chid]; | ||
425 | |||
426 | nvgpu_channel_free_linux(g, ch); | ||
427 | } | ||
428 | |||
429 | g->os_channel.os_fence_framework_inst_exists = NULL; | ||
430 | g->os_channel.init_os_fence_framework = NULL; | ||
431 | g->os_channel.signal_os_fence_framework = NULL; | ||
432 | g->os_channel.destroy_os_fence_framework = NULL; | ||
433 | } | ||
434 | |||
/* Size in bytes of a single gpfifo ring-buffer entry. */
u32 nvgpu_get_gpfifo_entry_size(void)
{
	return sizeof(struct nvgpu_gpfifo_entry);
}
439 | |||
#ifdef CONFIG_DEBUG_FS
/*
 * Dump the pushbuffer referenced by one gpfifo entry to ftrace.
 *
 * Only active when the gk20a_debug_trace_cmdbuf debugfs knob is set. The
 * backing dma_buf is located by the entry's GPU VA and kernel-mapped only
 * for the duration of the trace writes. Failures to find or map the
 * buffer are silently ignored (tracing is best-effort).
 */
static void trace_write_pushbuffer(struct channel_gk20a *c,
				   struct nvgpu_gpfifo_entry *g)
{
	void *mem = NULL;
	unsigned int words;
	u64 offset;
	struct dma_buf *dmabuf = NULL;

	if (gk20a_debug_trace_cmdbuf) {
		/* Reassemble the 40+ bit GPU VA from the two entry words. */
		u64 gpu_va = (u64)g->entry0 |
			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
		int err;

		words = pbdma_gp_entry1_length_v(g->entry1);
		err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
		if (!err)
			mem = dma_buf_vmap(dmabuf);
	}

	/* mem is only non-NULL when tracing is on AND the map succeeded,
	 * so 'words' is always initialized before use here. */
	if (mem) {
		u32 i;
		/*
		 * Write in batches of 128 as there seems to be a limit
		 * of how much you can output to ftrace at once.
		 */
		for (i = 0; i < words; i += 128U) {
			trace_gk20a_push_cmdbuf(
				c->g->name,
				0,
				min(words - i, 128U),
				offset + i * sizeof(u32),
				mem);
		}
		dma_buf_vunmap(dmabuf, mem);
	}
}
#endif
478 | |||
/*
 * Trace the pushbuffers referenced by @count gpfifo entries starting at
 * index @offset. Entries come either from a kernel array (@g) or from
 * userspace (@user_gpfifo); in the userspace case they are first copied
 * into a temporary kernel buffer. Compiled to a no-op body without
 * CONFIG_DEBUG_FS. Best-effort: allocation or copy failures simply skip
 * the tracing.
 */
static void trace_write_pushbuffer_range(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *g,
		struct nvgpu_gpfifo_entry __user *user_gpfifo,
		int offset,
		int count)
{
#ifdef CONFIG_DEBUG_FS
	u32 size;
	int i;
	struct nvgpu_gpfifo_entry *gp;
	bool gpfifo_allocated = false;

	if (!gk20a_debug_trace_cmdbuf)
		return;

	if (!g && !user_gpfifo)
		return;

	if (!g) {
		/* Userspace source: stage the entries in kernel memory. */
		size = count * sizeof(struct nvgpu_gpfifo_entry);
		if (size) {
			g = nvgpu_big_malloc(c->g, size);
			if (!g)
				return;

			if (copy_from_user(g, user_gpfifo, size)) {
				nvgpu_big_free(c->g, g);
				return;
			}
		}
		gpfifo_allocated = true;
	}

	gp = g + offset;
	for (i = 0; i < count; i++, gp++)
		trace_write_pushbuffer(c, gp);

	if (gpfifo_allocated)
		nvgpu_big_free(c->g, g);
#endif
}
520 | |||
521 | /* | ||
522 | * Handle the submit synchronization - pre-fences and post-fences. | ||
523 | */ | ||
524 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | ||
525 | struct nvgpu_channel_fence *fence, | ||
526 | struct channel_gk20a_job *job, | ||
527 | struct priv_cmd_entry **wait_cmd, | ||
528 | struct priv_cmd_entry **incr_cmd, | ||
529 | struct gk20a_fence **post_fence, | ||
530 | bool register_irq, | ||
531 | u32 flags) | ||
532 | { | ||
533 | struct gk20a *g = c->g; | ||
534 | bool need_sync_fence = false; | ||
535 | bool new_sync_created = false; | ||
536 | int wait_fence_fd = -1; | ||
537 | int err = 0; | ||
538 | bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); | ||
539 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
540 | |||
541 | if (g->aggressive_sync_destroy_thresh) { | ||
542 | nvgpu_mutex_acquire(&c->sync_lock); | ||
543 | if (!c->sync) { | ||
544 | c->sync = gk20a_channel_sync_create(c, false); | ||
545 | if (!c->sync) { | ||
546 | err = -ENOMEM; | ||
547 | nvgpu_mutex_release(&c->sync_lock); | ||
548 | goto fail; | ||
549 | } | ||
550 | new_sync_created = true; | ||
551 | } | ||
552 | nvgpu_atomic_inc(&c->sync->refcount); | ||
553 | nvgpu_mutex_release(&c->sync_lock); | ||
554 | } | ||
555 | |||
556 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | ||
557 | err = g->ops.fifo.resetup_ramfc(c); | ||
558 | if (err) | ||
559 | goto fail; | ||
560 | } | ||
561 | |||
562 | /* | ||
563 | * Optionally insert syncpt/semaphore wait in the beginning of gpfifo | ||
564 | * submission when user requested and the wait hasn't expired. | ||
565 | */ | ||
566 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { | ||
567 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
568 | |||
569 | if (!pre_alloc_enabled) | ||
570 | job->wait_cmd = nvgpu_kzalloc(g, | ||
571 | sizeof(struct priv_cmd_entry)); | ||
572 | |||
573 | if (!job->wait_cmd) { | ||
574 | err = -ENOMEM; | ||
575 | goto fail; | ||
576 | } | ||
577 | |||
578 | if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
579 | wait_fence_fd = fence->id; | ||
580 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | ||
581 | job->wait_cmd, max_wait_cmds); | ||
582 | } else { | ||
583 | err = c->sync->wait_syncpt(c->sync, fence->id, | ||
584 | fence->value, | ||
585 | job->wait_cmd); | ||
586 | } | ||
587 | |||
588 | if (err) | ||
589 | goto clean_up_wait_cmd; | ||
590 | |||
591 | if (job->wait_cmd->valid) | ||
592 | *wait_cmd = job->wait_cmd; | ||
593 | } | ||
594 | |||
595 | if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && | ||
596 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) | ||
597 | need_sync_fence = true; | ||
598 | |||
599 | /* | ||
600 | * Always generate an increment at the end of a GPFIFO submission. This | ||
601 | * is used to keep track of method completion for idle railgating. The | ||
602 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | ||
603 | */ | ||
604 | job->post_fence = gk20a_alloc_fence(c); | ||
605 | if (!job->post_fence) { | ||
606 | err = -ENOMEM; | ||
607 | goto clean_up_wait_cmd; | ||
608 | } | ||
609 | if (!pre_alloc_enabled) | ||
610 | job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); | ||
611 | |||
612 | if (!job->incr_cmd) { | ||
613 | err = -ENOMEM; | ||
614 | goto clean_up_post_fence; | ||
615 | } | ||
616 | |||
617 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
618 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | ||
619 | job->post_fence, need_wfi, need_sync_fence, | ||
620 | register_irq); | ||
621 | else | ||
622 | err = c->sync->incr(c->sync, job->incr_cmd, | ||
623 | job->post_fence, need_sync_fence, | ||
624 | register_irq); | ||
625 | if (!err) { | ||
626 | *incr_cmd = job->incr_cmd; | ||
627 | *post_fence = job->post_fence; | ||
628 | } else | ||
629 | goto clean_up_incr_cmd; | ||
630 | |||
631 | return 0; | ||
632 | |||
633 | clean_up_incr_cmd: | ||
634 | free_priv_cmdbuf(c, job->incr_cmd); | ||
635 | if (!pre_alloc_enabled) | ||
636 | job->incr_cmd = NULL; | ||
637 | clean_up_post_fence: | ||
638 | gk20a_fence_put(job->post_fence); | ||
639 | job->post_fence = NULL; | ||
640 | clean_up_wait_cmd: | ||
641 | free_priv_cmdbuf(c, job->wait_cmd); | ||
642 | if (!pre_alloc_enabled) | ||
643 | job->wait_cmd = NULL; | ||
644 | fail: | ||
645 | *wait_cmd = NULL; | ||
646 | return err; | ||
647 | } | ||
648 | |||
649 | static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
650 | struct priv_cmd_entry *cmd) | ||
651 | { | ||
652 | struct gk20a *g = c->g; | ||
653 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
654 | struct nvgpu_gpfifo_entry x = { | ||
655 | .entry0 = u64_lo32(cmd->gva), | ||
656 | .entry1 = u64_hi32(cmd->gva) | | ||
657 | pbdma_gp_entry1_length_f(cmd->size) | ||
658 | }; | ||
659 | |||
660 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
661 | &x, sizeof(x)); | ||
662 | |||
663 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
664 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
665 | cmd->mem->cpu_va + cmd->off * sizeof(u32)); | ||
666 | |||
667 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
672 | * splitting into two memcpys to handle wrap-around. | ||
673 | */ | ||
674 | static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | ||
675 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
676 | struct nvgpu_gpfifo_entry __user *user_gpfifo, | ||
677 | u32 num_entries) | ||
678 | { | ||
679 | /* byte offsets */ | ||
680 | u32 gpfifo_size = | ||
681 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
682 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
683 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
684 | u32 end = start + len; /* exclusive */ | ||
685 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
686 | struct nvgpu_gpfifo_entry *cpu_src; | ||
687 | int err; | ||
688 | |||
689 | if (user_gpfifo && !c->gpfifo.pipe) { | ||
690 | /* | ||
691 | * This path (from userspace to sysmem) is special in order to | ||
692 | * avoid two copies unnecessarily (from user to pipe, then from | ||
693 | * pipe to gpu sysmem buffer). | ||
694 | */ | ||
695 | if (end > gpfifo_size) { | ||
696 | /* wrap-around */ | ||
697 | int length0 = gpfifo_size - start; | ||
698 | int length1 = len - length0; | ||
699 | void __user *user2 = (u8 __user *)user_gpfifo + length0; | ||
700 | |||
701 | err = copy_from_user(gpfifo_mem->cpu_va + start, | ||
702 | user_gpfifo, length0); | ||
703 | if (err) | ||
704 | return err; | ||
705 | |||
706 | err = copy_from_user(gpfifo_mem->cpu_va, | ||
707 | user2, length1); | ||
708 | if (err) | ||
709 | return err; | ||
710 | } else { | ||
711 | err = copy_from_user(gpfifo_mem->cpu_va + start, | ||
712 | user_gpfifo, len); | ||
713 | if (err) | ||
714 | return err; | ||
715 | } | ||
716 | |||
717 | trace_write_pushbuffer_range(c, NULL, user_gpfifo, | ||
718 | 0, num_entries); | ||
719 | goto out; | ||
720 | } else if (user_gpfifo) { | ||
721 | /* from userspace to vidmem, use the common copy path below */ | ||
722 | err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len); | ||
723 | if (err) | ||
724 | return err; | ||
725 | |||
726 | cpu_src = c->gpfifo.pipe; | ||
727 | } else { | ||
728 | /* from kernel to either sysmem or vidmem, don't need | ||
729 | * copy_from_user so use the common path below */ | ||
730 | cpu_src = kern_gpfifo; | ||
731 | } | ||
732 | |||
733 | if (end > gpfifo_size) { | ||
734 | /* wrap-around */ | ||
735 | int length0 = gpfifo_size - start; | ||
736 | int length1 = len - length0; | ||
737 | void *src2 = (u8 *)cpu_src + length0; | ||
738 | |||
739 | nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0); | ||
740 | nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1); | ||
741 | } else { | ||
742 | nvgpu_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len); | ||
743 | |||
744 | } | ||
745 | |||
746 | trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); | ||
747 | |||
748 | out: | ||
749 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
750 | (c->gpfifo.entry_num - 1); | ||
751 | |||
752 | return 0; | ||
753 | } | ||
754 | |||
755 | int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | ||
756 | struct nvgpu_gpfifo_entry *gpfifo, | ||
757 | struct nvgpu_submit_gpfifo_args *args, | ||
758 | u32 num_entries, | ||
759 | u32 flags, | ||
760 | struct nvgpu_channel_fence *fence, | ||
761 | struct gk20a_fence **fence_out, | ||
762 | struct fifo_profile_gk20a *profile) | ||
763 | { | ||
764 | struct gk20a *g = c->g; | ||
765 | struct priv_cmd_entry *wait_cmd = NULL; | ||
766 | struct priv_cmd_entry *incr_cmd = NULL; | ||
767 | struct gk20a_fence *post_fence = NULL; | ||
768 | struct channel_gk20a_job *job = NULL; | ||
769 | /* we might need two extra gpfifo entries - one for pre fence | ||
770 | * and one for post fence. */ | ||
771 | const int extra_entries = 2; | ||
772 | bool skip_buffer_refcounting = (flags & | ||
773 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
774 | int err = 0; | ||
775 | bool need_job_tracking; | ||
776 | bool need_deferred_cleanup = false; | ||
777 | struct nvgpu_gpfifo_entry __user *user_gpfifo = args ? | ||
778 | (struct nvgpu_gpfifo_entry __user *)(uintptr_t)args->gpfifo : NULL; | ||
779 | |||
780 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
781 | return -ENODEV; | ||
782 | |||
783 | if (c->has_timedout) | ||
784 | return -ETIMEDOUT; | ||
785 | |||
786 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
787 | return -ENOMEM; | ||
788 | |||
789 | /* fifo not large enough for request. Return error immediately. | ||
790 | * Kernel can insert gpfifo entries before and after user gpfifos. | ||
791 | * So, add extra_entries in user request. Also, HW with fifo size N | ||
792 | * can accept only N-1 entreis and so the below condition */ | ||
793 | if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { | ||
794 | nvgpu_err(g, "not enough gpfifo space allocated"); | ||
795 | return -ENOMEM; | ||
796 | } | ||
797 | |||
798 | if (!gpfifo && !args) | ||
799 | return -EINVAL; | ||
800 | |||
801 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
802 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
803 | !fence) | ||
804 | return -EINVAL; | ||
805 | |||
806 | /* an address space needs to have been bound at this point. */ | ||
807 | if (!gk20a_channel_as_bound(c)) { | ||
808 | nvgpu_err(g, | ||
809 | "not bound to an address space at time of gpfifo" | ||
810 | " submission."); | ||
811 | return -EINVAL; | ||
812 | } | ||
813 | |||
814 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
815 | |||
816 | /* update debug settings */ | ||
817 | nvgpu_ltc_sync_enabled(g); | ||
818 | |||
819 | nvgpu_log_info(g, "channel %d", c->chid); | ||
820 | |||
821 | /* | ||
822 | * Job tracking is necessary for any of the following conditions: | ||
823 | * - pre- or post-fence functionality | ||
824 | * - channel wdt | ||
825 | * - GPU rail-gating with non-deterministic channels | ||
826 | * - buffer refcounting | ||
827 | * | ||
828 | * If none of the conditions are met, then job tracking is not | ||
829 | * required and a fast submit can be done (ie. only need to write | ||
830 | * out userspace GPFIFO entries and update GP_PUT). | ||
831 | */ | ||
832 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
833 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
834 | c->timeout.enabled || | ||
835 | (g->can_railgate && !c->deterministic) || | ||
836 | !skip_buffer_refcounting; | ||
837 | |||
838 | if (need_job_tracking) { | ||
839 | bool need_sync_framework = false; | ||
840 | |||
841 | /* | ||
842 | * If the channel is to have deterministic latency and | ||
843 | * job tracking is required, the channel must have | ||
844 | * pre-allocated resources. Otherwise, we fail the submit here | ||
845 | */ | ||
846 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
847 | return -EINVAL; | ||
848 | |||
849 | need_sync_framework = | ||
850 | gk20a_channel_sync_needs_sync_framework(g) || | ||
851 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
852 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
853 | |||
854 | /* | ||
855 | * Deferred clean-up is necessary for any of the following | ||
856 | * conditions: | ||
857 | * - channel's deterministic flag is not set | ||
858 | * - dependency on sync framework, which could make the | ||
859 | * behavior of the clean-up operation non-deterministic | ||
860 | * (should not be performed in the submit path) | ||
861 | * - channel wdt | ||
862 | * - GPU rail-gating with non-deterministic channels | ||
863 | * - buffer refcounting | ||
864 | * | ||
865 | * If none of the conditions are met, then deferred clean-up | ||
866 | * is not required, and we clean-up one job-tracking | ||
867 | * resource in the submit path. | ||
868 | */ | ||
869 | need_deferred_cleanup = !c->deterministic || | ||
870 | need_sync_framework || | ||
871 | c->timeout.enabled || | ||
872 | (g->can_railgate && | ||
873 | !c->deterministic) || | ||
874 | !skip_buffer_refcounting; | ||
875 | |||
876 | /* | ||
877 | * For deterministic channels, we don't allow deferred clean_up | ||
878 | * processing to occur. In cases we hit this, we fail the submit | ||
879 | */ | ||
880 | if (c->deterministic && need_deferred_cleanup) | ||
881 | return -EINVAL; | ||
882 | |||
883 | if (!c->deterministic) { | ||
884 | /* | ||
885 | * Get a power ref unless this is a deterministic | ||
886 | * channel that holds them during the channel lifetime. | ||
887 | * This one is released by gk20a_channel_clean_up_jobs, | ||
888 | * via syncpt or sema interrupt, whichever is used. | ||
889 | */ | ||
890 | err = gk20a_busy(g); | ||
891 | if (err) { | ||
892 | nvgpu_err(g, | ||
893 | "failed to host gk20a to submit gpfifo, process %s", | ||
894 | current->comm); | ||
895 | return err; | ||
896 | } | ||
897 | } | ||
898 | |||
899 | if (!need_deferred_cleanup) { | ||
900 | /* clean up a single job */ | ||
901 | gk20a_channel_clean_up_jobs(c, false); | ||
902 | } | ||
903 | } | ||
904 | |||
905 | |||
906 | /* Grab access to HW to deal with do_idle */ | ||
907 | if (c->deterministic) | ||
908 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
909 | |||
910 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
911 | /* | ||
912 | * Nope - this channel has dropped its own power ref. As | ||
913 | * deterministic submits don't hold power on per each submitted | ||
914 | * job like normal ones do, the GPU might railgate any time now | ||
915 | * and thus submit is disallowed. | ||
916 | */ | ||
917 | err = -EINVAL; | ||
918 | goto clean_up; | ||
919 | } | ||
920 | |||
921 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
922 | c->chid, | ||
923 | num_entries, | ||
924 | flags, | ||
925 | fence ? fence->id : 0, | ||
926 | fence ? fence->value : 0); | ||
927 | |||
928 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
929 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
930 | |||
931 | /* | ||
932 | * Make sure we have enough space for gpfifo entries. Check cached | ||
933 | * values first and then read from HW. If no space, return EAGAIN | ||
934 | * and let userpace decide to re-try request or not. | ||
935 | */ | ||
936 | if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { | ||
937 | if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { | ||
938 | err = -EAGAIN; | ||
939 | goto clean_up; | ||
940 | } | ||
941 | } | ||
942 | |||
943 | if (c->has_timedout) { | ||
944 | err = -ETIMEDOUT; | ||
945 | goto clean_up; | ||
946 | } | ||
947 | |||
948 | if (need_job_tracking) { | ||
949 | err = channel_gk20a_alloc_job(c, &job); | ||
950 | if (err) | ||
951 | goto clean_up; | ||
952 | |||
953 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
954 | &wait_cmd, &incr_cmd, | ||
955 | &post_fence, | ||
956 | need_deferred_cleanup, | ||
957 | flags); | ||
958 | if (err) | ||
959 | goto clean_up_job; | ||
960 | } | ||
961 | |||
962 | gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); | ||
963 | |||
964 | if (wait_cmd) | ||
965 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | ||
966 | |||
967 | if (gpfifo || user_gpfifo) | ||
968 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, | ||
969 | num_entries); | ||
970 | if (err) | ||
971 | goto clean_up_job; | ||
972 | |||
973 | /* | ||
974 | * And here's where we add the incr_cmd we generated earlier. It should | ||
975 | * always run! | ||
976 | */ | ||
977 | if (incr_cmd) | ||
978 | gk20a_submit_append_priv_cmdbuf(c, incr_cmd); | ||
979 | |||
980 | if (fence_out) | ||
981 | *fence_out = gk20a_fence_get(post_fence); | ||
982 | |||
983 | if (need_job_tracking) | ||
984 | /* TODO! Check for errors... */ | ||
985 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | ||
986 | gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); | ||
987 | |||
988 | g->ops.fifo.userd_gp_put(g, c); | ||
989 | |||
990 | /* No hw access beyond this point */ | ||
991 | if (c->deterministic) | ||
992 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
993 | |||
994 | trace_gk20a_channel_submitted_gpfifo(g->name, | ||
995 | c->chid, | ||
996 | num_entries, | ||
997 | flags, | ||
998 | post_fence ? post_fence->syncpt_id : 0, | ||
999 | post_fence ? post_fence->syncpt_value : 0); | ||
1000 | |||
1001 | nvgpu_log_info(g, "post-submit put %d, get %d, size %d", | ||
1002 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1003 | |||
1004 | gk20a_fifo_profile_snapshot(profile, PROFILE_END); | ||
1005 | |||
1006 | nvgpu_log_fn(g, "done"); | ||
1007 | return err; | ||
1008 | |||
1009 | clean_up_job: | ||
1010 | channel_gk20a_free_job(c, job); | ||
1011 | clean_up: | ||
1012 | nvgpu_log_fn(g, "fail"); | ||
1013 | gk20a_fence_put(post_fence); | ||
1014 | if (c->deterministic) | ||
1015 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1016 | else if (need_deferred_cleanup) | ||
1017 | gk20a_idle(g); | ||
1018 | |||
1019 | return err; | ||
1020 | } | ||
1021 | |||
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h new file mode 100644 index 00000000..4a58b10c --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/channel.h | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __NVGPU_CHANNEL_H__ | ||
17 | #define __NVGPU_CHANNEL_H__ | ||
18 | |||
19 | #include <linux/workqueue.h> | ||
20 | #include <linux/dma-buf.h> | ||
21 | |||
22 | #include <nvgpu/types.h> | ||
23 | |||
24 | struct channel_gk20a; | ||
25 | struct nvgpu_gpfifo; | ||
26 | struct nvgpu_submit_gpfifo_args; | ||
27 | struct nvgpu_channel_fence; | ||
28 | struct gk20a_fence; | ||
29 | struct fifo_profile_gk20a; | ||
30 | struct nvgpu_os_linux; | ||
31 | |||
32 | struct sync_fence; | ||
33 | struct sync_timeline; | ||
34 | |||
struct nvgpu_channel_completion_cb {
	/*
	 * Signal channel owner via a callback, if set, in job cleanup with
	 * schedule_work. Means that something finished on the channel (perhaps
	 * more than one job).
	 */
	void (*fn)(struct channel_gk20a *, void *);
	/* Opaque argument passed back to fn. */
	void *user_data;
	/* Make access to the two above atomic */
	struct nvgpu_spinlock lock;
	/* Per-channel async work task, cannot reschedule itself */
	struct work_struct work;
};
48 | |||
/*
 * Per-channel error notifier state. The notification record lives inside
 * a dmabuf (presumably supplied by userspace - confirm against the ioctl
 * that sets it up) so the owner can observe channel errors.
 */
struct nvgpu_error_notifier {
	/* Buffer backing the notifier. */
	struct dma_buf *dmabuf;
	/* Kernel-side mapping of that buffer. */
	void *vaddr;

	/* The notification record that gets written on errors. */
	struct nvgpu_notification *notification;

	/* Serializes updates to the fields above. */
	struct nvgpu_mutex mutex;
};
57 | |||
/*
 * This struct contains fence_related data.
 * e.g. sync_timeline for sync_fences.
 */
struct nvgpu_os_fence_framework {
	/* Timeline on which this channel's sync_fences are signaled. */
	struct sync_timeline *timeline;
};
65 | |||
/* Linux-specific per-channel state wrapping the common channel_gk20a. */
struct nvgpu_channel_linux {
	/* The OS-independent channel this state belongs to. */
	struct channel_gk20a *ch;

	/* Sync-framework data (timeline) for this channel's fences. */
	struct nvgpu_os_fence_framework fence_framework;

	struct nvgpu_channel_completion_cb completion_cb;
	struct nvgpu_error_notifier error_notifier;

	/* dmabuf handle for the cyclestate buffer, if one is attached. */
	struct dma_buf *cyclestate_buffer_handler;
};
76 | |||
/* Translate UAPI submit flags into the common NVGPU_SUBMIT_FLAGS_*. */
u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
/* Set up / tear down Linux-specific channel state for all channels. */
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);

/*
 * Open a channel and register a completion callback (update_fn), invoked
 * from job cleanup when work finishes on the channel.
 */
struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
			void (*update_fn)(struct channel_gk20a *, void *),
			void *update_fn_data,
			int runlist_id,
			bool is_privileged_channel);

/*
 * Submit num_entries gpfifo entries to channel c, from either a
 * kernel-side array (gpfifo) or userspace memory referenced by args.
 */
int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				struct nvgpu_submit_gpfifo_args *args,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct gk20a_fence **fence_out,
				struct fifo_profile_gk20a *profile);
95 | |||
96 | #endif /* __NVGPU_CHANNEL_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/os/linux/clk.c b/drivers/gpu/nvgpu/os/linux/clk.c new file mode 100644 index 00000000..414b17c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * Linux clock support | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/clk.h> | ||
20 | |||
21 | #include <soc/tegra/tegra-dvfs.h> | ||
22 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
23 | |||
24 | #include "clk.h" | ||
25 | #include "os_linux.h" | ||
26 | #include "platform_gk20a.h" | ||
27 | |||
28 | #include "gk20a/gk20a.h" | ||
29 | |||
30 | static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain) | ||
31 | { | ||
32 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
33 | unsigned long ret; | ||
34 | |||
35 | switch (api_domain) { | ||
36 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
37 | if (g->clk.tegra_clk) | ||
38 | ret = g->clk.cached_rate ? | ||
39 | g->clk.cached_rate : | ||
40 | clk_get_rate(g->clk.tegra_clk); | ||
41 | else | ||
42 | ret = platform->cached_rate ? | ||
43 | platform->cached_rate : | ||
44 | clk_get_rate(platform->clk[0]); | ||
45 | break; | ||
46 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
47 | ret = clk_get_rate(platform->clk[1]); | ||
48 | break; | ||
49 | default: | ||
50 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
51 | ret = 0; | ||
52 | break; | ||
53 | } | ||
54 | |||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | static int nvgpu_linux_clk_set_rate(struct gk20a *g, | ||
59 | u32 api_domain, unsigned long rate) | ||
60 | { | ||
61 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
62 | int ret; | ||
63 | |||
64 | switch (api_domain) { | ||
65 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
66 | if (g->clk.tegra_clk) { | ||
67 | ret = clk_set_rate(g->clk.tegra_clk, rate); | ||
68 | if (!ret) | ||
69 | g->clk.cached_rate = rate; | ||
70 | } else { | ||
71 | ret = clk_set_rate(platform->clk[0], rate); | ||
72 | if (!ret) | ||
73 | platform->cached_rate = rate; | ||
74 | } | ||
75 | break; | ||
76 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
77 | ret = clk_set_rate(platform->clk[1], rate); | ||
78 | break; | ||
79 | default: | ||
80 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
81 | ret = -EINVAL; | ||
82 | break; | ||
83 | } | ||
84 | |||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g) | ||
89 | { | ||
90 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
91 | |||
92 | /* | ||
93 | * On Tegra platforms with GPCPLL bus (gbus) GPU tegra_clk clock exposed | ||
94 | * to frequency governor is a shared user on the gbus. The latter can be | ||
95 | * accessed as GPU clock parent, and incorporate DVFS related data. | ||
96 | */ | ||
97 | if (g->clk.tegra_clk) | ||
98 | return tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
99 | clk_get_parent(g->clk.tegra_clk)); | ||
100 | |||
101 | if (platform->maxmin_clk_id) | ||
102 | return tegra_bpmp_dvfs_get_fmax_at_vmin( | ||
103 | platform->maxmin_clk_id); | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g) | ||
109 | { | ||
110 | struct clk *c; | ||
111 | |||
112 | c = clk_get_sys("gpu_ref", "gpu_ref"); | ||
113 | if (IS_ERR(c)) { | ||
114 | nvgpu_err(g, "failed to get GPCPLL reference clock"); | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | return clk_get_rate(c); | ||
119 | } | ||
120 | |||
121 | static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk, | ||
122 | unsigned long rate) | ||
123 | { | ||
124 | return tegra_dvfs_predict_mv_at_hz_cur_tfloor( | ||
125 | clk_get_parent(clk->tegra_clk), rate); | ||
126 | } | ||
127 | |||
128 | static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain) | ||
129 | { | ||
130 | int ret; | ||
131 | |||
132 | switch (api_domain) { | ||
133 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
134 | ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk)); | ||
135 | break; | ||
136 | default: | ||
137 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
138 | ret = 0; | ||
139 | break; | ||
140 | } | ||
141 | |||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk) | ||
146 | { | ||
147 | return clk_prepare_enable(clk->tegra_clk); | ||
148 | } | ||
149 | |||
150 | static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk) | ||
151 | { | ||
152 | clk_disable_unprepare(clk->tegra_clk); | ||
153 | } | ||
154 | |||
155 | void nvgpu_linux_init_clk_support(struct gk20a *g) | ||
156 | { | ||
157 | g->ops.clk.get_rate = nvgpu_linux_clk_get_rate; | ||
158 | g->ops.clk.set_rate = nvgpu_linux_clk_set_rate; | ||
159 | g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe; | ||
160 | g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate; | ||
161 | g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor; | ||
162 | g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate; | ||
163 | g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable; | ||
164 | g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare; | ||
165 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/clk.h b/drivers/gpu/nvgpu/os/linux/clk.h new file mode 100644 index 00000000..614a7fd7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/clk.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef NVGPU_COMMON_LINUX_CLK_H
/*
 * Define the guard macro: the original header tested it but never
 * defined it, so the include guard did not actually prevent repeated
 * inclusion.
 */
#define NVGPU_COMMON_LINUX_CLK_H

struct gk20a;

/* Install the Linux clock HAL callbacks into g->ops.clk. */
void nvgpu_linux_init_clk_support(struct gk20a *g);

#endif /* NVGPU_COMMON_LINUX_CLK_H */
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c new file mode 100644 index 00000000..353f6363 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/comptags.c | |||
@@ -0,0 +1,140 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | |||
19 | #include <nvgpu/comptags.h> | ||
20 | |||
21 | #include <nvgpu/linux/vm.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | #include "dmabuf.h" | ||
25 | |||
26 | void gk20a_get_comptags(struct nvgpu_os_buffer *buf, | ||
27 | struct gk20a_comptags *comptags) | ||
28 | { | ||
29 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | ||
30 | buf->dev); | ||
31 | |||
32 | if (!comptags) | ||
33 | return; | ||
34 | |||
35 | if (!priv) { | ||
36 | memset(comptags, 0, sizeof(*comptags)); | ||
37 | return; | ||
38 | } | ||
39 | |||
40 | nvgpu_mutex_acquire(&priv->lock); | ||
41 | *comptags = priv->comptags; | ||
42 | nvgpu_mutex_release(&priv->lock); | ||
43 | } | ||
44 | |||
/*
 * Return the buffer's comptags, allocating them on the first call.
 *
 * Allocation happens at most once per buffer: later calls return the
 * recorded result, including a recorded failure (offset/lines == 0),
 * because re-allocating on a subsequent call would break map aliasing.
 * Comptag-allocation failure is therefore NOT reported as an error;
 * the caller falls back to incompressible kinds.
 *
 * Returns 0 on success (check comptags->lines for the outcome) or
 * -ENOSYS when the buffer has no nvgpu private data.
 */
int gk20a_alloc_or_get_comptags(struct gk20a *g,
				struct nvgpu_os_buffer *buf,
				struct gk20a_comptag_allocator *allocator,
				struct gk20a_comptags *comptags)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
							     buf->dev);
	u32 offset;
	int err;
	unsigned int ctag_granularity;
	u32 lines;

	if (!priv)
		return -ENOSYS;

	nvgpu_mutex_acquire(&priv->lock);

	if (priv->comptags.allocated) {
		/*
		 * already allocated
		 */
		*comptags = priv->comptags;

		err = 0;
		goto exit_locked;
	}

	/* One comptag line covers one compression page of the buffer. */
	ctag_granularity = g->ops.fb.compression_page_size(g);
	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);

	/* 0-sized buffer? Shouldn't occur, but let's check anyways. */
	if (lines < 1) {
		err = -EINVAL;
		goto exit_locked;
	}

	/* store the allocator so we can use it when we free the ctags */
	priv->comptag_allocator = allocator;
	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
	if (!err) {
		priv->comptags.offset = offset;
		priv->comptags.lines = lines;
		/* Newly allocated lines must be cleared before first use. */
		priv->comptags.needs_clear = true;
	} else {
		priv->comptags.offset = 0;
		priv->comptags.lines = 0;
		priv->comptags.needs_clear = false;
	}

	/*
	 * We don't report an error here if comptag alloc failed. The
	 * caller will simply fallback to incompressible kinds. It
	 * would not be safe to re-allocate comptags anyways on
	 * successive calls, as that would break map aliasing.
	 */
	err = 0;
	priv->comptags.allocated = true;

	*comptags = priv->comptags;

exit_locked:
	nvgpu_mutex_release(&priv->lock);

	return err;
}
110 | |||
/*
 * Begin a comptags-clear operation for the buffer.
 *
 * Returns true when a clear is pending; in that case priv->lock is
 * deliberately LEFT HELD across the caller's clear attempt and must be
 * released via a matching gk20a_comptags_finish_clear(). Returns false
 * (with the lock released, or never taken when there is no priv data)
 * when no clear is needed.
 */
bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
							     buf->dev);
	bool clear_started = false;

	if (priv) {
		nvgpu_mutex_acquire(&priv->lock);

		clear_started = priv->comptags.needs_clear;

		/* Keep holding the lock when we return true. */
		if (!clear_started)
			nvgpu_mutex_release(&priv->lock);
	}

	return clear_started;
}
128 | |||
/*
 * Complete a clear started by gk20a_comptags_start_clear().
 *
 * Must only be called after start_clear returned true: it releases the
 * priv->lock that start_clear left held. On success the needs_clear
 * flag is dropped; on failure it stays set so a later caller retries.
 */
void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
				 bool clear_successful)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
							     buf->dev);
	if (priv) {
		if (clear_successful)
			priv->comptags.needs_clear = false;

		/* Pairs with the acquire in gk20a_comptags_start_clear(). */
		nvgpu_mutex_release(&priv->lock);
	}
}
diff --git a/drivers/gpu/nvgpu/os/linux/cond.c b/drivers/gpu/nvgpu/os/linux/cond.c new file mode 100644 index 00000000..633c34fd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/cond.c | |||
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/wait.h> | ||
18 | #include <linux/sched.h> | ||
19 | |||
20 | #include <nvgpu/cond.h> | ||
21 | |||
22 | int nvgpu_cond_init(struct nvgpu_cond *cond) | ||
23 | { | ||
24 | init_waitqueue_head(&cond->wq); | ||
25 | cond->initialized = true; | ||
26 | |||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | void nvgpu_cond_destroy(struct nvgpu_cond *cond) | ||
31 | { | ||
32 | cond->initialized = false; | ||
33 | } | ||
34 | |||
35 | int nvgpu_cond_signal(struct nvgpu_cond *cond) | ||
36 | { | ||
37 | if (!cond->initialized) | ||
38 | return -EINVAL; | ||
39 | |||
40 | wake_up(&cond->wq); | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond) | ||
46 | { | ||
47 | if (!cond->initialized) | ||
48 | return -EINVAL; | ||
49 | |||
50 | wake_up_interruptible(&cond->wq); | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | int nvgpu_cond_broadcast(struct nvgpu_cond *cond) | ||
56 | { | ||
57 | if (!cond->initialized) | ||
58 | return -EINVAL; | ||
59 | |||
60 | wake_up_all(&cond->wq); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond) | ||
66 | { | ||
67 | if (!cond->initialized) | ||
68 | return -EINVAL; | ||
69 | |||
70 | wake_up_interruptible_all(&cond->wq); | ||
71 | |||
72 | return 0; | ||
73 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c new file mode 100644 index 00000000..a335988a --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c | |||
@@ -0,0 +1,730 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/wait.h> | ||
18 | #include <linux/ktime.h> | ||
19 | #include <linux/uaccess.h> | ||
20 | #include <linux/poll.h> | ||
21 | #include <trace/events/gk20a.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include "gk20a/gk20a.h" | ||
25 | #include "gk20a/gr_gk20a.h" | ||
26 | |||
27 | #include <nvgpu/kmem.h> | ||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/atomic.h> | ||
30 | #include <nvgpu/barrier.h> | ||
31 | |||
32 | #include "platform_gk20a.h" | ||
33 | #include "os_linux.h" | ||
34 | #include "ctxsw_trace.h" | ||
35 | |||
36 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
37 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
38 | |||
39 | #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) | ||
40 | |||
/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;	/* owning GPU instance */

	/* Ring header shared with userspace (magic, indices, counters). */
	struct nvgpu_ctxsw_ring_header *hdr;
	/* Entry array, laid out immediately after the header. */
	struct nvgpu_ctxsw_trace_entry *ents;
	/* Event filter applied to trace records. */
	struct nvgpu_ctxsw_trace_filter filter;
	/* True while tracing is enabled (writes allowed into the ring). */
	bool write_enabled;
	/* Readers block here until the ring is non-empty. */
	struct nvgpu_cond readout_wq;
	size_t size;	/* total ring buffer size in bytes */
	u32 num_ents;	/* capacity of the ring in entries */

	/* Count of live userspace mappings; non-zero blocks realloc. */
	nvgpu_atomic_t vma_ref;

	/* Protects hdr/ents/filter/write_enabled and ring indices. */
	struct nvgpu_mutex write_lock;
};
57 | |||
58 | |||
/* Top-level ctxsw trace state: one device per VM (only [0] used now). */
struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};
62 | |||
63 | static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr) | ||
64 | { | ||
65 | return (hdr->write_idx == hdr->read_idx); | ||
66 | } | ||
67 | |||
68 | static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr) | ||
69 | { | ||
70 | return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx; | ||
71 | } | ||
72 | |||
73 | static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | ||
74 | { | ||
75 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; | ||
76 | } | ||
77 | |||
/*
 * read(2) handler for the ctxsw trace device.
 *
 * Blocks (unless O_NONBLOCK) until the ring has entries, then copies
 * whole nvgpu_ctxsw_trace_entry records to userspace, advancing the
 * shared read index. Returns the number of bytes copied, -EAGAIN for a
 * non-blocking read on an empty ring, -EFAULT on copy failure, or the
 * interrupted-wait error.
 */
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
	loff_t *off)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	struct nvgpu_ctxsw_trace_entry __user *entry =
		(struct nvgpu_ctxsw_trace_entry *) buf;
	size_t copied = 0;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	/* Wait for data; the lock is dropped while sleeping. */
	nvgpu_mutex_acquire(&dev->write_lock);
	while (ring_is_empty(hdr)) {
		nvgpu_mutex_release(&dev->write_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		/*
		 * NOTE(review): the wait condition reads the ring
		 * without write_lock; the loop re-checks under the
		 * lock afterwards, which appears to make this safe.
		 */
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
			!ring_is_empty(hdr), 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&dev->write_lock);
	}

	/* Copy out as many whole entries as the user buffer can take. */
	while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
		if (ring_is_empty(hdr))
			break;

		if (copy_to_user(entry, &dev->ents[hdr->read_idx],
			sizeof(*entry))) {
			nvgpu_mutex_release(&dev->write_lock);
			return -EFAULT;
		}

		/* Consume the entry; wrap the shared read index. */
		hdr->read_idx++;
		if (hdr->read_idx >= hdr->num_ents)
			hdr->read_idx = 0;

		entry++;
		copied += sizeof(*entry);
		size -= sizeof(*entry);
	}

	nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
		hdr->read_idx);

	/* f_pos mirrors the ring read index rather than a byte offset. */
	*off = hdr->read_idx;
	nvgpu_mutex_release(&dev->write_lock);

	return copied;
}
131 | |||
132 | static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev) | ||
133 | { | ||
134 | struct gk20a *g = dev->g; | ||
135 | |||
136 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled"); | ||
137 | nvgpu_mutex_acquire(&dev->write_lock); | ||
138 | dev->write_enabled = true; | ||
139 | nvgpu_mutex_release(&dev->write_lock); | ||
140 | dev->g->ops.fecs_trace.enable(dev->g); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev) | ||
145 | { | ||
146 | struct gk20a *g = dev->g; | ||
147 | |||
148 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled"); | ||
149 | dev->g->ops.fecs_trace.disable(dev->g); | ||
150 | nvgpu_mutex_acquire(&dev->write_lock); | ||
151 | dev->write_enabled = false; | ||
152 | nvgpu_mutex_release(&dev->write_lock); | ||
153 | return 0; | ||
154 | } | ||
155 | |||
/*
 * Allocate the userspace-visible trace ring for this device via the
 * fecs_trace HAL. Fails with -EBUSY while tracing is enabled or the
 * ring is mmapped. Caller holds dev->write_lock (see ring_setup).
 */
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
					size_t size)
{
	struct gk20a *g = dev->g;
	void *buf;
	int err;

	if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
		return -EBUSY;

	/* HAL may round the size; the final value comes back in size. */
	err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
	if (err)
		return err;


	/* Entries are laid out immediately after the ring header. */
	dev->hdr = buf;
	dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
	dev->size = size;
	dev->num_ents = dev->hdr->num_ents;

	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
		dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
	return 0;
}
180 | |||
/*
 * HAL implementation: allocate and initialize a user-mappable trace
 * ring. *size is rounded up to a whole page; on success *buf points at
 * the ring with an initialized header and *size holds the final size.
 */
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
		void **buf, size_t *size)
{
	struct nvgpu_ctxsw_ring_header *hdr;

	*size = roundup(*size, PAGE_SIZE);
	/* vmalloc_user() zeroes the pages and permits remap to userspace. */
	hdr = vmalloc_user(*size);
	if (!hdr)
		return -ENOMEM;

	hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
	hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
	/* Capacity: whatever fits after the header, in whole entries. */
	hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
		/ sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
	hdr->drop_count = 0;
	hdr->read_idx = 0;
	hdr->write_idx = 0;
	hdr->write_seqno = 0;

	*buf = hdr;
	return 0;
}
204 | |||
205 | int gk20a_ctxsw_dev_ring_free(struct gk20a *g) | ||
206 | { | ||
207 | struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0]; | ||
208 | |||
209 | nvgpu_vfree(g, dev->hdr); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev, | ||
214 | struct nvgpu_ctxsw_ring_setup_args *args) | ||
215 | { | ||
216 | struct gk20a *g = dev->g; | ||
217 | size_t size = args->size; | ||
218 | int ret; | ||
219 | |||
220 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size); | ||
221 | |||
222 | if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE) | ||
223 | return -EINVAL; | ||
224 | |||
225 | nvgpu_mutex_acquire(&dev->write_lock); | ||
226 | ret = gk20a_ctxsw_dev_alloc_buffer(dev, size); | ||
227 | nvgpu_mutex_release(&dev->write_lock); | ||
228 | |||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev, | ||
233 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
234 | { | ||
235 | struct gk20a *g = dev->g; | ||
236 | |||
237 | nvgpu_mutex_acquire(&dev->write_lock); | ||
238 | dev->filter = args->filter; | ||
239 | nvgpu_mutex_release(&dev->write_lock); | ||
240 | |||
241 | if (g->ops.fecs_trace.set_filter) | ||
242 | g->ops.fecs_trace.set_filter(g, &dev->filter); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev, | ||
247 | struct nvgpu_ctxsw_trace_filter_args *args) | ||
248 | { | ||
249 | nvgpu_mutex_acquire(&dev->write_lock); | ||
250 | args->filter = dev->filter; | ||
251 | nvgpu_mutex_release(&dev->write_lock); | ||
252 | |||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev) | ||
257 | { | ||
258 | struct gk20a *g = dev->g; | ||
259 | int err; | ||
260 | |||
261 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
262 | |||
263 | err = gk20a_busy(g); | ||
264 | if (err) | ||
265 | return err; | ||
266 | |||
267 | if (g->ops.fecs_trace.flush) | ||
268 | err = g->ops.fecs_trace.flush(g); | ||
269 | |||
270 | if (likely(!err)) | ||
271 | err = g->ops.fecs_trace.poll(g); | ||
272 | |||
273 | gk20a_idle(g); | ||
274 | return err; | ||
275 | } | ||
276 | |||
/*
 * Open the ctxsw trace character device.
 *
 * Requires CAP_SYS_ADMIN. Only a single user per device is allowed: the
 * presence of dev->hdr (the ring buffer) marks the device as in use.
 * On success the device pointer is stashed in filp->private_data and a
 * GPU reference (gk20a_get) is held until release().
 *
 * Error paths unwind via gotos: 'done' releases the write lock, 'idle'
 * drops the busy ref, 'free_ref' drops the gk20a_get ref only when err
 * is set (so the reference survives a successful open).
 */
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int err;
	size_t size;
	u32 n;

	/* only one VM for now */
	const int vmid = 0;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);

	if (!capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto free_ref;
	}

	err = gk20a_busy(g);
	if (err)
		goto free_ref;

	/* NULL when gk20a_ctxsw_trace_init() was skipped or failed */
	trace = g->ctxsw_trace;
	if (!trace) {
		err = -ENODEV;
		goto idle;
	}

	/* Allow only one user for this device */
	dev = &trace->devs[vmid];
	nvgpu_mutex_acquire(&dev->write_lock);
	if (dev->hdr) {
		err = -EBUSY;
		goto done;
	}

	/* By default, allocate ring buffer big enough to accommodate
	 * FECS records with default event filter */

	/* enable all traces by default */
	NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);

	/* compute max number of entries generated with this filter */
	n = g->ops.fecs_trace.max_entries(g, &dev->filter);

	size = sizeof(struct nvgpu_ctxsw_ring_header) +
			n * sizeof(struct nvgpu_ctxsw_trace_entry);
	nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
		size, n, sizeof(struct nvgpu_ctxsw_trace_entry));

	err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
	if (!err) {
		filp->private_data = dev;
		nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
				filp, dev, size);
	}

done:
	nvgpu_mutex_release(&dev->write_lock);

idle:
	gk20a_idle(g);
free_ref:
	if (err)
		gk20a_put(g);
	return err;
}
351 | |||
/*
 * Release the ctxsw trace device.
 *
 * Order matters: first stop the FECS tracer, then disable writers under
 * the write lock (so gk20a_ctxsw_trace_write() cannot touch the ring),
 * then free the user buffer and drop the GPU reference taken in open().
 */
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);

	/* stop the hardware from generating more records */
	g->ops.fecs_trace.disable(g);

	nvgpu_mutex_acquire(&dev->write_lock);
	dev->write_enabled = false;
	nvgpu_mutex_release(&dev->write_lock);

	/* hdr != NULL means open() allocated a ring buffer */
	if (dev->hdr) {
		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
		dev->hdr = NULL;
	}
	gk20a_put(g);
	return 0;
}
372 | |||
373 | long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, | ||
374 | unsigned long arg) | ||
375 | { | ||
376 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
377 | struct gk20a *g = dev->g; | ||
378 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
379 | int err = 0; | ||
380 | |||
381 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd)); | ||
382 | |||
383 | if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) || | ||
384 | (_IOC_NR(cmd) == 0) || | ||
385 | (_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) || | ||
386 | (_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE)) | ||
387 | return -EINVAL; | ||
388 | |||
389 | memset(buf, 0, sizeof(buf)); | ||
390 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
391 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
392 | return -EFAULT; | ||
393 | } | ||
394 | |||
395 | switch (cmd) { | ||
396 | case NVGPU_CTXSW_IOCTL_TRACE_ENABLE: | ||
397 | err = gk20a_ctxsw_dev_ioctl_trace_enable(dev); | ||
398 | break; | ||
399 | case NVGPU_CTXSW_IOCTL_TRACE_DISABLE: | ||
400 | err = gk20a_ctxsw_dev_ioctl_trace_disable(dev); | ||
401 | break; | ||
402 | case NVGPU_CTXSW_IOCTL_RING_SETUP: | ||
403 | err = gk20a_ctxsw_dev_ioctl_ring_setup(dev, | ||
404 | (struct nvgpu_ctxsw_ring_setup_args *) buf); | ||
405 | break; | ||
406 | case NVGPU_CTXSW_IOCTL_SET_FILTER: | ||
407 | err = gk20a_ctxsw_dev_ioctl_set_filter(dev, | ||
408 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
409 | break; | ||
410 | case NVGPU_CTXSW_IOCTL_GET_FILTER: | ||
411 | err = gk20a_ctxsw_dev_ioctl_get_filter(dev, | ||
412 | (struct nvgpu_ctxsw_trace_filter_args *) buf); | ||
413 | break; | ||
414 | case NVGPU_CTXSW_IOCTL_POLL: | ||
415 | err = gk20a_ctxsw_dev_ioctl_poll(dev); | ||
416 | break; | ||
417 | default: | ||
418 | dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", | ||
419 | cmd); | ||
420 | err = -ENOTTY; | ||
421 | } | ||
422 | |||
423 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
424 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | ||
425 | |||
426 | return err; | ||
427 | } | ||
428 | |||
429 | unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait) | ||
430 | { | ||
431 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
432 | struct gk20a *g = dev->g; | ||
433 | struct nvgpu_ctxsw_ring_header *hdr = dev->hdr; | ||
434 | unsigned int mask = 0; | ||
435 | |||
436 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); | ||
437 | |||
438 | nvgpu_mutex_acquire(&dev->write_lock); | ||
439 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
440 | if (!ring_is_empty(hdr)) | ||
441 | mask |= POLLIN | POLLRDNORM; | ||
442 | nvgpu_mutex_release(&dev->write_lock); | ||
443 | |||
444 | return mask; | ||
445 | } | ||
446 | |||
447 | static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) | ||
448 | { | ||
449 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
450 | struct gk20a *g = dev->g; | ||
451 | |||
452 | nvgpu_atomic_inc(&dev->vma_ref); | ||
453 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
454 | nvgpu_atomic_read(&dev->vma_ref)); | ||
455 | } | ||
456 | |||
457 | static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) | ||
458 | { | ||
459 | struct gk20a_ctxsw_dev *dev = vma->vm_private_data; | ||
460 | struct gk20a *g = dev->g; | ||
461 | |||
462 | nvgpu_atomic_dec(&dev->vma_ref); | ||
463 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", | ||
464 | nvgpu_atomic_read(&dev->vma_ref)); | ||
465 | } | ||
466 | |||
467 | static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { | ||
468 | .open = gk20a_ctxsw_dev_vma_open, | ||
469 | .close = gk20a_ctxsw_dev_vma_close, | ||
470 | }; | ||
471 | |||
472 | int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, | ||
473 | struct vm_area_struct *vma) | ||
474 | { | ||
475 | return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0); | ||
476 | } | ||
477 | |||
478 | int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||
479 | { | ||
480 | struct gk20a_ctxsw_dev *dev = filp->private_data; | ||
481 | struct gk20a *g = dev->g; | ||
482 | int ret; | ||
483 | |||
484 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx", | ||
485 | vma->vm_start, vma->vm_end); | ||
486 | |||
487 | ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma); | ||
488 | if (likely(!ret)) { | ||
489 | vma->vm_private_data = dev; | ||
490 | vma->vm_ops = &gk20a_ctxsw_dev_vma_ops; | ||
491 | vma->vm_ops->open(vma); | ||
492 | } | ||
493 | |||
494 | return ret; | ||
495 | } | ||
496 | |||
#ifdef CONFIG_GK20A_CTXSW_TRACE
/*
 * Initialize every per-VM trace device slot: no ring buffer yet, writes
 * disabled, readout wait queue and write lock ready, zero VMA refs.
 *
 * NOTE(review): if nvgpu_mutex_init() fails partway, locks initialized
 * in earlier iterations are not destroyed here and the caller frees the
 * trace struct without destroying them — confirm this leak is benign.
 */
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif
519 | |||
/*
 * One-time setup of context-switch tracing.
 *
 * No-op (returning 0) when tracing is unsupported by the HAL, already
 * initialized, or compiled out. On failure the whole fecs_trace ops
 * table is zeroed so later code treats tracing as unsupported, and the
 * partially built trace struct is freed.
 */
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	/* already initialized on a previous call */
	if (likely(trace))
		return 0;

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	/* disable the feature entirely: clear the HAL ops so nothing else
	 * tries to use a half-initialized tracer */
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}
559 | |||
/*
 * Tear down context-switch tracing: destroy per-device locks, free the
 * trace struct, then deinitialize the FECS tracer HAL.
 *
 * NOTE(review): g->ops.fecs_trace.deinit is called unconditionally once
 * ctxsw_trace exists; init() only checked .init was non-NULL — confirm
 * every chip that provides .init also provides .deinit.
 */
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	/* nothing to do if init was skipped or failed */
	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}
584 | |||
/*
 * Append one trace entry to the per-VM user-space ring buffer.
 *
 * Returns 0 on success (including the "filtered out" no-op case) or a
 * negative errno. Dropped entries are counted in hdr->drop_count. A
 * write index outside the ring indicates a corrupted header (user space
 * maps the ring writable), so the FECS tracer is disabled outright.
 *
 * Lock order: everything after the early vmid check runs under
 * dev->write_lock; each exit path below releases it exactly once.
 */
int gk20a_ctxsw_trace_write(struct gk20a *g,
		struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	/* sequence numbers advance even for entries that get dropped */
	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	nvgpu_log(g, gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	/* ring header unusable: stop the tracer to avoid further damage */
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	nvgpu_log(g, gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}
680 | |||
681 | void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid) | ||
682 | { | ||
683 | struct gk20a_ctxsw_dev *dev; | ||
684 | |||
685 | if (!g->ctxsw_trace) | ||
686 | return; | ||
687 | |||
688 | dev = &g->ctxsw_trace->devs[vmid]; | ||
689 | nvgpu_cond_signal_interruptible(&dev->readout_wq); | ||
690 | } | ||
691 | |||
/*
 * Record an engine-reset event for a channel in the ctxsw trace ring
 * and wake readers. The ftrace tracepoint at the end fires regardless
 * of CONFIG_GK20A_CTXSW_TRACE.
 */
void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}
711 | |||
/*
 * Record an engine-reset event for a TSG in the ctxsw trace ring and
 * wake readers. The channel id in the tracepoint is ~0 since the reset
 * applies to the whole TSG, not a single channel.
 */
void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}
diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h new file mode 100644 index 00000000..88ca7f25 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef __CTXSW_TRACE_H__
#define __CTXSW_TRACE_H__

#include <nvgpu/types.h>

/* Number of per-VM trace devices; only a single VM is supported today. */
#define GK20A_CTXSW_TRACE_NUM_DEVS 1

struct file;
struct inode;
struct poll_table_struct;

struct gk20a;

/* file_operations callbacks for the ctxsw trace character device */
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
long gk20a_ctxsw_dev_ioctl(struct file *filp,
			unsigned int cmd, unsigned long arg);
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
			size_t size, loff_t *offs);
unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
			struct poll_table_struct *pts);

#endif /* __CTXSW_TRACE_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c new file mode 100644 index 00000000..8738f3e7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug.c | |||
@@ -0,0 +1,452 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_cde.h" | ||
16 | #include "debug_ce.h" | ||
17 | #include "debug_fifo.h" | ||
18 | #include "debug_gr.h" | ||
19 | #include "debug_allocator.h" | ||
20 | #include "debug_kmem.h" | ||
21 | #include "debug_pmu.h" | ||
22 | #include "debug_sched.h" | ||
23 | #include "debug_hal.h" | ||
24 | #include "debug_xve.h" | ||
25 | #include "os_linux.h" | ||
26 | #include "platform_gk20a.h" | ||
27 | |||
28 | #include "gk20a/gk20a.h" | ||
29 | |||
30 | #include <linux/debugfs.h> | ||
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | |||
34 | #include <nvgpu/debug.h> | ||
35 | |||
36 | unsigned int gk20a_debug_trace_cmdbuf; | ||
37 | |||
38 | static inline void gk20a_debug_write_printk(void *ctx, const char *str, | ||
39 | size_t len) | ||
40 | { | ||
41 | pr_info("%s", str); | ||
42 | } | ||
43 | |||
44 | static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, | ||
45 | size_t len) | ||
46 | { | ||
47 | seq_write((struct seq_file *)ctx, str, len); | ||
48 | } | ||
49 | |||
50 | void gk20a_debug_output(struct gk20a_debug_output *o, | ||
51 | const char *fmt, ...) | ||
52 | { | ||
53 | va_list args; | ||
54 | int len; | ||
55 | |||
56 | va_start(args, fmt); | ||
57 | len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); | ||
58 | va_end(args); | ||
59 | o->fn(o->ctx, o->buf, len); | ||
60 | } | ||
61 | |||
/* Dump GR registers through the HAL, if provided; the call is wrapped in
 * gr_gk20a_elpg_protected_call() so ELPG is disabled while reading. */
static int gk20a_gr_dump_regs(struct gk20a *g,
		struct gk20a_debug_output *o)
{
	if (g->ops.gr.dump_gr_regs)
		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));

	return 0;
}
70 | |||
71 | int gk20a_gr_debug_dump(struct gk20a *g) | ||
72 | { | ||
73 | struct gk20a_debug_output o = { | ||
74 | .fn = gk20a_debug_write_printk | ||
75 | }; | ||
76 | |||
77 | gk20a_gr_dump_regs(g, &o); | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static int gk20a_gr_debug_show(struct seq_file *s, void *unused) | ||
83 | { | ||
84 | struct device *dev = s->private; | ||
85 | struct gk20a *g = gk20a_get_platform(dev)->g; | ||
86 | struct gk20a_debug_output o = { | ||
87 | .fn = gk20a_debug_write_to_seqfile, | ||
88 | .ctx = s, | ||
89 | }; | ||
90 | int err; | ||
91 | |||
92 | err = gk20a_busy(g); | ||
93 | if (err) { | ||
94 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
95 | return -EINVAL; | ||
96 | } | ||
97 | |||
98 | gk20a_gr_dump_regs(g, &o); | ||
99 | |||
100 | gk20a_idle(g); | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | void gk20a_debug_dump(struct gk20a *g) | ||
106 | { | ||
107 | struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); | ||
108 | struct gk20a_debug_output o = { | ||
109 | .fn = gk20a_debug_write_printk | ||
110 | }; | ||
111 | |||
112 | if (platform->dump_platform_dependencies) | ||
113 | platform->dump_platform_dependencies(dev_from_gk20a(g)); | ||
114 | |||
115 | /* HAL only initialized after 1st power-on */ | ||
116 | if (g->ops.debug.show_dump) | ||
117 | g->ops.debug.show_dump(g, &o); | ||
118 | } | ||
119 | |||
120 | static int gk20a_debug_show(struct seq_file *s, void *unused) | ||
121 | { | ||
122 | struct device *dev = s->private; | ||
123 | struct gk20a_debug_output o = { | ||
124 | .fn = gk20a_debug_write_to_seqfile, | ||
125 | .ctx = s, | ||
126 | }; | ||
127 | struct gk20a *g; | ||
128 | int err; | ||
129 | |||
130 | g = gk20a_get_platform(dev)->g; | ||
131 | |||
132 | err = gk20a_busy(g); | ||
133 | if (err) { | ||
134 | nvgpu_err(g, "failed to power on gpu: %d", err); | ||
135 | return -EFAULT; | ||
136 | } | ||
137 | |||
138 | /* HAL only initialized after 1st power-on */ | ||
139 | if (g->ops.debug.show_dump) | ||
140 | g->ops.debug.show_dump(g, &o); | ||
141 | |||
142 | gk20a_idle(g); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
/* debugfs open hooks: bind the seq_file show callbacks to the device
 * stored in inode->i_private by gk20a_debug_init(). */
static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_gr_debug_show, inode->i_private);
}

static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}

/* fops for the "gr_status" debugfs node */
static const struct file_operations gk20a_gr_debug_fops = {
	.open = gk20a_gr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* fops for the "status" debugfs node */
static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
169 | |||
170 | void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) | ||
171 | { | ||
172 | g->ops.fifo.dump_pbdma_status(g, o); | ||
173 | g->ops.fifo.dump_eng_status(g, o); | ||
174 | |||
175 | gk20a_debug_dump_all_channel_status_ramfc(g, o); | ||
176 | } | ||
177 | |||
178 | static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) | ||
179 | { | ||
180 | char buf[3]; | ||
181 | struct gk20a *g = file->private_data; | ||
182 | |||
183 | if (g->mm.disable_bigpage) | ||
184 | buf[0] = 'Y'; | ||
185 | else | ||
186 | buf[0] = 'N'; | ||
187 | buf[1] = '\n'; | ||
188 | buf[2] = 0x00; | ||
189 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
190 | } | ||
191 | |||
192 | static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) | ||
193 | { | ||
194 | char buf[32]; | ||
195 | int buf_size; | ||
196 | bool bv; | ||
197 | struct gk20a *g = file->private_data; | ||
198 | |||
199 | buf_size = min(count, (sizeof(buf)-1)); | ||
200 | if (copy_from_user(buf, user_buf, buf_size)) | ||
201 | return -EFAULT; | ||
202 | |||
203 | if (strtobool(buf, &bv) == 0) { | ||
204 | g->mm.disable_bigpage = bv; | ||
205 | gk20a_init_gpu_characteristics(g); | ||
206 | } | ||
207 | |||
208 | return count; | ||
209 | } | ||
210 | |||
211 | static struct file_operations disable_bigpage_fops = { | ||
212 | .open = simple_open, | ||
213 | .read = disable_bigpage_read, | ||
214 | .write = disable_bigpage_write, | ||
215 | }; | ||
216 | |||
/*
 * seq_file show callback for "railgate_residency": report cumulative
 * time spent railgated vs. ungated and the number of railgating cycles.
 * The time spent in the *current* state is added on top of the stored
 * totals, which are only updated on state transitions.
 */
static int railgate_residency_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
	unsigned long time_since_last_state_transition_ms;
	unsigned long total_rail_gate_time_ms;
	unsigned long total_rail_ungate_time_ms;

	if (platform->is_railgated(dev_from_gk20a(g))) {
		time_since_last_state_transition_ms =
				jiffies_to_msecs(jiffies -
				g->pstats.last_rail_gate_complete);
		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
		total_rail_gate_time_ms =
					g->pstats.total_rail_gate_time_ms +
					time_since_last_state_transition_ms;
	} else {
		time_since_last_state_transition_ms =
				jiffies_to_msecs(jiffies -
				g->pstats.last_rail_ungate_complete);
		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
		total_rail_ungate_time_ms =
					g->pstats.total_rail_ungate_time_ms +
					time_since_last_state_transition_ms;
	}

	/* the first "cycle" is boot-time power-on, hence the -1 */
	seq_printf(s, "Time with Rails Gated: %lu ms\n"
			"Time with Rails UnGated: %lu ms\n"
			"Total railgating cycles: %lu\n",
			total_rail_gate_time_ms,
			total_rail_ungate_time_ms,
			g->pstats.railgating_cycle_count - 1);
	return 0;

}
252 | |||
/* debugfs open hook binding railgate_residency_show to the gk20a
 * instance stored in inode->i_private. */
static int railgate_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, railgate_residency_show, inode->i_private);
}

/* fops for the "railgate_residency" debugfs node */
static const struct file_operations railgate_residency_fops = {
	.open = railgate_residency_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
264 | |||
265 | static int gk20a_railgating_debugfs_init(struct gk20a *g) | ||
266 | { | ||
267 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
268 | struct dentry *d; | ||
269 | |||
270 | d = debugfs_create_file( | ||
271 | "railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
272 | &railgate_residency_fops); | ||
273 | if (!d) | ||
274 | return -ENOMEM; | ||
275 | |||
276 | return 0; | ||
277 | } | ||
278 | static ssize_t timeouts_enabled_read(struct file *file, | ||
279 | char __user *user_buf, size_t count, loff_t *ppos) | ||
280 | { | ||
281 | char buf[3]; | ||
282 | struct gk20a *g = file->private_data; | ||
283 | |||
284 | if (nvgpu_is_timeouts_enabled(g)) | ||
285 | buf[0] = 'Y'; | ||
286 | else | ||
287 | buf[0] = 'N'; | ||
288 | buf[1] = '\n'; | ||
289 | buf[2] = 0x00; | ||
290 | return simple_read_from_buffer(user_buf, count, ppos, buf, 2); | ||
291 | } | ||
292 | |||
293 | static ssize_t timeouts_enabled_write(struct file *file, | ||
294 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
295 | { | ||
296 | char buf[3]; | ||
297 | int buf_size; | ||
298 | bool timeouts_enabled; | ||
299 | struct gk20a *g = file->private_data; | ||
300 | |||
301 | buf_size = min(count, (sizeof(buf)-1)); | ||
302 | if (copy_from_user(buf, user_buf, buf_size)) | ||
303 | return -EFAULT; | ||
304 | |||
305 | if (strtobool(buf, &timeouts_enabled) == 0) { | ||
306 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
307 | if (timeouts_enabled == false) { | ||
308 | /* requesting to disable timeouts */ | ||
309 | if (g->timeouts_disabled_by_user == false) { | ||
310 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
311 | g->timeouts_disabled_by_user = true; | ||
312 | } | ||
313 | } else { | ||
314 | /* requesting to enable timeouts */ | ||
315 | if (g->timeouts_disabled_by_user == true) { | ||
316 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
317 | g->timeouts_disabled_by_user = false; | ||
318 | } | ||
319 | } | ||
320 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
321 | } | ||
322 | |||
323 | return count; | ||
324 | } | ||
325 | |||
/* fops for the "timeouts_enabled" debugfs node */
static const struct file_operations timeouts_enabled_fops = {
	.open = simple_open,
	.read = timeouts_enabled_read,
	.write = timeouts_enabled_write,
};
331 | |||
/*
 * Create the per-device debugfs directory (named after the device) plus
 * all core debug nodes, then delegate to the per-subsystem debugfs init
 * helpers. @debugfs_symlink, when non-NULL, is the name of an alias
 * symlink to create at the debugfs root. Failures are not propagated:
 * debugfs is best-effort.
 */
void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct device *dev = dev_from_gk20a(g);

	l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
	if (!l->debugfs)
		return;

	if (debugfs_symlink)
		l->debugfs_alias =
			debugfs_create_symlink(debugfs_symlink,
					NULL, dev_name(dev));

	/* full-state and GR-only register dumps */
	debugfs_create_file("status", S_IRUGO, l->debugfs,
		dev, &gk20a_debug_fops);
	debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
		dev, &gk20a_gr_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
		l->debugfs, &gk20a_debug_trace_cmdbuf);

	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
		l->debugfs, &g->ch_wdt_timeout_ms);

	debugfs_create_u32("disable_syncpoints", S_IRUGO,
		l->debugfs, &g->disable_syncpoints);

	/* New debug logging API. */
	debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
		l->debugfs, &g->log_mask);
	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
		l->debugfs, &g->log_trace);

	l->debugfs_ltc_enabled =
			debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
				 l->debugfs,
				 &g->mm.ltc_enabled_target);

	l->debugfs_gr_idle_timeout_default =
			debugfs_create_u32("gr_idle_timeout_default_us",
					S_IRUGO|S_IWUSR, l->debugfs,
					 &g->gr_idle_timeout_default);
	l->debugfs_timeouts_enabled =
			debugfs_create_file("timeouts_enabled",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					g,
					&timeouts_enabled_fops);

	l->debugfs_disable_bigpage =
			debugfs_create_file("disable_bigpage",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					g,
					&disable_bigpage_fops);

	/* scheduler timeslice knobs */
	l->debugfs_timeslice_low_priority_us =
			debugfs_create_u32("timeslice_low_priority_us",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					&g->timeslice_low_priority_us);
	l->debugfs_timeslice_medium_priority_us =
			debugfs_create_u32("timeslice_medium_priority_us",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					&g->timeslice_medium_priority_us);
	l->debugfs_timeslice_high_priority_us =
			debugfs_create_u32("timeslice_high_priority_us",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					&g->timeslice_high_priority_us);
	l->debugfs_runlist_interleave =
			debugfs_create_bool("runlist_interleave",
					S_IRUGO|S_IWUSR,
					l->debugfs,
					&g->runlist_interleave);
	l->debugfs_force_preemption_gfxp =
		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->gr.ctx_vars.force_preemption_gfxp);

	l->debugfs_force_preemption_cilp =
		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->gr.ctx_vars.force_preemption_cilp);

	l->debugfs_dump_ctxsw_stats =
		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
			S_IRUGO|S_IWUSR, l->debugfs,
			&g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close);

	/* per-subsystem debugfs trees */
	gr_gk20a_debugfs_init(g);
	gk20a_pmu_debugfs_init(g);
	gk20a_railgating_debugfs_init(g);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	gk20a_cde_debugfs_init(g);
#endif
	gk20a_ce_debugfs_init(g);
	nvgpu_alloc_debugfs_init(g);
	nvgpu_hal_debugfs_init(g);
	gk20a_fifo_debugfs_init(g);
	gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	nvgpu_kmem_debugfs_init(g);
#endif
	/* dGPU (PCIe) only */
	if (g->pci_vendor_id)
		nvgpu_xve_debugfs_init(g);
}
440 | |||
/*
 * Tear down the debugfs tree created by gk20a_debug_init(). The fifo
 * debugfs teardown runs first (it has state beyond the dentries), then
 * the directory is removed recursively, then the root-level alias.
 */
void gk20a_debug_deinit(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/* nothing to do if init never created the directory */
	if (!l->debugfs)
		return;

	gk20a_fifo_debugfs_deinit(g);

	debugfs_remove_recursive(l->debugfs);
	debugfs_remove(l->debugfs_alias);
}
diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.c b/drivers/gpu/nvgpu/os/linux/debug_allocator.c new file mode 100644 index 00000000..d63a9030 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.c | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_allocator.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include <nvgpu/allocator.h> | ||
22 | |||
23 | static int __alloc_show(struct seq_file *s, void *unused) | ||
24 | { | ||
25 | struct nvgpu_allocator *a = s->private; | ||
26 | |||
27 | nvgpu_alloc_print_stats(a, s, 1); | ||
28 | |||
29 | return 0; | ||
30 | } | ||
31 | |||
/*
 * debugfs open callback: forward the nvgpu_allocator pointer stashed in
 * inode->i_private (by nvgpu_init_alloc_debug()) to the seq_file so
 * __alloc_show() can retrieve it via s->private.
 */
static int __alloc_open(struct inode *inode, struct file *file)
{
	return single_open(file, __alloc_show, inode->i_private);
}
36 | |||
/* Canned seq_file operations for a per-allocator stats node. */
static const struct file_operations __alloc_fops = {
	.open = __alloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
43 | |||
44 | void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) | ||
45 | { | ||
46 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
47 | |||
48 | if (!l->debugfs_allocators) | ||
49 | return; | ||
50 | |||
51 | a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, | ||
52 | l->debugfs_allocators, | ||
53 | a, &__alloc_fops); | ||
54 | } | ||
55 | |||
/*
 * Counterpart of nvgpu_init_alloc_debug(). Intentionally empty: the
 * per-allocator node is torn down with the whole tree by
 * debugfs_remove_recursive() in gk20a_debug_deinit().
 */
void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
{
}
59 | |||
60 | void nvgpu_alloc_debugfs_init(struct gk20a *g) | ||
61 | { | ||
62 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
63 | |||
64 | l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs); | ||
65 | if (IS_ERR_OR_NULL(l->debugfs_allocators)) { | ||
66 | l->debugfs_allocators = NULL; | ||
67 | return; | ||
68 | } | ||
69 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_allocator.h b/drivers/gpu/nvgpu/os/linux/debug_allocator.h new file mode 100644 index 00000000..1b21cfc5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_allocator.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__

struct gk20a;
/* Create the "allocators" debugfs directory for this GPU. */
void nvgpu_alloc_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.c b/drivers/gpu/nvgpu/os/linux/debug_cde.c new file mode 100644 index 00000000..f0afa6ee --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_cde.h" | ||
16 | #include "platform_gk20a.h" | ||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | |||
22 | static ssize_t gk20a_cde_reload_write(struct file *file, | ||
23 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
24 | { | ||
25 | struct nvgpu_os_linux *l = file->private_data; | ||
26 | gk20a_cde_reload(l); | ||
27 | return count; | ||
28 | } | ||
29 | |||
/* Write-only trigger node; simple_open stores i_private in private_data. */
static const struct file_operations gk20a_cde_reload_fops = {
	.open = simple_open,
	.write = gk20a_cde_reload_write,
};
34 | |||
35 | void gk20a_cde_debugfs_init(struct gk20a *g) | ||
36 | { | ||
37 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
38 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | ||
39 | |||
40 | if (!platform->has_cde) | ||
41 | return; | ||
42 | |||
43 | debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, | ||
44 | l->debugfs, &l->cde_app.shader_parameter); | ||
45 | debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, | ||
46 | l->debugfs, &l->cde_app.ctx_count); | ||
47 | debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, | ||
48 | l->debugfs, &l->cde_app.ctx_usecount); | ||
49 | debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, | ||
50 | l->debugfs, &l->cde_app.ctx_count_top); | ||
51 | debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, | ||
52 | l, &gk20a_cde_reload_fops); | ||
53 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_cde.h b/drivers/gpu/nvgpu/os/linux/debug_cde.h new file mode 100644 index 00000000..4895edd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_cde.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__

struct gk20a;
/* Create the CDE debugfs nodes; no-op on platforms without CDE. */
void gk20a_cde_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.c b/drivers/gpu/nvgpu/os/linux/debug_ce.c new file mode 100644 index 00000000..cea0bb47 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_ce.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | |||
20 | void gk20a_ce_debugfs_init(struct gk20a *g) | ||
21 | { | ||
22 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
23 | |||
24 | debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, | ||
25 | l->debugfs, &g->ce_app.ctx_count); | ||
26 | debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, | ||
27 | l->debugfs, &g->ce_app.app_state); | ||
28 | debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, | ||
29 | l->debugfs, &g->ce_app.next_ctx_id); | ||
30 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.h b/drivers/gpu/nvgpu/os/linux/debug_ce.h new file mode 100644 index 00000000..2a8750c4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__

struct gk20a;
/* Expose the copy-engine app counters under the device debugfs root. */
void gk20a_ce_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_clk.c b/drivers/gpu/nvgpu/os/linux/debug_clk.c new file mode 100644 index 00000000..2484d44b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_clk.c | |||
@@ -0,0 +1,271 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/debugfs.h> | ||
17 | #include <linux/seq_file.h> | ||
18 | |||
19 | #include "gm20b/clk_gm20b.h" | ||
20 | #include "os_linux.h" | ||
21 | #include "platform_gk20a.h" | ||
22 | |||
23 | static int rate_get(void *data, u64 *val) | ||
24 | { | ||
25 | struct gk20a *g = (struct gk20a *)data; | ||
26 | struct clk_gk20a *clk = &g->clk; | ||
27 | |||
28 | *val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq); | ||
29 | return 0; | ||
30 | } | ||
31 | static int rate_set(void *data, u64 val) | ||
32 | { | ||
33 | struct gk20a *g = (struct gk20a *)data; | ||
34 | return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val); | ||
35 | } | ||
36 | DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n"); | ||
37 | |||
38 | static int pll_reg_show(struct seq_file *s, void *data) | ||
39 | { | ||
40 | struct gk20a *g = s->private; | ||
41 | struct nvgpu_clk_pll_debug_data d; | ||
42 | u32 reg, m, n, pl, f; | ||
43 | int err = 0; | ||
44 | |||
45 | if (g->ops.clk.get_pll_debug_data) { | ||
46 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
47 | if (err) | ||
48 | return err; | ||
49 | } else { | ||
50 | return -EINVAL; | ||
51 | } | ||
52 | |||
53 | seq_printf(s, "bypassctrl = %s, ", | ||
54 | d.trim_sys_bypassctrl_val ? "bypass" : "vco"); | ||
55 | seq_printf(s, "sel_vco = %s, ", | ||
56 | d.trim_sys_sel_vco_val ? "vco" : "bypass"); | ||
57 | |||
58 | seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val, | ||
59 | d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled", | ||
60 | d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked", | ||
61 | d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off"); | ||
62 | |||
63 | reg = d.trim_sys_gpcpll_coeff_val; | ||
64 | m = d.trim_sys_gpcpll_coeff_mdiv; | ||
65 | n = d.trim_sys_gpcpll_coeff_ndiv; | ||
66 | pl = d.trim_sys_gpcpll_coeff_pldiv; | ||
67 | f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl)); | ||
68 | seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); | ||
69 | seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2); | ||
70 | |||
71 | seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n", | ||
72 | d.trim_sys_gpcpll_dvfs0_val, | ||
73 | d.trim_sys_gpcpll_dvfs0_dfs_coeff, | ||
74 | d.trim_sys_gpcpll_dvfs0_dfs_det_max, | ||
75 | d.trim_sys_gpcpll_dvfs0_dfs_dc_offset); | ||
76 | |||
77 | return 0; | ||
78 | } | ||
79 | |||
/* Standard single_open plumbing: gk20a pointer travels via i_private. */
static int pll_reg_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_show, inode->i_private);
}

static const struct file_operations pll_reg_fops = {
	.open = pll_reg_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
91 | |||
92 | static int pll_reg_raw_show(struct seq_file *s, void *data) | ||
93 | { | ||
94 | struct gk20a *g = s->private; | ||
95 | struct nvgpu_clk_pll_debug_data d; | ||
96 | u32 reg; | ||
97 | int err = 0; | ||
98 | |||
99 | if (g->ops.clk.get_pll_debug_data) { | ||
100 | err = g->ops.clk.get_pll_debug_data(g, &d); | ||
101 | if (err) | ||
102 | return err; | ||
103 | } else { | ||
104 | return -EINVAL; | ||
105 | } | ||
106 | |||
107 | seq_puts(s, "GPCPLL REGISTERS:\n"); | ||
108 | for (reg = d.trim_sys_gpcpll_cfg_reg; | ||
109 | reg <= d.trim_sys_gpcpll_dvfs2_reg; | ||
110 | reg += sizeof(u32)) | ||
111 | seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg)); | ||
112 | |||
113 | seq_puts(s, "\nGPC CLK OUT REGISTERS:\n"); | ||
114 | |||
115 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg, | ||
116 | d.trim_sys_sel_vco_val); | ||
117 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg, | ||
118 | d.trim_sys_gpc2clk_out_val); | ||
119 | seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg, | ||
120 | d.trim_sys_bypassctrl_val); | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
/* Standard single_open plumbing for the raw register dump node. */
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_raw_show, inode->i_private);
}
129 | |||
130 | static ssize_t pll_reg_raw_write(struct file *file, | ||
131 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
132 | { | ||
133 | struct gk20a *g = file->f_path.dentry->d_inode->i_private; | ||
134 | char buf[80]; | ||
135 | u32 reg, val; | ||
136 | int err = 0; | ||
137 | |||
138 | if (sizeof(buf) <= count) | ||
139 | return -EINVAL; | ||
140 | |||
141 | if (copy_from_user(buf, userbuf, count)) | ||
142 | return -EFAULT; | ||
143 | |||
144 | /* terminate buffer and trim - white spaces may be appended | ||
145 | * at the end when invoked from shell command line */ | ||
146 | buf[count] = '\0'; | ||
147 | strim(buf); | ||
148 | |||
149 | if (sscanf(buf, "[0x%x] = 0x%x", ®, &val) != 2) | ||
150 | return -EINVAL; | ||
151 | |||
152 | if (g->ops.clk.pll_reg_write(g, reg, val)) | ||
153 | err = g->ops.clk.pll_reg_write(g, reg, val); | ||
154 | else | ||
155 | err = -EINVAL; | ||
156 | |||
157 | return err; | ||
158 | } | ||
159 | |||
/* Read via seq_file dump, write via the "[0xREG] = 0xVAL" parser above. */
static const struct file_operations pll_reg_raw_fops = {
	.open = pll_reg_raw_open,
	.read = seq_read,
	.write = pll_reg_raw_write,
	.llseek = seq_lseek,
	.release = single_release,
};
167 | |||
168 | static int monitor_get(void *data, u64 *val) | ||
169 | { | ||
170 | struct gk20a *g = (struct gk20a *)data; | ||
171 | int err = 0; | ||
172 | |||
173 | if (g->ops.clk.get_gpcclk_clock_counter) | ||
174 | err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val); | ||
175 | else | ||
176 | err = -EINVAL; | ||
177 | |||
178 | return err; | ||
179 | } | ||
180 | DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); | ||
181 | |||
182 | static int voltage_get(void *data, u64 *val) | ||
183 | { | ||
184 | struct gk20a *g = (struct gk20a *)data; | ||
185 | int err = 0; | ||
186 | |||
187 | if (g->ops.clk.get_voltage) | ||
188 | err = g->ops.clk.get_voltage(&g->clk, val); | ||
189 | else | ||
190 | err = -EINVAL; | ||
191 | |||
192 | return err; | ||
193 | } | ||
194 | DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n"); | ||
195 | |||
196 | static int pll_param_show(struct seq_file *s, void *data) | ||
197 | { | ||
198 | struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms(); | ||
199 | |||
200 | seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n", | ||
201 | gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope, | ||
202 | gpc_pll_params->vco_ctrl); | ||
203 | return 0; | ||
204 | } | ||
205 | |||
/* Standard single_open plumbing for the PLL parameter node. */
static int pll_param_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_param_show, inode->i_private);
}

static const struct file_operations pll_param_fops = {
	.open = pll_param_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
217 | |||
218 | int gm20b_clk_init_debugfs(struct gk20a *g) | ||
219 | { | ||
220 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
221 | struct dentry *d; | ||
222 | |||
223 | if (!l->debugfs) | ||
224 | return -EINVAL; | ||
225 | |||
226 | d = debugfs_create_file( | ||
227 | "rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops); | ||
228 | if (!d) | ||
229 | goto err_out; | ||
230 | |||
231 | d = debugfs_create_file( | ||
232 | "pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops); | ||
233 | if (!d) | ||
234 | goto err_out; | ||
235 | |||
236 | d = debugfs_create_file("pll_reg_raw", | ||
237 | S_IRUGO, l->debugfs, g, &pll_reg_raw_fops); | ||
238 | if (!d) | ||
239 | goto err_out; | ||
240 | |||
241 | d = debugfs_create_file( | ||
242 | "monitor", S_IRUGO, l->debugfs, g, &monitor_fops); | ||
243 | if (!d) | ||
244 | goto err_out; | ||
245 | |||
246 | d = debugfs_create_file( | ||
247 | "voltage", S_IRUGO, l->debugfs, g, &voltage_fops); | ||
248 | if (!d) | ||
249 | goto err_out; | ||
250 | |||
251 | d = debugfs_create_file( | ||
252 | "pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops); | ||
253 | if (!d) | ||
254 | goto err_out; | ||
255 | |||
256 | d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs, | ||
257 | (u32 *)&g->clk.gpc_pll.mode); | ||
258 | if (!d) | ||
259 | goto err_out; | ||
260 | |||
261 | d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO, | ||
262 | l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq); | ||
263 | if (!d) | ||
264 | goto err_out; | ||
265 | |||
266 | return 0; | ||
267 | |||
268 | err_out: | ||
269 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
270 | return -ENOMEM; | ||
271 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.c b/drivers/gpu/nvgpu/os/linux/debug_fifo.c new file mode 100644 index 00000000..2b5674c0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.c | |||
@@ -0,0 +1,378 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_fifo.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include <nvgpu/sort.h> | ||
22 | #include <nvgpu/timers.h> | ||
23 | |||
24 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); | ||
25 | |||
26 | static void *gk20a_fifo_sched_debugfs_seq_start( | ||
27 | struct seq_file *s, loff_t *pos) | ||
28 | { | ||
29 | struct gk20a *g = s->private; | ||
30 | struct fifo_gk20a *f = &g->fifo; | ||
31 | |||
32 | if (*pos >= f->num_channels) | ||
33 | return NULL; | ||
34 | |||
35 | return &f->channel[*pos]; | ||
36 | } | ||
37 | |||
38 | static void *gk20a_fifo_sched_debugfs_seq_next( | ||
39 | struct seq_file *s, void *v, loff_t *pos) | ||
40 | { | ||
41 | struct gk20a *g = s->private; | ||
42 | struct fifo_gk20a *f = &g->fifo; | ||
43 | |||
44 | ++(*pos); | ||
45 | if (*pos >= f->num_channels) | ||
46 | return NULL; | ||
47 | |||
48 | return &f->channel[*pos]; | ||
49 | } | ||
50 | |||
/* seq_file stop: nothing to release — iteration holds no locks. */
static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
}
55 | |||
/*
 * seq_file show: print one scheduling-table row for channel 'v'.
 *
 * The header lines are emitted when 'v' is the first channel in the
 * array. Channels not active on the GR runlist are skipped (SEQ_SKIP),
 * except the first channel, which must return 0 so the already-printed
 * header is kept.
 */
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	/* Only channels on the GR engine's runlist are listed. */
	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->chid, runlist->active_channels))
		return ret;

	/* Take a channel ref so the channel cannot close underneath us;
	 * a row is printed only if the channel is bound to a TSG. */
	if (gk20a_channel_get(ch)) {
		tsg = tsg_gk20a_from_ch(ch);

		if (tsg)
			seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->chid,
				ch->tsgid,
				ch->tgid,
				tsg->timeslice_us,
				ch->timeout_ms_max,
				tsg->interleave_level,
				tsg->gr_ctx.graphics_preempt_mode,
				tsg->gr_ctx.compute_preempt_mode);
		gk20a_channel_put(ch);
	}
	return 0;
}
101 | |||
/* Iterator over the channel array for the "sched" debugfs node. */
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};
108 | |||
109 | static int gk20a_fifo_sched_debugfs_open(struct inode *inode, | ||
110 | struct file *file) | ||
111 | { | ||
112 | struct gk20a *g = inode->i_private; | ||
113 | int err; | ||
114 | |||
115 | if (!capable(CAP_SYS_ADMIN)) | ||
116 | return -EPERM; | ||
117 | |||
118 | err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); | ||
119 | if (err) | ||
120 | return err; | ||
121 | |||
122 | nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); | ||
123 | |||
124 | ((struct seq_file *)file->private_data)->private = inode->i_private; | ||
125 | return 0; | ||
126 | }; | ||
127 | |||
/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
139 | |||
/*
 * debugfs "enable" write handler: val != 0 enables kickoff profiling,
 * val == 0 disables it.
 *
 * Enabling allocates the ring buffer of profile entries plus the
 * scratch array used for sorting, guarded by profile.lock. The buffers
 * are refcounted (profile.ref) so in-flight kickoffs holding a
 * reference keep them alive across a disable; the last put runs
 * __gk20a_fifo_profile_free().
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;


	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			/* Drop the "enabled" reference; buffers are freed
			 * once the last in-flight user also drops theirs. */
			nvgpu_ref_put(&f->profile.ref,
				__gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* not kref init as it can have a running condition if
			 * we enable/disable/enable while kickoff is happening
			 */
			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					/* nvgpu_vfree(NULL) handles the
					 * partially-allocated case. */
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				nvgpu_ref_init(&f->profile.ref);
			}
			/* Restart the ring buffer write cursor. */
			atomic_set(&f->profile.get.atomic_var, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);
188 | |||
/*
 * sort() comparator for u64 timestamp deltas.
 *
 * Fix: the previous version returned the raw difference truncated to
 * int. u64 subtraction wraps, so for b > a (or any difference whose low
 * 32 bits look negative/positive by accident) the sign was wrong and
 * sort() could mis-order the samples, corrupting the percentile stats.
 * Use the canonical overflow-safe (a > b) - (a < b) idiom instead.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *)a;
	unsigned long long y = *(const unsigned long long *)b;

	return (x > y) - (x < y);
}
193 | |||
/*
 * This uses about 800b in the stack, but the function using it is not part
 * of a callstack where much memory is being used, so it is fine
 */
#define PERCENTILE_WIDTH 5
#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)

/*
 * Build percentile statistics for the interval between two timestamp
 * indices over all valid ring-buffer entries.
 *
 * An entry is valid when timestamp[index_end] > timestamp[index_start].
 * The deltas are gathered into profile.sorted, sorted ascending, and
 * each 5%-percentile boundary is written into 'percentiles' (zeroed
 * when there are fewer samples than ranges). Returns the number of
 * valid samples.
 */
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
		u64 *percentiles, u32 index_end, u32 index_start)
{
	unsigned int nelem = 0;
	unsigned int index;
	struct fifo_profile_gk20a *profile;

	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
		profile = &g->fifo.profile.data[index];

		if (profile->timestamp[index_end] >
				profile->timestamp[index_start]) {
			/* This is a valid element */
			g->fifo.profile.sorted[nelem] =
						profile->timestamp[index_end] -
						profile->timestamp[index_start];
			nelem++;
		}
	}

	/* sort it */
	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
		__profile_cmp, NULL);

	/* build ranges */
	for (index = 0; index < PERCENTILE_RANGES; index++) {
		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
				nelem)/100 - 1];
	}
	return nelem;
}
233 | |||
/*
 * debugfs "stats" show handler: print percentile tables for each
 * profiled kickoff phase. Takes a reference on the profiling buffers
 * for the duration so a concurrent disable cannot free them.
 */
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	unsigned int get, nelem, index;
	/*
	 * 800B in the stack, but function is declared statically and only
	 * called from debugfs handler
	 */
	u64 percentiles_ioctl[PERCENTILE_RANGES];
	u64 percentiles_kickoff[PERCENTILE_RANGES];
	u64 percentiles_jobtracking[PERCENTILE_RANGES];
	u64 percentiles_append[PERCENTILE_RANGES];
	u64 percentiles_userd[PERCENTILE_RANGES];

	/* Zero refcount means profiling is off and buffers are gone. */
	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
		seq_printf(s, "Profiling disabled\n");
		return 0;
	}

	get = atomic_read(&g->fifo.profile.get.atomic_var);

	/* One stats pass per phase pair; the last call's sample count is
	 * reported as the overall number of kickoffs. */
	__gk20a_fifo_create_stats(g, percentiles_ioctl,
		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_kickoff,
		PROFILE_END, PROFILE_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_append,
		PROFILE_APPEND, PROFILE_JOB_TRACKING);
	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
		PROFILE_END, PROFILE_APPEND);

	seq_printf(s, "Number of kickoffs: %d\n", nelem);
	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

	for (index = 0; index < PERCENTILE_RANGES; index++)
		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
			PERCENTILE_WIDTH * (index+1),
			percentiles_ioctl[index],
			percentiles_kickoff[index],
			percentiles_append[index],
			percentiles_jobtracking[index],
			percentiles_userd[index]);

	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

	return 0;
}
282 | |||
/* Standard single_open plumbing for the profiling "stats" node. */
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
294 | |||
295 | |||
296 | void gk20a_fifo_debugfs_init(struct gk20a *g) | ||
297 | { | ||
298 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
299 | struct dentry *gpu_root = l->debugfs; | ||
300 | struct dentry *fifo_root; | ||
301 | struct dentry *profile_root; | ||
302 | |||
303 | fifo_root = debugfs_create_dir("fifo", gpu_root); | ||
304 | if (IS_ERR_OR_NULL(fifo_root)) | ||
305 | return; | ||
306 | |||
307 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
308 | |||
309 | debugfs_create_file("sched", 0600, fifo_root, g, | ||
310 | &gk20a_fifo_sched_debugfs_fops); | ||
311 | |||
312 | profile_root = debugfs_create_dir("profile", fifo_root); | ||
313 | if (IS_ERR_OR_NULL(profile_root)) | ||
314 | return; | ||
315 | |||
316 | nvgpu_mutex_init(&g->fifo.profile.lock); | ||
317 | g->fifo.profile.enabled = false; | ||
318 | atomic_set(&g->fifo.profile.get.atomic_var, 0); | ||
319 | atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0); | ||
320 | |||
321 | debugfs_create_file("enable", 0600, profile_root, g, | ||
322 | &gk20a_fifo_profile_enable_debugfs_fops); | ||
323 | |||
324 | debugfs_create_file("stats", 0600, profile_root, g, | ||
325 | &gk20a_fifo_profile_stats_debugfs_fops); | ||
326 | |||
327 | } | ||
328 | |||
329 | void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) | ||
330 | { | ||
331 | if (profile) | ||
332 | profile->timestamp[idx] = nvgpu_current_time_ns(); | ||
333 | } | ||
334 | |||
335 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref) | ||
336 | { | ||
337 | struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, | ||
338 | profile.ref); | ||
339 | nvgpu_vfree(f->g, f->profile.data); | ||
340 | nvgpu_vfree(f->g, f->profile.sorted); | ||
341 | } | ||
342 | |||
343 | /* Get the next element in the ring buffer of profile entries | ||
344 | * and grab a reference to the structure | ||
345 | */ | ||
346 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) | ||
347 | { | ||
348 | struct fifo_gk20a *f = &g->fifo; | ||
349 | struct fifo_profile_gk20a *profile; | ||
350 | unsigned int index; | ||
351 | |||
352 | /* If kref is zero, profiling is not enabled */ | ||
353 | if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) | ||
354 | return NULL; | ||
355 | index = atomic_inc_return(&f->profile.get.atomic_var); | ||
356 | profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; | ||
357 | |||
358 | return profile; | ||
359 | } | ||
360 | |||
361 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
362 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
363 | struct fifo_profile_gk20a *profile) | ||
364 | { | ||
365 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
366 | } | ||
367 | |||
368 | void gk20a_fifo_debugfs_deinit(struct gk20a *g) | ||
369 | { | ||
370 | struct fifo_gk20a *f = &g->fifo; | ||
371 | |||
372 | nvgpu_mutex_acquire(&f->profile.lock); | ||
373 | if (f->profile.enabled) { | ||
374 | f->profile.enabled = false; | ||
375 | nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
376 | } | ||
377 | nvgpu_mutex_release(&f->profile.lock); | ||
378 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_fifo.h b/drivers/gpu/nvgpu/os/linux/debug_fifo.h new file mode 100644 index 00000000..46ac853e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_fifo.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_FIFO_H__
#define __NVGPU_DEBUG_FIFO_H__

struct gk20a;
/* Set up the FIFO debugfs entries. */
void gk20a_fifo_debugfs_init(struct gk20a *g);
/* Tear down FIFO debugfs state; drops the profiling ref if enabled. */
void gk20a_fifo_debugfs_deinit(struct gk20a *g);

#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.c b/drivers/gpu/nvgpu/os/linux/debug_gr.c new file mode 100644 index 00000000..d54c6d63 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.c | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_gr.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | |||
/*
 * Create the GR debugfs node. Exposes the default attribute circular
 * buffer size as a read/write u32 under the device's debugfs directory.
 * Always returns 0.
 */
int gr_gk20a_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->debugfs_gr_default_attrib_cb_size =
		debugfs_create_u32("gr_default_attrib_cb_size",
				   S_IRUGO|S_IWUSR, l->debugfs,
				   &g->gr.attrib_cb_default_size);

	return 0;
}
31 | |||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_gr.h b/drivers/gpu/nvgpu/os/linux/debug_gr.h new file mode 100644 index 00000000..4b46acbb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_gr.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_GR_H__
#define __NVGPU_DEBUG_GR_H__

struct gk20a;
/* Create the GR debugfs node(s). Returns 0. */
int gr_gk20a_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.c b/drivers/gpu/nvgpu/os/linux/debug_hal.c new file mode 100644 index 00000000..031e335e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_hal.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
/* Format and print a single function pointer to the specified seq_file. */
static void __hal_print_op(struct seq_file *s, void *op_ptr)
{
	seq_printf(s, "%pF\n", op_ptr);
}

/*
 * Prints an array of function pointer addresses in op_ptrs to the
 * specified seq_file
 */
static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
{
	void **end = op_ptrs + num_ops;

	while (op_ptrs < end) {
		__hal_print_op(s, *op_ptrs);
		op_ptrs++;
	}
}
38 | |||
/*
 * Show file operation, which generates content of the file once. Prints a list
 * of gpu operations as defined by gops and the corresponding function pointer
 * destination addresses. Relies on no compiler reordering of struct fields and
 * assumption that all members are function pointers.
 */
static int __hal_show(struct seq_file *s, void *unused)
{
	struct gpu_ops *gops = s->private;

	/* Treat the whole gpu_ops struct as a flat array of pointers. */
	__hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));

	return 0;
}

static int __hal_open(struct inode *inode, struct file *file)
{
	return single_open(file, __hal_show, inode->i_private);
}

/* seq_file-backed read-only node; content produced by __hal_show(). */
static const struct file_operations __hal_fops = {
	.open = __hal_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
65 | |||
66 | void nvgpu_hal_debugfs_fini(struct gk20a *g) | ||
67 | { | ||
68 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
69 | |||
70 | if (!(l->debugfs_hal == NULL)) | ||
71 | debugfs_remove_recursive(l->debugfs_hal); | ||
72 | } | ||
73 | |||
74 | void nvgpu_hal_debugfs_init(struct gk20a *g) | ||
75 | { | ||
76 | struct dentry *d; | ||
77 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
78 | |||
79 | if (!l->debugfs) | ||
80 | return; | ||
81 | l->debugfs_hal = debugfs_create_dir("hal", l->debugfs); | ||
82 | if (IS_ERR_OR_NULL(l->debugfs_hal)) { | ||
83 | l->debugfs_hal = NULL; | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | /* Pass along reference to the gpu_ops struct as private data */ | ||
88 | d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal, | ||
89 | &g->ops, &__hal_fops); | ||
90 | if (!d) { | ||
91 | nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__); | ||
92 | debugfs_remove_recursive(l->debugfs_hal); | ||
93 | return; | ||
94 | } | ||
95 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_hal.h b/drivers/gpu/nvgpu/os/linux/debug_hal.h new file mode 100644 index 00000000..eee6f234 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_hal.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_HAL_H__
#define __NVGPU_DEBUG_HAL_H__

struct gk20a;
/* Remove the "hal" debugfs directory, if present. */
void nvgpu_hal_debugfs_fini(struct gk20a *g);
/* Create the "hal" debugfs directory and its "gops" dump node. */
void nvgpu_hal_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_HAL_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.c b/drivers/gpu/nvgpu/os/linux/debug_kmem.c new file mode 100644 index 00000000..a0c7d47d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.c | |||
@@ -0,0 +1,312 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/debugfs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | #include "debug_kmem.h" | ||
19 | #include "kmem_priv.h" | ||
20 | |||
21 | /** | ||
22 | * to_human_readable_bytes - Determine suffix for passed size. | ||
23 | * | ||
24 | * @bytes - Number of bytes to generate a suffix for. | ||
25 | * @hr_bytes [out] - The human readable number of bytes. | ||
26 | * @hr_suffix [out] - The suffix for the HR number of bytes. | ||
27 | * | ||
28 | * Computes a human readable decomposition of the passed number of bytes. The | ||
29 | * suffix for the bytes is passed back through the @hr_suffix pointer. The right | ||
30 | * number of bytes is then passed back in @hr_bytes. This returns the following | ||
31 | * ranges: | ||
32 | * | ||
33 | * 0 - 1023 B | ||
34 | * 1 - 1023 KB | ||
35 | * 1 - 1023 MB | ||
36 | * 1 - 1023 GB | ||
37 | * 1 - 1023 TB | ||
38 | * 1 - ... PB | ||
39 | */ | ||
40 | static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes, | ||
41 | const char **hr_suffix) | ||
42 | { | ||
43 | static const char *suffixes[] = | ||
44 | { "B", "KB", "MB", "GB", "TB", "PB" }; | ||
45 | |||
46 | u64 suffix_ind = 0; | ||
47 | |||
48 | while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) { | ||
49 | bytes >>= 10; | ||
50 | suffix_ind++; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Handle case where bytes > 1023PB. | ||
55 | */ | ||
56 | suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ? | ||
57 | suffix_ind : ARRAY_SIZE(suffixes) - 1; | ||
58 | |||
59 | *hr_bytes = bytes; | ||
60 | *hr_suffix = suffixes[suffix_ind]; | ||
61 | } | ||
62 | |||
/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed number
 * of bytes.
 *
 * If @s is NULL then this prints will be made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
	u64 hr_bytes;
	const char *hr_suffix;

	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}
83 | |||
84 | /** | ||
85 | * print_histogram - Build a histogram of the memory usage. | ||
86 | * | ||
87 | * @tracker The tracking to pull data from. | ||
88 | * @s A seq_file to dump info into. | ||
89 | */ | ||
90 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, | ||
91 | struct seq_file *s) | ||
92 | { | ||
93 | int i; | ||
94 | u64 pot_min, pot_max; | ||
95 | u64 nr_buckets; | ||
96 | unsigned int *buckets; | ||
97 | unsigned int total_allocs; | ||
98 | struct nvgpu_rbtree_node *node; | ||
99 | static const char histogram_line[] = | ||
100 | "++++++++++++++++++++++++++++++++++++++++"; | ||
101 | |||
102 | /* | ||
103 | * pot_min is essentially a round down to the nearest power of 2. This | ||
104 | * is the start of the histogram. pot_max is just a round up to the | ||
105 | * nearest power of two. Each histogram bucket is one power of two so | ||
106 | * the histogram buckets are exponential. | ||
107 | */ | ||
108 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
109 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
110 | |||
111 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
112 | |||
113 | buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); | ||
114 | if (!buckets) { | ||
115 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
116 | return; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Iterate across all of the allocs and determine what bucket they | ||
121 | * should go in. Round the size down to the nearest power of two to | ||
122 | * find the right bucket. | ||
123 | */ | ||
124 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
125 | while (node) { | ||
126 | int b; | ||
127 | u64 bucket_min; | ||
128 | struct nvgpu_mem_alloc *alloc = | ||
129 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
130 | |||
131 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
132 | if (bucket_min < tracker->min_alloc) | ||
133 | bucket_min = tracker->min_alloc; | ||
134 | |||
135 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
136 | |||
137 | /* | ||
138 | * Handle the one case were there's an alloc exactly as big as | ||
139 | * the maximum bucket size of the largest bucket. Most of the | ||
140 | * buckets have an inclusive minimum and exclusive maximum. But | ||
141 | * the largest bucket needs to have an _inclusive_ maximum as | ||
142 | * well. | ||
143 | */ | ||
144 | if (b == (int)nr_buckets) | ||
145 | b--; | ||
146 | |||
147 | buckets[b]++; | ||
148 | |||
149 | nvgpu_rbtree_enum_next(&node, node); | ||
150 | } | ||
151 | |||
152 | total_allocs = 0; | ||
153 | for (i = 0; i < (int)nr_buckets; i++) | ||
154 | total_allocs += buckets[i]; | ||
155 | |||
156 | __pstat(s, "Alloc histogram:\n"); | ||
157 | |||
158 | /* | ||
159 | * Actually compute the histogram lines. | ||
160 | */ | ||
161 | for (i = 0; i < (int)nr_buckets; i++) { | ||
162 | char this_line[sizeof(histogram_line) + 1]; | ||
163 | u64 line_length; | ||
164 | u64 hr_bytes; | ||
165 | const char *hr_suffix; | ||
166 | |||
167 | memset(this_line, 0, sizeof(this_line)); | ||
168 | |||
169 | /* | ||
170 | * Compute the normalized line length. Cant use floating point | ||
171 | * so we will just multiply everything by 1000 and use fixed | ||
172 | * point. | ||
173 | */ | ||
174 | line_length = (1000 * buckets[i]) / total_allocs; | ||
175 | line_length *= sizeof(histogram_line); | ||
176 | line_length /= 1000; | ||
177 | |||
178 | memset(this_line, '+', line_length); | ||
179 | |||
180 | __to_human_readable_bytes(1 << (__ffs(pot_min) + i), | ||
181 | &hr_bytes, &hr_suffix); | ||
182 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
183 | hr_bytes, hr_bytes << 1, | ||
184 | hr_suffix, buckets[i], this_line); | ||
185 | } | ||
186 | } | ||
187 | |||
/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed. The tracker lock is
 * held for the duration of the dump so the counters stay consistent.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}
223 | |||
/* seq_file show: dump the stats of the tracker stored as private data. */
static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
	struct nvgpu_mem_alloc_tracker *tracker = s->private;

	nvgpu_kmem_print_stats(tracker, s);

	return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}

/* Read-only per-tracker stats node. */
static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
244 | |||
245 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
246 | struct nvgpu_mem_alloc_tracker *tracker, | ||
247 | struct seq_file *s) | ||
248 | { | ||
249 | struct nvgpu_rbtree_node *node; | ||
250 | |||
251 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
252 | while (node) { | ||
253 | struct nvgpu_mem_alloc *alloc = | ||
254 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
255 | |||
256 | kmem_print_mem_alloc(g, alloc, s); | ||
257 | |||
258 | nvgpu_rbtree_enum_next(&node, node); | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
265 | { | ||
266 | struct gk20a *g = s->private; | ||
267 | |||
268 | nvgpu_lock_tracker(g->vmallocs); | ||
269 | seq_puts(s, "Oustanding vmallocs:\n"); | ||
270 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
271 | seq_puts(s, "\n"); | ||
272 | nvgpu_unlock_tracker(g->vmallocs); | ||
273 | |||
274 | nvgpu_lock_tracker(g->kmallocs); | ||
275 | seq_puts(s, "Oustanding kmallocs:\n"); | ||
276 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
277 | nvgpu_unlock_tracker(g->kmallocs); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}

/* Read-only node listing all outstanding tracked allocations. */
static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
293 | |||
294 | void nvgpu_kmem_debugfs_init(struct gk20a *g) | ||
295 | { | ||
296 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
297 | struct dentry *node; | ||
298 | |||
299 | l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs); | ||
300 | if (IS_ERR_OR_NULL(l->debugfs_kmem)) | ||
301 | return; | ||
302 | |||
303 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
304 | l->debugfs_kmem, | ||
305 | g->vmallocs, &__kmem_tracking_fops); | ||
306 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
307 | l->debugfs_kmem, | ||
308 | g->kmallocs, &__kmem_tracking_fops); | ||
309 | node = debugfs_create_file("traces", S_IRUGO, | ||
310 | l->debugfs_kmem, | ||
311 | g, &__kmem_traces_fops); | ||
312 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_kmem.h b/drivers/gpu/nvgpu/os/linux/debug_kmem.h new file mode 100644 index 00000000..44322b53 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_kmem.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_KMEM_H__
#define __NVGPU_DEBUG_KMEM_H__

struct gk20a;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
/* Create the kmem_tracking debugfs directory and its nodes. */
void nvgpu_kmem_debugfs_init(struct gk20a *g);
#endif

#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.c b/drivers/gpu/nvgpu/os/linux/debug_pmu.c new file mode 100644 index 00000000..f4ed992d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.c | |||
@@ -0,0 +1,481 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <nvgpu/enabled.h> | ||
16 | #include "debug_pmu.h" | ||
17 | #include "os_linux.h" | ||
18 | |||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
/*
 * Dump low-power state. When the PMU exposes a graphics PG feature list
 * other than plain power gating, print pstate/RPPG/MSCG details;
 * otherwise print the basic ELPG counters.
 */
static int lpwr_debug_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	if (g->ops.pmu.pmu_pg_engines_feature_list &&
		g->ops.pmu.pmu_pg_engines_feature_list(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
		NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
		seq_printf(s, "PSTATE: %u\n"
			"RPPG Enabled: %u\n"
			"RPPG ref count: %u\n"
			"RPPG state: %u\n"
			"MSCG Enabled: %u\n"
			"MSCG pstate state: %u\n"
			"MSCG transition state: %u\n",
			g->ops.clk_arb.get_current_pstate(g),
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat, g->mscg_enabled,
			g->pmu.mscg_stat, g->pmu.mscg_transition_state);

	} else
		seq_printf(s, "ELPG Enabled: %u\n"
			"ELPG ref count: %u\n"
			"ELPG state: %u\n",
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat);

	return 0;

}

static int lpwr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, lpwr_debug_show, inode->i_private);
}

static const struct file_operations lpwr_debug_fops = {
	.open = lpwr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
65 | |||
/*
 * Dump MSCG residency statistics: accumulated saved counters plus live
 * PMU counters (only queried when the GPU is already powered on).
 * Residency is reported on a 0-1000 fixed-point scale.
 */
static int mscg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
		gk20a_idle(g);
	}
	/* Saved totals + this power cycle's live counters. */
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;

	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in MSCG: %llu us\n"
		"Time out of MSCG: %llu us\n"
		"MSCG residency ratio: %llu\n"
		"MSCG Entry Count: %u\n"
		"MSCG Avg Entry latency %u\n"
		"MSCG Avg Exit latency %u\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;

}

static int mscg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_stat_show, inode->i_private);
}

static const struct file_operations mscg_stat_fops = {
	.open = mscg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
124 | |||
/* Print the total MSCG gating count (saved + live PMU counter). */
static int mscg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u32 total_gating_cnt;
	int err;

	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;

}

static int mscg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_transitions_show, inode->i_private);
}

static const struct file_operations mscg_transitions_fops = {
	.open = mscg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
159 | |||
/*
 * Dump ELPG residency statistics for the graphics engine; mirrors
 * mscg_stat_show() but queries PMU_PG_ELPG_ENGINE_ID_GRAPHICS.
 */
static int elpg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in ELPG: %llu us\n"
		"Time out of ELPG: %llu us\n"
		"ELPG residency ratio: %llu\n"
		"ELPG Entry Count: %u\n"
		"ELPG Avg Entry latency %u us\n"
		"ELPG Avg Exit latency %u us\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;

}

static int elpg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_stat_show, inode->i_private);
}

static const struct file_operations elpg_stat_fops = {
	.open = elpg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
217 | |||
/* Print the total ELPG gating count (saved + live PMU counter). */
static int elpg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u32 total_gating_cnt;
	int err;

	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;

}

static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}

static const struct file_operations elpg_transitions_fops = {
	.open = elpg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
252 | |||
/*
 * Dump the PMU falcon trace buffer. The buffer is copied to system
 * memory and walked in 0x40-byte records; each record's leading word is
 * printed as an index, then hex placeholders found in the record's text
 * portion are substituted with argument words from the record.
 */
static int falc_trace_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct nvgpu_pmu *pmu = &g->pmu;
	u32 i = 0, j = 0, k, l, m;
	char part_str[40];
	void *tracebuffer;
	char *trace;
	u32 *trace1;

	/* allocate system memory to copy pmu trace buffer */
	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
	if (tracebuffer == NULL)
		return -ENOMEM;

	/* read pmu traces into system memory buffer */
	nvgpu_mem_rd_n(g, &pmu->trace_buf,
		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);

	trace = (char *)tracebuffer;
	trace1 = (u32 *)tracebuffer;

	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
		/* Scan ahead for a non-zero word; all-zero means no more
		 * records (NOTE: scans 0x40 words, not 0x40 bytes). */
		for (j = 0; j < 0x40; j++)
			if (trace1[(i / 4) + j])
				break;
		if (j == 0x40)
			break;
		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
		l = 0;
		m = 0;
		/* Text starts 20 bytes into the record; replace each hex
		 * placeholder with the next argument word. */
		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
			if (k >= 40)
				break;
			strncpy(part_str, (trace+i+20+m), k);
			part_str[k] = 0;
			seq_printf(s, "%s0x%x", part_str,
					trace1[(i / 4) + 1 + l]);
			l++;
			m += k + 2;
		}
		seq_printf(s, "%s", (trace+i+20+m));
	}

	nvgpu_kfree(g, tracebuffer);
	return 0;
}

static int falc_trace_open(struct inode *inode, struct file *file)
{
	return single_open(file, falc_trace_show, inode->i_private);
}

static const struct file_operations falc_trace_fops = {
	.open = falc_trace_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
312 | |||
313 | static int perfmon_events_enable_show(struct seq_file *s, void *data) | ||
314 | { | ||
315 | struct gk20a *g = s->private; | ||
316 | |||
317 | seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); | ||
318 | return 0; | ||
319 | |||
320 | } | ||
321 | |||
322 | static int perfmon_events_enable_open(struct inode *inode, struct file *file) | ||
323 | { | ||
324 | return single_open(file, perfmon_events_enable_show, inode->i_private); | ||
325 | } | ||
326 | |||
327 | static ssize_t perfmon_events_enable_write(struct file *file, | ||
328 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
329 | { | ||
330 | struct seq_file *s = file->private_data; | ||
331 | struct gk20a *g = s->private; | ||
332 | unsigned long val = 0; | ||
333 | char buf[40]; | ||
334 | int buf_size; | ||
335 | int err; | ||
336 | |||
337 | memset(buf, 0, sizeof(buf)); | ||
338 | buf_size = min(count, (sizeof(buf)-1)); | ||
339 | |||
340 | if (copy_from_user(buf, userbuf, buf_size)) | ||
341 | return -EFAULT; | ||
342 | |||
343 | if (kstrtoul(buf, 10, &val) < 0) | ||
344 | return -EINVAL; | ||
345 | |||
346 | /* Don't turn on gk20a unnecessarily */ | ||
347 | if (g->power_on) { | ||
348 | err = gk20a_busy(g); | ||
349 | if (err) | ||
350 | return err; | ||
351 | |||
352 | if (val && !g->pmu.perfmon_sampling_enabled && | ||
353 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
354 | g->pmu.perfmon_sampling_enabled = true; | ||
355 | g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); | ||
356 | } else if (!val && g->pmu.perfmon_sampling_enabled && | ||
357 | nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { | ||
358 | g->pmu.perfmon_sampling_enabled = false; | ||
359 | g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); | ||
360 | } | ||
361 | gk20a_idle(g); | ||
362 | } else { | ||
363 | g->pmu.perfmon_sampling_enabled = val ? true : false; | ||
364 | } | ||
365 | |||
366 | return count; | ||
367 | } | ||
368 | |||
/* Read/write debugfs node toggling PMU perfmon sampling. */
static const struct file_operations perfmon_events_enable_fops = {
	.open = perfmon_events_enable_open,
	.read = seq_read,
	.write = perfmon_events_enable_write,
	.llseek = seq_lseek,
	.release = single_release,
};
376 | |||
377 | static int perfmon_events_count_show(struct seq_file *s, void *data) | ||
378 | { | ||
379 | struct gk20a *g = s->private; | ||
380 | |||
381 | seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); | ||
382 | return 0; | ||
383 | |||
384 | } | ||
385 | |||
386 | static int perfmon_events_count_open(struct inode *inode, struct file *file) | ||
387 | { | ||
388 | return single_open(file, perfmon_events_count_show, inode->i_private); | ||
389 | } | ||
390 | |||
/* Read-only debugfs node reporting the perfmon event count. */
static const struct file_operations perfmon_events_count_fops = {
	.open = perfmon_events_count_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
397 | |||
398 | static int security_show(struct seq_file *s, void *data) | ||
399 | { | ||
400 | struct gk20a *g = s->private; | ||
401 | |||
402 | seq_printf(s, "%d\n", g->pmu.pmu_mode); | ||
403 | return 0; | ||
404 | |||
405 | } | ||
406 | |||
407 | static int security_open(struct inode *inode, struct file *file) | ||
408 | { | ||
409 | return single_open(file, security_show, inode->i_private); | ||
410 | } | ||
411 | |||
/* Read-only debugfs node exposing the PMU security mode. */
static const struct file_operations security_fops = {
	.open = security_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
418 | |||
419 | int gk20a_pmu_debugfs_init(struct gk20a *g) | ||
420 | { | ||
421 | struct dentry *d; | ||
422 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
423 | |||
424 | d = debugfs_create_file( | ||
425 | "lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
426 | &lpwr_debug_fops); | ||
427 | if (!d) | ||
428 | goto err_out; | ||
429 | |||
430 | d = debugfs_create_file( | ||
431 | "mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
432 | &mscg_stat_fops); | ||
433 | if (!d) | ||
434 | goto err_out; | ||
435 | |||
436 | d = debugfs_create_file( | ||
437 | "mscg_transitions", S_IRUGO, l->debugfs, g, | ||
438 | &mscg_transitions_fops); | ||
439 | if (!d) | ||
440 | goto err_out; | ||
441 | |||
442 | d = debugfs_create_file( | ||
443 | "elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g, | ||
444 | &elpg_stat_fops); | ||
445 | if (!d) | ||
446 | goto err_out; | ||
447 | |||
448 | d = debugfs_create_file( | ||
449 | "elpg_transitions", S_IRUGO, l->debugfs, g, | ||
450 | &elpg_transitions_fops); | ||
451 | if (!d) | ||
452 | goto err_out; | ||
453 | |||
454 | d = debugfs_create_file( | ||
455 | "falc_trace", S_IRUGO, l->debugfs, g, | ||
456 | &falc_trace_fops); | ||
457 | if (!d) | ||
458 | goto err_out; | ||
459 | |||
460 | d = debugfs_create_file( | ||
461 | "perfmon_events_enable", S_IRUGO, l->debugfs, g, | ||
462 | &perfmon_events_enable_fops); | ||
463 | if (!d) | ||
464 | goto err_out; | ||
465 | |||
466 | d = debugfs_create_file( | ||
467 | "perfmon_events_count", S_IRUGO, l->debugfs, g, | ||
468 | &perfmon_events_count_fops); | ||
469 | if (!d) | ||
470 | goto err_out; | ||
471 | |||
472 | d = debugfs_create_file( | ||
473 | "pmu_security", S_IRUGO, l->debugfs, g, | ||
474 | &security_fops); | ||
475 | if (!d) | ||
476 | goto err_out; | ||
477 | return 0; | ||
478 | err_out: | ||
479 | pr_err("%s: Failed to make debugfs node\n", __func__); | ||
480 | return -ENOMEM; | ||
481 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_pmu.h b/drivers/gpu/nvgpu/os/linux/debug_pmu.h new file mode 100644 index 00000000..c4e3243d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_PMU_H__
#define __NVGPU_DEBUG_PMU_H__

struct gk20a;

/* Create the PMU debugfs nodes; returns 0 on success, -ENOMEM on failure. */
int gk20a_pmu_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.c b/drivers/gpu/nvgpu/os/linux/debug_sched.c new file mode 100644 index 00000000..5b7cbddf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include "debug_sched.h" | ||
16 | #include "os_linux.h" | ||
17 | |||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused) | ||
22 | { | ||
23 | struct gk20a *g = s->private; | ||
24 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
25 | struct gk20a_sched_ctrl *sched = &l->sched_ctrl; | ||
26 | bool sched_busy = true; | ||
27 | |||
28 | int n = sched->bitmap_size / sizeof(u64); | ||
29 | int i; | ||
30 | int err; | ||
31 | |||
32 | err = gk20a_busy(g); | ||
33 | if (err) | ||
34 | return err; | ||
35 | |||
36 | if (nvgpu_mutex_tryacquire(&sched->busy_lock)) { | ||
37 | sched_busy = false; | ||
38 | nvgpu_mutex_release(&sched->busy_lock); | ||
39 | } | ||
40 | |||
41 | seq_printf(s, "control_locked=%d\n", sched->control_locked); | ||
42 | seq_printf(s, "busy=%d\n", sched_busy); | ||
43 | seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size); | ||
44 | |||
45 | nvgpu_mutex_acquire(&sched->status_lock); | ||
46 | |||
47 | seq_puts(s, "active_tsg_bitmap\n"); | ||
48 | for (i = 0; i < n; i++) | ||
49 | seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]); | ||
50 | |||
51 | seq_puts(s, "recent_tsg_bitmap\n"); | ||
52 | for (i = 0; i < n; i++) | ||
53 | seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]); | ||
54 | |||
55 | nvgpu_mutex_release(&sched->status_lock); | ||
56 | |||
57 | gk20a_idle(g); | ||
58 | |||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file) | ||
63 | { | ||
64 | return single_open(file, gk20a_sched_debugfs_show, inode->i_private); | ||
65 | } | ||
66 | |||
/* Read-only debugfs node dumping the scheduler control state. */
static const struct file_operations gk20a_sched_debugfs_fops = {
	.open = gk20a_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
73 | |||
74 | void gk20a_sched_debugfs_init(struct gk20a *g) | ||
75 | { | ||
76 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
77 | |||
78 | debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs, | ||
79 | g, &gk20a_sched_debugfs_fops); | ||
80 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_sched.h b/drivers/gpu/nvgpu/os/linux/debug_sched.h new file mode 100644 index 00000000..34a8f55f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_sched.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_SCHED_H__
#define __NVGPU_DEBUG_SCHED_H__

struct gk20a;

/* Create the "sched_ctrl" debugfs node; failures are silently ignored. */
void gk20a_sched_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.c b/drivers/gpu/nvgpu/os/linux/debug_xve.c new file mode 100644 index 00000000..743702a2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.c | |||
@@ -0,0 +1,176 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <nvgpu/types.h> | ||
16 | #include <nvgpu/xve.h> | ||
17 | |||
18 | #include "debug_xve.h" | ||
19 | #include "os_linux.h" | ||
20 | |||
21 | #include <linux/debugfs.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | |||
24 | static ssize_t xve_link_speed_write(struct file *filp, | ||
25 | const char __user *buff, | ||
26 | size_t len, loff_t *off) | ||
27 | { | ||
28 | struct gk20a *g = ((struct seq_file *)filp->private_data)->private; | ||
29 | char kbuff[16]; | ||
30 | u32 buff_size, check_len; | ||
31 | u32 link_speed = 0; | ||
32 | int ret; | ||
33 | |||
34 | buff_size = min_t(size_t, 16, len); | ||
35 | |||
36 | memset(kbuff, 0, 16); | ||
37 | if (copy_from_user(kbuff, buff, buff_size)) | ||
38 | return -EFAULT; | ||
39 | |||
40 | check_len = strlen("Gen1"); | ||
41 | if (strncmp(kbuff, "Gen1", check_len) == 0) | ||
42 | link_speed = GPU_XVE_SPEED_2P5; | ||
43 | else if (strncmp(kbuff, "Gen2", check_len) == 0) | ||
44 | link_speed = GPU_XVE_SPEED_5P0; | ||
45 | else if (strncmp(kbuff, "Gen3", check_len) == 0) | ||
46 | link_speed = GPU_XVE_SPEED_8P0; | ||
47 | else | ||
48 | nvgpu_err(g, "%s: Unknown PCIe speed: %s", | ||
49 | __func__, kbuff); | ||
50 | |||
51 | if (!link_speed) | ||
52 | return -EINVAL; | ||
53 | |||
54 | /* Brief pause... To help rate limit this. */ | ||
55 | nvgpu_msleep(250); | ||
56 | |||
57 | /* | ||
58 | * And actually set the speed. Yay. | ||
59 | */ | ||
60 | ret = g->ops.xve.set_speed(g, link_speed); | ||
61 | if (ret) | ||
62 | return ret; | ||
63 | |||
64 | return len; | ||
65 | } | ||
66 | |||
67 | static int xve_link_speed_show(struct seq_file *s, void *unused) | ||
68 | { | ||
69 | struct gk20a *g = s->private; | ||
70 | u32 speed; | ||
71 | int err; | ||
72 | |||
73 | err = g->ops.xve.get_speed(g, &speed); | ||
74 | if (err) | ||
75 | return err; | ||
76 | |||
77 | seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed)); | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static int xve_link_speed_open(struct inode *inode, struct file *file) | ||
83 | { | ||
84 | return single_open(file, xve_link_speed_show, inode->i_private); | ||
85 | } | ||
86 | |||
/* Read/write debugfs node: read current link speed, write a new one. */
static const struct file_operations xve_link_speed_fops = {
	.open = xve_link_speed_open,
	.read = seq_read,
	.write = xve_link_speed_write,
	.llseek = seq_lseek,
	.release = single_release,
};
94 | |||
95 | static int xve_available_speeds_show(struct seq_file *s, void *unused) | ||
96 | { | ||
97 | struct gk20a *g = s->private; | ||
98 | u32 available_speeds; | ||
99 | |||
100 | g->ops.xve.available_speeds(g, &available_speeds); | ||
101 | |||
102 | seq_puts(s, "Available PCIe bus speeds:\n"); | ||
103 | if (available_speeds & GPU_XVE_SPEED_2P5) | ||
104 | seq_puts(s, " Gen1\n"); | ||
105 | if (available_speeds & GPU_XVE_SPEED_5P0) | ||
106 | seq_puts(s, " Gen2\n"); | ||
107 | if (available_speeds & GPU_XVE_SPEED_8P0) | ||
108 | seq_puts(s, " Gen3\n"); | ||
109 | |||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static int xve_available_speeds_open(struct inode *inode, struct file *file) | ||
114 | { | ||
115 | return single_open(file, xve_available_speeds_show, inode->i_private); | ||
116 | } | ||
117 | |||
/* Read-only debugfs node listing supported PCIe link speeds. */
static const struct file_operations xve_available_speeds_fops = {
	.open = xve_available_speeds_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
124 | |||
125 | static int xve_link_control_status_show(struct seq_file *s, void *unused) | ||
126 | { | ||
127 | struct gk20a *g = s->private; | ||
128 | u32 link_status; | ||
129 | |||
130 | link_status = g->ops.xve.get_link_control_status(g); | ||
131 | seq_printf(s, "0x%08x\n", link_status); | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static int xve_link_control_status_open(struct inode *inode, struct file *file) | ||
137 | { | ||
138 | return single_open(file, xve_link_control_status_show, inode->i_private); | ||
139 | } | ||
140 | |||
/* Read-only debugfs node exposing the raw link control/status register. */
static const struct file_operations xve_link_control_status_fops = {
	.open = xve_link_control_status_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
147 | |||
148 | int nvgpu_xve_debugfs_init(struct gk20a *g) | ||
149 | { | ||
150 | int err = -ENODEV; | ||
151 | |||
152 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
153 | struct dentry *gpu_root = l->debugfs; | ||
154 | |||
155 | l->debugfs_xve = debugfs_create_dir("xve", gpu_root); | ||
156 | if (IS_ERR_OR_NULL(l->debugfs_xve)) | ||
157 | goto fail; | ||
158 | |||
159 | /* | ||
160 | * These are just debug nodes. If they fail to get made it's not worth | ||
161 | * worrying the higher level SW. | ||
162 | */ | ||
163 | debugfs_create_file("link_speed", S_IRUGO, | ||
164 | l->debugfs_xve, g, | ||
165 | &xve_link_speed_fops); | ||
166 | debugfs_create_file("available_speeds", S_IRUGO, | ||
167 | l->debugfs_xve, g, | ||
168 | &xve_available_speeds_fops); | ||
169 | debugfs_create_file("link_control_status", S_IRUGO, | ||
170 | l->debugfs_xve, g, | ||
171 | &xve_link_control_status_fops); | ||
172 | |||
173 | err = 0; | ||
174 | fail: | ||
175 | return err; | ||
176 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/debug_xve.h b/drivers/gpu/nvgpu/os/linux/debug_xve.h new file mode 100644 index 00000000..f3b1ac54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/debug_xve.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
#ifndef __NVGPU_DEBUG_XVE_H__
#define __NVGPU_DEBUG_XVE_H__

struct gk20a;

/* Create the "xve" debugfs directory and nodes; returns 0 or -ENODEV. */
int nvgpu_xve_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_XVE_H__ */
diff --git a/drivers/gpu/nvgpu/os/linux/dma.c b/drivers/gpu/nvgpu/os/linux/dma.c new file mode 100644 index 00000000..f513dcd6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dma.c | |||
@@ -0,0 +1,694 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-mapping.h> | ||
18 | #include <linux/version.h> | ||
19 | |||
20 | #include <nvgpu/log.h> | ||
21 | #include <nvgpu/dma.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/bug.h> | ||
24 | #include <nvgpu/gmmu.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/enabled.h> | ||
27 | #include <nvgpu/vidmem.h> | ||
28 | |||
29 | #include <nvgpu/linux/dma.h> | ||
30 | #include <nvgpu/linux/vidmem.h> | ||
31 | |||
32 | #include "gk20a/gk20a.h" | ||
33 | |||
34 | #include "platform_gk20a.h" | ||
35 | #include "os_linux.h" | ||
36 | |||
/*
 * Kernel-version shims for DMA attributes: older kernels (which define
 * __DMA_ATTRS_LONGS) pass a struct dma_attrs by pointer, newer kernels
 * use a plain unsigned long bitmask. These wrappers keep the alloc/free
 * paths below version agnostic.
 */
#ifdef __DMA_ATTRS_LONGS
#define NVGPU_DEFINE_DMA_ATTRS(x)					\
	struct dma_attrs x = {						\
		.flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 },		\
	}
#define NVGPU_DMA_ATTR(attrs) &attrs
#else
#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
#define NVGPU_DMA_ATTR(attrs) attrs
#endif

/*
 * Enough to hold all the possible flags in string form. When a new flag is
 * added it must be added here as well!!
 */
#define NVGPU_DMA_STR_SIZE \
	sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")
54 | |||
55 | /* | ||
56 | * The returned string is kmalloc()ed here but must be freed by the caller. | ||
57 | */ | ||
58 | static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags) | ||
59 | { | ||
60 | char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE); | ||
61 | int bytes_available = NVGPU_DMA_STR_SIZE; | ||
62 | |||
63 | /* | ||
64 | * Return the empty buffer if there's no flags. Makes it easier on the | ||
65 | * calling code to just print it instead of any if (NULL) type logic. | ||
66 | */ | ||
67 | if (!flags) | ||
68 | return buf; | ||
69 | |||
70 | #define APPEND_FLAG(flag, str_flag) \ | ||
71 | do { \ | ||
72 | if (flags & flag) { \ | ||
73 | strncat(buf, str_flag, bytes_available); \ | ||
74 | bytes_available -= strlen(str_flag); \ | ||
75 | } \ | ||
76 | } while (0) | ||
77 | |||
78 | APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING "); | ||
79 | APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS "); | ||
80 | #undef APPEND_FLAG | ||
81 | |||
82 | return buf; | ||
83 | } | ||
84 | |||
85 | /** | ||
86 | * __dma_dbg - Debug print for DMA allocs and frees. | ||
87 | * | ||
88 | * @g - The GPU. | ||
89 | * @size - The requested size of the alloc (size_t). | ||
90 | * @flags - The flags (unsigned long). | ||
91 | * @type - A string describing the type (i.e: sysmem or vidmem). | ||
92 | * @what - A string with 'alloc' or 'free'. | ||
93 | * | ||
94 | * @flags is the DMA flags. If there are none or it doesn't make sense to print | ||
95 | * flags just pass 0. | ||
96 | * | ||
97 | * Please use dma_dbg_alloc() and dma_dbg_free() instead of this function. | ||
98 | */ | ||
99 | static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags, | ||
100 | const char *type, const char *what) | ||
101 | { | ||
102 | char *flags_str = NULL; | ||
103 | |||
104 | /* | ||
105 | * Don't bother making the flags_str if debugging is | ||
106 | * not enabled. This saves a malloc and a free. | ||
107 | */ | ||
108 | if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma)) | ||
109 | return; | ||
110 | |||
111 | flags_str = nvgpu_dma_flags_to_str(g, flags); | ||
112 | |||
113 | __nvgpu_log_dbg(g, gpu_dbg_dma, | ||
114 | __func__, __LINE__, | ||
115 | "DMA %s: [%s] size=%-7zu " | ||
116 | "aligned=%-7zu total=%-10llukB %s", | ||
117 | what, type, | ||
118 | size, PAGE_ALIGN(size), | ||
119 | g->dma_memory_used >> 10, | ||
120 | flags_str); | ||
121 | |||
122 | if (flags_str) | ||
123 | nvgpu_kfree(g, flags_str); | ||
124 | } | ||
125 | |||
#define dma_dbg_alloc(g, size, flags, type) \
	__dma_dbg(g, size, flags, type, "alloc")
#define dma_dbg_free(g, size, flags, type) \
	__dma_dbg(g, size, flags, type, "free")

/*
 * For after the DMA alloc is done. Wrapped in do { } while (0) so the
 * macro behaves as a single statement: the previous expansion carried
 * its own trailing semicolon (plus a stray line continuation), which
 * breaks when the macro is used inside an unbraced if/else.
 */
#define __dma_dbg_done(g, size, type, what)			\
	do {							\
		nvgpu_log(g, gpu_dbg_dma,			\
			"DMA %s: [%s] size=%-7zu Done!",	\
			what, type, size);			\
	} while (0)

#define dma_dbg_alloc_done(g, size, type) \
	__dma_dbg_done(g, size, type, "alloc")
#define dma_dbg_free_done(g, size, type) \
	__dma_dbg_done(g, size, type, "free")
143 | |||
#if defined(CONFIG_GK20A_VIDMEM)
/*
 * Allocate @size bytes from @allocator; when @at is nonzero the
 * allocation is placed at that fixed offset. Returns 0 on failure.
 */
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
			     size_t size)
{
	if (at)
		return nvgpu_alloc_fixed(allocator, at, size, 0);

	return nvgpu_alloc(allocator, size);
}
#endif
158 | |||
/*
 * Translate NVGPU_DMA_* flags into Linux DMA-API attributes. Two builds:
 * kernels >= 4.9 receive the attributes as a plain unsigned long bitmask,
 * older kernels as a struct dma_attrs pointer. ATTR_ARG papers over the
 * dereference difference between the two dma_set_attr() signatures.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
		unsigned long flags)
#define ATTR_ARG(x) *x
#else
static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
		unsigned long flags)
#define ATTR_ARG(x) x
#endif
{
	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
	if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
		dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
#undef ATTR_ARG
}
175 | |||
176 | int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) | ||
177 | { | ||
178 | return nvgpu_dma_alloc_flags(g, 0, size, mem); | ||
179 | } | ||
180 | |||
181 | int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, | ||
182 | struct nvgpu_mem *mem) | ||
183 | { | ||
184 | if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { | ||
185 | /* | ||
186 | * Force the no-kernel-mapping flag on because we don't support | ||
187 | * the lack of it for vidmem - the user should not care when | ||
188 | * using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a | ||
189 | * difference, the user should use the flag explicitly anyway. | ||
190 | * | ||
191 | * Incoming flags are ignored here, since bits other than the | ||
192 | * no-kernel-mapping flag are ignored by the vidmem mapping | ||
193 | * functions anyway. | ||
194 | */ | ||
195 | int err = nvgpu_dma_alloc_flags_vid(g, | ||
196 | NVGPU_DMA_NO_KERNEL_MAPPING, | ||
197 | size, mem); | ||
198 | |||
199 | if (!err) | ||
200 | return 0; | ||
201 | /* | ||
202 | * Fall back to sysmem (which may then also fail) in case | ||
203 | * vidmem is exhausted. | ||
204 | */ | ||
205 | } | ||
206 | |||
207 | return nvgpu_dma_alloc_flags_sys(g, flags, size, mem); | ||
208 | } | ||
209 | |||
210 | int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) | ||
211 | { | ||
212 | return nvgpu_dma_alloc_flags_sys(g, 0, size, mem); | ||
213 | } | ||
214 | |||
215 | int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | ||
216 | size_t size, struct nvgpu_mem *mem) | ||
217 | { | ||
218 | struct device *d = dev_from_gk20a(g); | ||
219 | int err; | ||
220 | dma_addr_t iova; | ||
221 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
222 | void *alloc_ret; | ||
223 | |||
224 | if (nvgpu_mem_is_valid(mem)) { | ||
225 | nvgpu_warn(g, "memory leak !!"); | ||
226 | WARN_ON(1); | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
231 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
232 | * | ||
233 | * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING | ||
234 | * and then make a vmap() ourselves. | ||
235 | */ | ||
236 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
237 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
238 | |||
239 | /* | ||
240 | * Before the debug print so we see this in the total. But during | ||
241 | * cleanup in the fail path this has to be subtracted. | ||
242 | */ | ||
243 | g->dma_memory_used += PAGE_ALIGN(size); | ||
244 | |||
245 | dma_dbg_alloc(g, size, flags, "sysmem"); | ||
246 | |||
247 | /* | ||
248 | * Save the old size but for actual allocation purposes the size is | ||
249 | * going to be page aligned. | ||
250 | */ | ||
251 | mem->size = size; | ||
252 | size = PAGE_ALIGN(size); | ||
253 | |||
254 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
255 | |||
256 | alloc_ret = dma_alloc_attrs(d, size, &iova, | ||
257 | GFP_KERNEL|__GFP_ZERO, | ||
258 | NVGPU_DMA_ATTR(dma_attrs)); | ||
259 | if (!alloc_ret) | ||
260 | return -ENOMEM; | ||
261 | |||
262 | if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { | ||
263 | mem->priv.pages = alloc_ret; | ||
264 | err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt, | ||
265 | mem->priv.pages, | ||
266 | iova, size); | ||
267 | } else { | ||
268 | mem->cpu_va = alloc_ret; | ||
269 | err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va, | ||
270 | iova, size, flags); | ||
271 | } | ||
272 | if (err) | ||
273 | goto fail_free_dma; | ||
274 | |||
275 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
276 | mem->cpu_va = vmap(mem->priv.pages, | ||
277 | size >> PAGE_SHIFT, | ||
278 | 0, PAGE_KERNEL); | ||
279 | if (!mem->cpu_va) { | ||
280 | err = -ENOMEM; | ||
281 | goto fail_free_sgt; | ||
282 | } | ||
283 | } | ||
284 | |||
285 | mem->aligned_size = size; | ||
286 | mem->aperture = APERTURE_SYSMEM; | ||
287 | mem->priv.flags = flags; | ||
288 | |||
289 | dma_dbg_alloc_done(g, mem->size, "sysmem"); | ||
290 | |||
291 | return 0; | ||
292 | |||
293 | fail_free_sgt: | ||
294 | nvgpu_free_sgtable(g, &mem->priv.sgt); | ||
295 | fail_free_dma: | ||
296 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | ||
297 | mem->cpu_va = NULL; | ||
298 | mem->priv.sgt = NULL; | ||
299 | mem->size = 0; | ||
300 | g->dma_memory_used -= mem->aligned_size; | ||
301 | return err; | ||
302 | } | ||
303 | |||
304 | int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) | ||
305 | { | ||
306 | return nvgpu_dma_alloc_flags_vid(g, | ||
307 | NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); | ||
308 | } | ||
309 | |||
310 | int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags, | ||
311 | size_t size, struct nvgpu_mem *mem) | ||
312 | { | ||
313 | return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0); | ||
314 | } | ||
315 | |||
/*
 * Allocate @size bytes of vidmem for @mem, optionally at fixed address
 * @at (when nonzero). Builds a one-entry sgtable whose sgl carries the
 * vidmem page-alloc handle rather than a struct page.
 *
 * Returns 0 on success; -ENOSYS without vidmem support or before the
 * allocator is initialized; -EAGAIN when the allocation failed but
 * frees are pending (retry may succeed); -ENOMEM otherwise.
 */
int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
		size_t size, struct nvgpu_mem *mem, u64 at)
{
#if defined(CONFIG_GK20A_VIDMEM)
	u64 addr;
	int err;
	/* Until the main allocator is cleared, carve from the bootstrap one. */
	struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
		&g->mm.vidmem.allocator :
		&g->mm.vidmem.bootstrap_allocator;
	int before_pending;

	if (nvgpu_mem_is_valid(mem)) {
		nvgpu_warn(g, "memory leak !!");
		WARN_ON(1);
	}

	dma_dbg_alloc(g, size, flags, "vidmem");

	mem->size = size;
	size = PAGE_ALIGN(size);

	if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
		return -ENOSYS;

	/*
	 * Our own allocator doesn't have any flags yet, and we can't
	 * kernel-map these, so require explicit flags.
	 */
	WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);

	/*
	 * Sample bytes_pending and allocate under clear_list_mutex so the
	 * -EAGAIN decision below is consistent with the allocator state.
	 */
	nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
	before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
	addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
	nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
	if (!addr) {
		/*
		 * If memory is known to be freed soon, let the user know that
		 * it may be available after a while.
		 */
		if (before_pending)
			return -EAGAIN;
		else
			return -ENOMEM;
	}

	if (at)
		mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;

	mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
	if (!mem->priv.sgt) {
		err = -ENOMEM;
		goto fail_physfree;
	}

	err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
	if (err)
		goto fail_kfree;

	/* The sgl stores the vidmem alloc handle, not a struct page. */
	nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
	sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);

	mem->aligned_size = size;
	mem->aperture = APERTURE_VIDMEM;
	mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
	mem->allocator = vidmem_alloc;
	mem->priv.flags = flags;

	nvgpu_init_list_node(&mem->clear_list_entry);

	dma_dbg_alloc_done(g, mem->size, "vidmem");

	return 0;

fail_kfree:
	nvgpu_kfree(g, mem->priv.sgt);
fail_physfree:
	nvgpu_free(&g->mm.vidmem.allocator, addr);
	mem->size = 0;
	return err;
#else
	return -ENOSYS;
#endif
}
399 | |||
400 | int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size, | ||
401 | struct nvgpu_mem *mem) | ||
402 | { | ||
403 | return nvgpu_dma_alloc_map_flags(vm, 0, size, mem); | ||
404 | } | ||
405 | |||
406 | int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, | ||
407 | size_t size, struct nvgpu_mem *mem) | ||
408 | { | ||
409 | if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) { | ||
410 | /* | ||
411 | * Force the no-kernel-mapping flag on because we don't support | ||
412 | * the lack of it for vidmem - the user should not care when | ||
413 | * using nvgpu_dma_alloc_map and it's vidmem, or if there's a | ||
414 | * difference, the user should use the flag explicitly anyway. | ||
415 | */ | ||
416 | int err = nvgpu_dma_alloc_map_flags_vid(vm, | ||
417 | flags | NVGPU_DMA_NO_KERNEL_MAPPING, | ||
418 | size, mem); | ||
419 | |||
420 | if (!err) | ||
421 | return 0; | ||
422 | /* | ||
423 | * Fall back to sysmem (which may then also fail) in case | ||
424 | * vidmem is exhausted. | ||
425 | */ | ||
426 | } | ||
427 | |||
428 | return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem); | ||
429 | } | ||
430 | |||
431 | int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size, | ||
432 | struct nvgpu_mem *mem) | ||
433 | { | ||
434 | return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem); | ||
435 | } | ||
436 | |||
437 | int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, | ||
438 | size_t size, struct nvgpu_mem *mem) | ||
439 | { | ||
440 | int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem); | ||
441 | |||
442 | if (err) | ||
443 | return err; | ||
444 | |||
445 | mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, | ||
446 | gk20a_mem_flag_none, false, | ||
447 | mem->aperture); | ||
448 | if (!mem->gpu_va) { | ||
449 | err = -ENOMEM; | ||
450 | goto fail_free; | ||
451 | } | ||
452 | |||
453 | return 0; | ||
454 | |||
455 | fail_free: | ||
456 | nvgpu_dma_free(vm->mm->g, mem); | ||
457 | return err; | ||
458 | } | ||
459 | |||
460 | int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size, | ||
461 | struct nvgpu_mem *mem) | ||
462 | { | ||
463 | return nvgpu_dma_alloc_map_flags_vid(vm, | ||
464 | NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); | ||
465 | } | ||
466 | |||
467 | int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, | ||
468 | size_t size, struct nvgpu_mem *mem) | ||
469 | { | ||
470 | int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem); | ||
471 | |||
472 | if (err) | ||
473 | return err; | ||
474 | |||
475 | mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0, | ||
476 | gk20a_mem_flag_none, false, | ||
477 | mem->aperture); | ||
478 | if (!mem->gpu_va) { | ||
479 | err = -ENOMEM; | ||
480 | goto fail_free; | ||
481 | } | ||
482 | |||
483 | return 0; | ||
484 | |||
485 | fail_free: | ||
486 | nvgpu_dma_free(vm->mm->g, mem); | ||
487 | return err; | ||
488 | } | ||
489 | |||
/*
 * Free a sysmem nvgpu_mem allocation.
 *
 * Three underlying allocation variants are handled, selected by how the
 * buffer was created:
 *  - DMA API with attrs (mem->priv.flags set): dma_free_attrs() with either
 *    the pages array (NO_KERNEL_MAPPING) or the CPU VA.
 *  - Plain coherent DMA (no flags): dma_free_coherent().
 *  - __NVGPU_MEM_FLAG_NO_DMA: pages were allocated outside the DMA API and
 *    only the pages array itself is kfree'd.
 * Shadow copies (NVGPU_MEM_FLAG_SHADOW_COPY) never own DMA memory and skip
 * the DMA free entirely. The sg_table is freed last in all cases.
 */
static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct device *d = dev_from_gk20a(g);

	/* Bookkeeping: this allocation no longer counts against the total. */
	g->dma_memory_used -= mem->aligned_size;

	dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");

	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
	    !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
	    (mem->cpu_va || mem->priv.pages)) {
		/*
		 * Free side of WAR for bug 2040115: the coherent-sysmem path
		 * vmap'ed a CPU mapping on alloc; undo it here.
		 */
		if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
			vunmap(mem->cpu_va);

		if (mem->priv.flags) {
			NVGPU_DEFINE_DMA_ATTRS(dma_attrs);

			nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);

			/*
			 * With NO_KERNEL_MAPPING the DMA API handed back a
			 * pages array instead of a CPU VA; free whichever one
			 * this buffer actually has.
			 */
			if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
				dma_free_attrs(d, mem->aligned_size, mem->priv.pages,
					sg_dma_address(mem->priv.sgt->sgl),
					NVGPU_DMA_ATTR(dma_attrs));
			} else {
				dma_free_attrs(d, mem->aligned_size, mem->cpu_va,
					sg_dma_address(mem->priv.sgt->sgl),
					NVGPU_DMA_ATTR(dma_attrs));
			}
		} else {
			dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
					sg_dma_address(mem->priv.sgt->sgl));
		}
		mem->cpu_va = NULL;
		mem->priv.pages = NULL;
	}

	/*
	 * When this flag is set we expect that pages is still populated but not
	 * by the DMA API.
	 */
	if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
		nvgpu_kfree(g, mem->priv.pages);

	if (mem->priv.sgt)
		nvgpu_free_sgtable(g, &mem->priv.sgt);

	dma_dbg_free_done(g, mem->size, "sysmem");

	/* Mark the nvgpu_mem as empty so a double free is detectable. */
	mem->size = 0;
	mem->aligned_size = 0;
	mem->aperture = APERTURE_INVALID;
}
545 | |||
/*
 * Free a vidmem nvgpu_mem allocation (no-op unless CONFIG_GK20A_VIDMEM).
 *
 * User buffers are handed to the deferred clear list, which takes over
 * ownership of @mem; kernel buffers are cleared synchronously and freed
 * in place.
 */
static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
	/*
	 * Cache the size up front: on the user-mem path @mem may be freed or
	 * handed off before the final debug print below.
	 */
	size_t mem_size = mem->size;

	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");

	/* Sanity check - only this supported when allocating. */
	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);

	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);

		/*
		 * If there's an error here then that means we can't clear the
		 * vidmem. That's too bad; however, we still own the nvgpu_mem
		 * buf so we have to free that.
		 *
		 * We don't need to worry about the vidmem allocator itself
		 * since when that gets cleaned up in the driver shutdown path
		 * all the outstanding allocs are force freed.
		 */
		if (err)
			nvgpu_kfree(g, mem);
	} else {
		/* Kernel buffers: scrub, release the pages, free the sgt. */
		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
		nvgpu_free(mem->allocator,
			(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
		nvgpu_free_sgtable(g, &mem->priv.sgt);

		mem->size = 0;
		mem->aligned_size = 0;
		mem->aperture = APERTURE_INVALID;
	}

	dma_dbg_free_done(g, mem_size, "vidmem");
#endif
}
584 | |||
585 | void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem) | ||
586 | { | ||
587 | switch (mem->aperture) { | ||
588 | case APERTURE_SYSMEM: | ||
589 | return nvgpu_dma_free_sys(g, mem); | ||
590 | case APERTURE_VIDMEM: | ||
591 | return nvgpu_dma_free_vid(g, mem); | ||
592 | default: | ||
593 | break; /* like free() on "null" memory */ | ||
594 | } | ||
595 | } | ||
596 | |||
597 | void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) | ||
598 | { | ||
599 | if (mem->gpu_va) | ||
600 | nvgpu_gmmu_unmap(vm, mem, mem->gpu_va); | ||
601 | mem->gpu_va = 0; | ||
602 | |||
603 | nvgpu_dma_free(vm->mm->g, mem); | ||
604 | } | ||
605 | |||
606 | int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt, | ||
607 | void *cpuva, u64 iova, size_t size, unsigned long flags) | ||
608 | { | ||
609 | int err = 0; | ||
610 | struct sg_table *tbl; | ||
611 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | ||
612 | |||
613 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
614 | if (!tbl) { | ||
615 | err = -ENOMEM; | ||
616 | goto fail; | ||
617 | } | ||
618 | |||
619 | nvgpu_dma_flags_to_attrs(&dma_attrs, flags); | ||
620 | err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova, | ||
621 | size, NVGPU_DMA_ATTR(dma_attrs)); | ||
622 | if (err) | ||
623 | goto fail; | ||
624 | |||
625 | sg_dma_address(tbl->sgl) = iova; | ||
626 | *sgt = tbl; | ||
627 | |||
628 | return 0; | ||
629 | |||
630 | fail: | ||
631 | if (tbl) | ||
632 | nvgpu_kfree(g, tbl); | ||
633 | |||
634 | return err; | ||
635 | } | ||
636 | |||
637 | int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt, | ||
638 | void *cpuva, u64 iova, size_t size) | ||
639 | { | ||
640 | return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0); | ||
641 | } | ||
642 | |||
643 | int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt, | ||
644 | struct page **pages, u64 iova, size_t size) | ||
645 | { | ||
646 | int err = 0; | ||
647 | struct sg_table *tbl; | ||
648 | |||
649 | tbl = nvgpu_kzalloc(g, sizeof(struct sg_table)); | ||
650 | if (!tbl) { | ||
651 | err = -ENOMEM; | ||
652 | goto fail; | ||
653 | } | ||
654 | |||
655 | err = sg_alloc_table_from_pages(tbl, pages, | ||
656 | DIV_ROUND_UP(size, PAGE_SIZE), | ||
657 | 0, size, GFP_KERNEL); | ||
658 | if (err) | ||
659 | goto fail; | ||
660 | |||
661 | sg_dma_address(tbl->sgl) = iova; | ||
662 | *sgt = tbl; | ||
663 | |||
664 | return 0; | ||
665 | |||
666 | fail: | ||
667 | if (tbl) | ||
668 | nvgpu_kfree(g, tbl); | ||
669 | |||
670 | return err; | ||
671 | } | ||
672 | |||
673 | void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt) | ||
674 | { | ||
675 | sg_free_table(*sgt); | ||
676 | nvgpu_kfree(g, *sgt); | ||
677 | *sgt = NULL; | ||
678 | } | ||
679 | |||
/*
 * Return true if the GPU device may be used behind an IOMMU.
 *
 * On Tegra builds this consults device_is_iommuable() on the nvgpu
 * device; on other builds IOMMU capability is assumed.
 */
bool nvgpu_iommuable(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/*
	 * Check against the nvgpu device to see if it's been marked as
	 * IOMMU'able.
	 */
	if (!device_is_iommuable(l->dev))
		return false;
#endif

	return true;
}
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c new file mode 100644 index 00000000..129739f0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c | |||
@@ -0,0 +1,218 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <linux/dma-buf.h> | ||
19 | #include <linux/scatterlist.h> | ||
20 | |||
21 | #include <nvgpu/comptags.h> | ||
22 | #include <nvgpu/enabled.h> | ||
23 | |||
24 | #include <nvgpu/linux/vm.h> | ||
25 | #include <nvgpu/linux/vidmem.h> | ||
26 | |||
27 | #include "gk20a/gk20a.h" | ||
28 | |||
29 | #include "platform_gk20a.h" | ||
30 | #include "dmabuf.h" | ||
31 | #include "os_linux.h" | ||
32 | |||
/*
 * Destructor for the per-dmabuf gk20a drvdata (installed via
 * dma_buf_set_drvdata() in gk20a_dmabuf_alloc_drvdata()).
 *
 * Returns any allocated comptag lines, frees all cached buffer states
 * (dropping their fence references), and finally frees the priv itself.
 */
static void gk20a_mm_delete_priv(void *_priv)
{
	struct gk20a_buffer_state *s, *s_tmp;
	struct gk20a_dmabuf_priv *priv = _priv;
	struct gk20a *g;

	if (!priv)
		return;

	g = priv->g;

	/* Return comptag lines to their allocator, if any were assigned. */
	if (priv->comptags.allocated && priv->comptags.lines) {
		BUG_ON(!priv->comptag_allocator);
		gk20a_comptaglines_free(priv->comptag_allocator,
				priv->comptags.offset,
				priv->comptags.lines);
	}

	/* Free buffer states */
	nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
				gk20a_buffer_state, list) {
		gk20a_fence_put(s->fence);
		nvgpu_list_del(&s->list);
		nvgpu_kfree(g, s);
	}

	nvgpu_kfree(g, priv);
}
61 | |||
62 | enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, | ||
63 | struct dma_buf *dmabuf) | ||
64 | { | ||
65 | struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf); | ||
66 | bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY); | ||
67 | |||
68 | if (buf_owner == NULL) { | ||
69 | /* Not nvgpu-allocated, assume system memory */ | ||
70 | return APERTURE_SYSMEM; | ||
71 | } else if (WARN_ON(buf_owner == g && unified_memory)) { | ||
72 | /* Looks like our video memory, but this gpu doesn't support | ||
73 | * it. Warn about a bug and bail out */ | ||
74 | nvgpu_warn(g, | ||
75 | "dmabuf is our vidmem but we don't have local vidmem"); | ||
76 | return APERTURE_INVALID; | ||
77 | } else if (buf_owner != g) { | ||
78 | /* Someone else's vidmem */ | ||
79 | return APERTURE_INVALID; | ||
80 | } else { | ||
81 | /* Yay, buf_owner == g */ | ||
82 | return APERTURE_VIDMEM; | ||
83 | } | ||
84 | } | ||
85 | |||
/*
 * Pin a dma_buf for device use and return its sg_table.
 *
 * The first pin attaches @dev to the dma_buf and maps the attachment;
 * later pins only bump the refcount. All state is protected by
 * priv->lock. The attachment is returned through @attachment so the
 * caller can hand it back to gk20a_mm_unpin(). Returns an ERR_PTR on
 * failure; requires the drvdata to have been set up already.
 */
struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
			      struct dma_buf_attachment **attachment)
{
	struct gk20a_dmabuf_priv *priv;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (WARN_ON(!priv))
		return ERR_PTR(-EINVAL);

	nvgpu_mutex_acquire(&priv->lock);

	if (priv->pin_count == 0) {
		/* First pin: attach and map. */
		priv->attach = dma_buf_attach(dmabuf, dev);
		if (IS_ERR(priv->attach)) {
			nvgpu_mutex_release(&priv->lock);
			/* Propagate the error value embedded in the pointer. */
			return (struct sg_table *)priv->attach;
		}

		priv->sgt = dma_buf_map_attachment(priv->attach,
						   DMA_BIDIRECTIONAL);
		if (IS_ERR(priv->sgt)) {
			dma_buf_detach(dmabuf, priv->attach);
			nvgpu_mutex_release(&priv->lock);
			return priv->sgt;
		}
	}

	priv->pin_count++;
	nvgpu_mutex_release(&priv->lock);
	*attachment = priv->attach;
	return priv->sgt;
}
118 | |||
119 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
120 | struct dma_buf_attachment *attachment, | ||
121 | struct sg_table *sgt) | ||
122 | { | ||
123 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | ||
124 | dma_addr_t dma_addr; | ||
125 | |||
126 | if (IS_ERR(priv) || !priv) | ||
127 | return; | ||
128 | |||
129 | nvgpu_mutex_acquire(&priv->lock); | ||
130 | WARN_ON(priv->sgt != sgt); | ||
131 | WARN_ON(priv->attach != attachment); | ||
132 | priv->pin_count--; | ||
133 | WARN_ON(priv->pin_count < 0); | ||
134 | dma_addr = sg_dma_address(priv->sgt->sgl); | ||
135 | if (priv->pin_count == 0) { | ||
136 | dma_buf_unmap_attachment(priv->attach, priv->sgt, | ||
137 | DMA_BIDIRECTIONAL); | ||
138 | dma_buf_detach(dmabuf, priv->attach); | ||
139 | } | ||
140 | nvgpu_mutex_release(&priv->lock); | ||
141 | } | ||
142 | |||
/*
 * Ensure @dmabuf carries gk20a private drvdata for @dev.
 *
 * Fast path: drvdata already attached. Otherwise allocate and attach it
 * under g->mm.priv_lock, re-checking under the lock to close the race
 * with a concurrent caller. Returns 0 on success, -ENOMEM on allocation
 * failure.
 */
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
{
	struct gk20a *g = gk20a_get_platform(dev)->g;
	struct gk20a_dmabuf_priv *priv;

	/* Lockless fast path for buffers that already have drvdata. */
	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (likely(priv))
		return 0;

	nvgpu_mutex_acquire(&g->mm.priv_lock);
	/* Re-check: another thread may have attached drvdata meanwhile. */
	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (priv)
		goto priv_exist_or_err;

	priv = nvgpu_kzalloc(g, sizeof(*priv));
	if (!priv) {
		/* Encode failure in priv so the common exit path sees it. */
		priv = ERR_PTR(-ENOMEM);
		goto priv_exist_or_err;
	}

	nvgpu_mutex_init(&priv->lock);
	nvgpu_init_list_node(&priv->states);
	priv->g = g;
	/* gk20a_mm_delete_priv runs when the drvdata is later dropped. */
	dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);

priv_exist_or_err:
	nvgpu_mutex_release(&g->mm.priv_lock);
	if (IS_ERR(priv))
		return -ENOMEM;

	return 0;
}
175 | |||
/*
 * Look up (or lazily create) the per-offset buffer state for @dmabuf.
 *
 * Ensures the dmabuf has drvdata, then searches the priv->states list
 * for an entry matching @offset; if none exists a zeroed state is
 * created and appended. On success *state points at the (shared,
 * priv-owned) entry. Returns 0 on success or a negative errno.
 */
int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
			   u64 offset, struct gk20a_buffer_state **state)
{
	int err = 0;
	struct gk20a_dmabuf_priv *priv;
	struct gk20a_buffer_state *s;
	struct device *dev = dev_from_gk20a(g);

	/* An offset beyond the buffer can never have valid state. */
	if (WARN_ON(offset >= (u64)dmabuf->size))
		return -EINVAL;

	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
	if (err)
		return err;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (WARN_ON(!priv))
		return -ENOSYS;

	nvgpu_mutex_acquire(&priv->lock);

	/* Existing state for this offset? Then return it (err stays 0). */
	nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
		if (s->offset == offset)
			goto out;

	/* State not found, create state. */
	s = nvgpu_kzalloc(g, sizeof(*s));
	if (!s) {
		err = -ENOMEM;
		goto out;
	}

	s->offset = offset;
	nvgpu_init_list_node(&s->list);
	nvgpu_mutex_init(&s->lock);
	nvgpu_list_add_tail(&s->list, &priv->states);

out:
	nvgpu_mutex_release(&priv->lock);
	if (!err)
		*state = s;
	return err;
}
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.h b/drivers/gpu/nvgpu/os/linux/dmabuf.h new file mode 100644 index 00000000..8399eaaf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dmabuf.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __COMMON_LINUX_DMABUF_H__ | ||
18 | #define __COMMON_LINUX_DMABUF_H__ | ||
19 | |||
20 | #include <nvgpu/comptags.h> | ||
21 | #include <nvgpu/list.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/gmmu.h> | ||
24 | |||
25 | struct sg_table; | ||
26 | struct dma_buf; | ||
27 | struct dma_buf_attachment; | ||
28 | struct device; | ||
29 | |||
30 | struct gk20a; | ||
31 | struct gk20a_buffer_state; | ||
32 | |||
/*
 * Per-dmabuf private data attached via dma_buf_set_drvdata(); freed by
 * gk20a_mm_delete_priv().
 */
struct gk20a_dmabuf_priv {
	/* Protects attach/sgt/pin_count and the states list. */
	struct nvgpu_mutex lock;

	/* GPU that owns this drvdata. */
	struct gk20a *g;

	/* Comptag lines assigned to this buffer, if any. */
	struct gk20a_comptag_allocator *comptag_allocator;
	struct gk20a_comptags comptags;

	/* DMA-API attachment/mapping, valid while pin_count > 0. */
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	/* Number of outstanding gk20a_mm_pin() references. */
	int pin_count;

	/* List of per-offset gk20a_buffer_state entries. */
	struct nvgpu_list_node states;

	u64 buffer_id;
};
50 | |||
51 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf, | ||
52 | struct dma_buf_attachment **attachment); | ||
53 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | ||
54 | struct dma_buf_attachment *attachment, | ||
55 | struct sg_table *sgt); | ||
56 | |||
57 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | ||
58 | |||
59 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | ||
60 | u64 offset, struct gk20a_buffer_state **state); | ||
61 | |||
62 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c new file mode 100644 index 00000000..8f33c5d2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/reboot.h> | ||
18 | #include <linux/dma-mapping.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <uapi/linux/nvgpu.h> | ||
21 | |||
22 | #include <nvgpu/defaults.h> | ||
23 | #include <nvgpu/kmem.h> | ||
24 | #include <nvgpu/nvgpu_common.h> | ||
25 | #include <nvgpu/soc.h> | ||
26 | #include <nvgpu/bug.h> | ||
27 | #include <nvgpu/enabled.h> | ||
28 | #include <nvgpu/debug.h> | ||
29 | #include <nvgpu/sizes.h> | ||
30 | |||
31 | #include "gk20a/gk20a.h" | ||
32 | #include "platform_gk20a.h" | ||
33 | #include "module.h" | ||
34 | #include "os_linux.h" | ||
35 | #include "sysfs.h" | ||
36 | #include "ioctl.h" | ||
37 | #include "gk20a/regops_gk20a.h" | ||
38 | |||
39 | #define EMC3D_DEFAULT_RATIO 750 | ||
40 | |||
/* OS abstraction: reboot the machine via the Linux kernel_restart(). */
void nvgpu_kernel_restart(void *cmd)
{
	kernel_restart(cmd);
}
45 | |||
/*
 * Initialize locks, wait queues, list heads and DMA limits for a newly
 * created gk20a. Pure software setup; no hardware access.
 */
static void nvgpu_init_vars(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq);
	nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq);

	init_rwsem(&l->busy_lock);
	nvgpu_rwsem_init(&g->deterministic_busy);

	nvgpu_spinlock_init(&g->mc_enable_lock);

	nvgpu_mutex_init(&platform->railgate_lock);
	nvgpu_mutex_init(&g->dbg_sessions_lock);
	nvgpu_mutex_init(&g->client_lock);
	nvgpu_mutex_init(&g->poweron_lock);
	nvgpu_mutex_init(&g->poweroff_lock);
	nvgpu_mutex_init(&g->ctxsw_disable_lock);

	/*
	 * Snapshot the current register apertures — presumably so they can
	 * be restored later (e.g. across power cycles); TODO confirm.
	 */
	l->regs_saved = l->regs;
	l->bar1_saved = l->bar1;

	g->emc3d_ratio = EMC3D_DEFAULT_RATIO;

	/* Set DMA parameters to allow larger sgt lists */
	dev->dma_parms = &l->dma_parms;
	dma_set_max_seg_size(dev, UINT_MAX);

	/*
	 * A default of 16GB is the largest supported DMA size that is
	 * acceptable to all currently supported Tegra SoCs.
	 */
	if (!platform->dma_mask)
		platform->dma_mask = DMA_BIT_MASK(34);

	dma_set_mask(dev, platform->dma_mask);
	dma_set_coherent_mask(dev, platform->dma_mask);

	nvgpu_init_list_node(&g->profiler_objects);

	nvgpu_init_list_node(&g->boardobj_head);
	nvgpu_init_list_node(&g->boardobjgrp_head);
}
91 | |||
/* Initialize graphics state and size the comptag memory cap from RAM. */
static void nvgpu_init_gr_vars(struct gk20a *g)
{
	gk20a_init_gr(g);

	nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
	/*
	 * 10 - (PAGE_SHIFT - 10) == 20 - PAGE_SHIFT, i.e. this converts
	 * total RAM from pages to megabytes for the comptag cap.
	 */
	g->gr.max_comptag_mem = totalram_pages
				 >> (10 - (PAGE_SHIFT - 10));
}
100 | |||
/*
 * Set default GR-idle/watchdog timeouts. Silicon gets the production
 * default, FPGA gets a longer one, and simulators effectively never
 * time out.
 */
static void nvgpu_init_timeout(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->timeouts_disabled_by_user = false;
	nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);

	if (nvgpu_platform_is_silicon(g)) {
		g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
	} else if (nvgpu_platform_is_fpga(g)) {
		g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA;
	} else {
		/* Neither silicon nor FPGA: effectively disable the timeout. */
		g->gr_idle_timeout_default = (u32)ULONG_MAX;
	}
	g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
	g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
}
118 | |||
/*
 * Default runlist timeslice configuration. The per-priority defaults
 * double at each level (1300/2600/5200 us); user-configurable
 * timeslices are clamped to [min, max].
 */
static void nvgpu_init_timeslice(struct gk20a *g)
{
	g->runlist_interleave = true;

	g->timeslice_low_priority_us = 1300;
	g->timeslice_medium_priority_us = 2600;
	g->timeslice_high_priority_us = 5200;

	g->min_timeslice_us = 1000;
	g->max_timeslice_us = 50000;
}
130 | |||
/*
 * Initialize power-management state from platform data. On anything
 * other than real silicon every power feature defaults to disabled.
 */
static void nvgpu_init_pm_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	/*
	 * Set up initial power settings. For non-silicon platforms, disable
	 * power features and for silicon platforms, read from platform data
	 */
	g->slcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
	g->blcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
	g->elcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
	g->elpg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
	g->aelpg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
	g->mscg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
	g->can_elpg =
		nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;

	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
		nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
		nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
		nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);

	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
	g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
	g->has_syncpoints = platform->has_syncpoints;
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	g->has_cde = platform->has_cde;
#endif
	g->ptimer_src_freq = platform->ptimer_src_freq;
	g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
	g->can_railgate = platform->can_railgate_init;
	g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
	/* if default delay is not set, set default delay to 500msec */
	if (platform->railgate_delay_init)
		g->railgate_delay = platform->railgate_delay_init;
	else
		g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
	__nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);

	/* set default values to aelpg parameters */
	g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
	g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
	g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
	g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
	g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;

	/* ASPM is supported unless the platform explicitly disables it. */
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
}
187 | |||
/* Copy VBIOS-related settings (preOS run flag, min version) from platform data. */
static void nvgpu_init_vbios_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	__nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
	g->vbios_min_version = platform->vbios_min_version;
}
195 | |||
/* Copy the LTC stream ID from platform data. */
static void nvgpu_init_ltc_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->ltc_streamid = platform->ltc_streamid;
}
202 | |||
/*
 * Initialize MM feature flags from platform data and the MM locks.
 * Runs after platform->probe() (see nvgpu_probe) so the platform
 * fields read here are final.
 */
static void nvgpu_init_mm_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->mm.disable_bigpage = platform->disable_bigpage;
	__nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
			    platform->honors_aperture);
	__nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
			    platform->unified_memory);
	__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
			    platform->unify_address_spaces);

	nvgpu_mutex_init(&g->mm.tlb_lock);
	nvgpu_mutex_init(&g->mm.priv_lock);
}
218 | |||
/*
 * Common Linux probe path: initialize software state, run the platform
 * probe hooks, create the user interface, sysfs and debug nodes, and
 * set up the refcount/remove callbacks.
 *
 * Returns 0 on success or a negative errno. NOTE(review): failures
 * after gk20a_user_init()/nvgpu_create_sysfs() do not unwind those
 * steps here — confirm the caller's error path handles teardown.
 */
int nvgpu_probe(struct gk20a *g,
		const char *debugfs_symlink,
		const char *interface_name,
		struct class *class)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int err = 0;

	/* Pure-SW init that must precede the platform probe. */
	nvgpu_init_vars(g);
	nvgpu_init_gr_vars(g);
	nvgpu_init_timeout(g);
	nvgpu_init_timeslice(g);
	nvgpu_init_pm_vars(g);
	nvgpu_init_vbios_vars(g);
	nvgpu_init_ltc_vars(g);
	err = nvgpu_init_soc_vars(g);
	if (err) {
		nvgpu_err(g, "init soc vars failed");
		return err;
	}

	/* Initialize the platform interface. */
	err = platform->probe(dev);
	if (err) {
		/* -EPROBE_DEFER is expected and retried; log it quietly. */
		if (err == -EPROBE_DEFER)
			nvgpu_info(g, "platform probe failed");
		else
			nvgpu_err(g, "platform probe failed");
		return err;
	}

	/* MM vars read platform fields that probe() may have filled in. */
	nvgpu_init_mm_vars(g);

	/* platform probe can defer do user init only if probe succeeds */
	err = gk20a_user_init(dev, interface_name, class);
	if (err)
		return err;

	if (platform->late_probe) {
		err = platform->late_probe(dev);
		if (err) {
			nvgpu_err(g, "late probe failed");
			return err;
		}
	}

	nvgpu_create_sysfs(dev);
	gk20a_debug_init(g, debugfs_symlink);

	/* Scratch buffer used by the regops debug interface. */
	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
	if (!g->dbg_regops_tmp_buf) {
		nvgpu_err(g, "couldn't allocate regops tmp buf");
		return -ENOMEM;
	}
	g->dbg_regops_tmp_buf_ops =
		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);

	g->remove_support = gk20a_remove_support;

	nvgpu_ref_init(&g->refcount);

	return 0;
}
283 | |||
/**
 * cyclic_delta - Signed distance between two wrapping counters.
 *
 * @a - First counter value
 * @b - Second counter value
 *
 * Positive when @a is ahead of @b. NOTE(review): relies on the two
 * counters being close enough that the subtraction does not overflow.
 */
static int cyclic_delta(int a, int b)
{
	return a - b;
}
296 | |||
/**
 * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete
 *
 * @g - The GPU to wait on.
 *
 * Waits until all interrupt handlers that have been scheduled to run have
 * completed. The hardware IRQ counters are snapshotted first; the waits
 * then block until the software "last handled" counters catch up to
 * those snapshots (cyclic comparison tolerates counter wrap).
 */
void nvgpu_wait_for_deferred_interrupts(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	/* Snapshot how far the HW irq counters have advanced right now. */
	int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq,
			cyclic_delta(stall_irq_threshold,
				atomic_read(&l->sw_irq_stall_last_handled))
			<= 0, 0);

	/* wait until all non-stalling irqs are handled */
	NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq,
			cyclic_delta(nonstall_irq_threshold,
				atomic_read(&l->sw_irq_nonstall_last_handled))
			<= 0, 0);
}
323 | |||
/*
 * Final free callback: the gk20a is embedded in an nvgpu_os_linux, so
 * releasing the container frees both.
 */
static void nvgpu_free_gk20a(struct gk20a *g)
{
	kfree(nvgpu_os_linux_from_gk20a(g));
}
330 | |||
/* Install the free callback that releases the enclosing nvgpu_os_linux. */
void nvgpu_init_gk20a(struct gk20a *g)
{
	g->free = nvgpu_free_gk20a;
}
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.h b/drivers/gpu/nvgpu/os/linux/driver_common.h new file mode 100644 index 00000000..6f42f775 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/driver_common.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
#ifndef NVGPU_LINUX_DRIVER_COMMON
#define NVGPU_LINUX_DRIVER_COMMON

/*
 * Forward declaration so this header is self-contained; without it the
 * prototype below declares 'struct gk20a' inside the parameter list,
 * which is a distinct (and useless) scope-local type.
 */
struct gk20a;

/* Install Linux-specific hooks (e.g. g->free) on a freshly allocated gk20a. */
void nvgpu_init_gk20a(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/dt.c b/drivers/gpu/nvgpu/os/linux/dt.c new file mode 100644 index 00000000..88e391e3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/dt.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/dt.h> | ||
18 | #include <linux/of.h> | ||
19 | |||
20 | #include "os_linux.h" | ||
21 | |||
/*
 * Read entry @index of the multi-value u32 device-tree property @name
 * from this GPU's DT node into @value.
 *
 * Returns 0 on success or the negative errno from
 * of_property_read_u32_index().
 */
int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
		u32 index, u32 *value)
{
	struct device *dev = dev_from_gk20a(g);
	struct device_node *np = dev->of_node;

	return of_property_read_u32_index(np, name, index, value);
}
diff --git a/drivers/gpu/nvgpu/os/linux/firmware.c b/drivers/gpu/nvgpu/os/linux/firmware.c new file mode 100644 index 00000000..9a4dc653 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/firmware.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/firmware.h> | ||
18 | |||
19 | #include <nvgpu/kmem.h> | ||
20 | #include <nvgpu/bug.h> | ||
21 | #include <nvgpu/firmware.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | #include "platform_gk20a.h" | ||
25 | #include "os_linux.h" | ||
26 | |||
27 | static const struct firmware *do_request_firmware(struct device *dev, | ||
28 | const char *prefix, const char *fw_name, int flags) | ||
29 | { | ||
30 | const struct firmware *fw; | ||
31 | char *fw_path = NULL; | ||
32 | int path_len, err; | ||
33 | |||
34 | if (prefix) { | ||
35 | path_len = strlen(prefix) + strlen(fw_name); | ||
36 | path_len += 2; /* for the path separator and zero terminator*/ | ||
37 | |||
38 | fw_path = nvgpu_kzalloc(get_gk20a(dev), | ||
39 | sizeof(*fw_path) * path_len); | ||
40 | if (!fw_path) | ||
41 | return NULL; | ||
42 | |||
43 | sprintf(fw_path, "%s/%s", prefix, fw_name); | ||
44 | fw_name = fw_path; | ||
45 | } | ||
46 | |||
47 | if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN) | ||
48 | err = request_firmware_direct(&fw, fw_name, dev); | ||
49 | else | ||
50 | err = request_firmware(&fw, fw_name, dev); | ||
51 | |||
52 | nvgpu_kfree(get_gk20a(dev), fw_path); | ||
53 | if (err) | ||
54 | return NULL; | ||
55 | return fw; | ||
56 | } | ||
57 | |||
/* This is a simple wrapper around request_firmware that takes 'fw_name' and
 * applies an IP specific relative path prefix to it. The caller is
 * responsible for calling nvgpu_release_firmware later. Returns NULL on
 * any failure (no filesystem context, allocation failure, or missing
 * firmware image). */
struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
		const char *fw_name,
		int flags)
{
	struct device *dev = dev_from_gk20a(g);
	struct nvgpu_firmware *fw;
	const struct firmware *linux_fw;

	/* current->fs is NULL when calling from SYS_EXIT.
	   Add a check here to prevent crash in request_firmware */
	if (!current->fs || !fw_name)
		return NULL;

	fw = nvgpu_kzalloc(g, sizeof(*fw));
	if (!fw)
		return NULL;

	/* First try the chip-specific path "<g->name>/<fw_name>". */
	linux_fw = do_request_firmware(dev, g->name, fw_name, flags);

#ifdef CONFIG_TEGRA_GK20A
	/* TO BE REMOVED - Support loading from legacy SOC specific path. */
	if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
		struct gk20a_platform *platform = gk20a_get_platform(dev);
		linux_fw = do_request_firmware(dev,
				platform->soc_name, fw_name, flags);
	}
#endif

	if (!linux_fw)
		goto err;

	/* Copy into nvgpu-managed memory so the Linux firmware object can
	 * be released immediately below. */
	fw->data = nvgpu_kmalloc(g, linux_fw->size);
	if (!fw->data)
		goto err_release;

	memcpy(fw->data, linux_fw->data, linux_fw->size);
	fw->size = linux_fw->size;

	release_firmware(linux_fw);

	return fw;

err_release:
	release_firmware(linux_fw);
err:
	nvgpu_kfree(g, fw);
	return NULL;
}
109 | |||
110 | void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw) | ||
111 | { | ||
112 | if(!fw) | ||
113 | return; | ||
114 | |||
115 | nvgpu_kfree(g, fw->data); | ||
116 | nvgpu_kfree(g, fw); | ||
117 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/fuse.c b/drivers/gpu/nvgpu/os/linux/fuse.c new file mode 100644 index 00000000..27851f92 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/fuse.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <soc/tegra/fuse.h> | ||
15 | |||
16 | #include <nvgpu/fuse.h> | ||
17 | |||
/* Return the GPU speedo id from the Tegra SKU fuse information. */
int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g)
{
	return tegra_sku_info.gpu_speedo_id;
}

/*
 * Use tegra_fuse_control_read/write() APIs for fuse offsets upto 0x100
 * Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100
 */
/* Write the fuse-bypass control register (control space, < 0x100). */
void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
{
	tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
}

/* Write the software write-access control register (control space). */
void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
{
	tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
}

/* Write the TPC0 disable fuse (fuse space, > 0x100). */
void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
{
	tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
}

/* Write the TPC1 disable fuse (fuse space, > 0x100). */
void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
{
	tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
}

/* Read the GPU complex configuration fuse; returns 0 or negative errno. */
int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
{
	return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
}

/* Read the reserved calibration fuse; returns 0 or negative errno. */
int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
{
	return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
}
diff --git a/drivers/gpu/nvgpu/os/linux/intr.c b/drivers/gpu/nvgpu/os/linux/intr.c new file mode 100644 index 00000000..7ffc7e87 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <trace/events/gk20a.h> | ||
15 | #include <linux/irqreturn.h> | ||
16 | |||
17 | #include "gk20a/gk20a.h" | ||
18 | #include "gk20a/mc_gk20a.h" | ||
19 | |||
20 | #include <nvgpu/atomic.h> | ||
21 | #include <nvgpu/unit.h> | ||
22 | #include "os_linux.h" | ||
23 | |||
/*
 * Top half for the stalling interrupt line.
 *
 * Returns IRQ_NONE when the GPU is powered off or the pending-interrupt
 * register reads zero (the line may be shared with other devices).
 * Otherwise pauses stalling interrupts, bumps the hardware irq counter
 * consumed by nvgpu_wait_for_deferred_interrupts(), and returns
 * IRQ_WAKE_THREAD so nvgpu_intr_thread_stall() runs.
 */
irqreturn_t nvgpu_intr_stall(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 mc_intr_0;

	trace_mc_gk20a_intr_stall(g->name);

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_0 = g->ops.mc.intr_stall(g);
	if (unlikely(!mc_intr_0))
		return IRQ_NONE;

	/* mask stalling interrupts until the threaded handler is done */
	g->ops.mc.intr_stall_pause(g);

	atomic_inc(&l->hw_irq_stall_count);

	trace_mc_gk20a_intr_stall_done(g->name);

	return IRQ_WAKE_THREAD;
}
47 | |||
/*
 * Threaded (bottom half) handler for the stalling interrupt line.
 *
 * Services the pending stalling interrupts, re-enables them, then
 * publishes the handled-irq count and wakes waiters in
 * nvgpu_wait_for_deferred_interrupts().
 */
irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int hw_irq_count;

	nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");

	trace_mc_gk20a_intr_thread_stall(g->name);

	/* snapshot before servicing so we never report more than was raised */
	hw_irq_count = atomic_read(&l->hw_irq_stall_count);
	g->ops.mc.isr_stall(g);
	g->ops.mc.intr_stall_resume(g);
	/* sync handled irq counter before re-enabling interrupts */
	atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count);

	nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq);

	trace_mc_gk20a_intr_thread_stall_done(g->name);

	return IRQ_HANDLED;
}
69 | |||
/*
 * Handler for the non-stalling interrupt line.
 *
 * Accumulates the operations reported by the mc isr into l->nonstall_ops
 * and defers the actual processing to nvgpu_intr_nonstall_cb() on the
 * nonstall workqueue. Returns IRQ_NONE when the GPU is off or nothing is
 * pending (shared line), IRQ_HANDLED otherwise.
 */
irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
{
	u32 non_stall_intr_val;
	u32 hw_irq_count;
	int ops_old, ops_new, ops = 0;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	non_stall_intr_val = g->ops.mc.intr_nonstall(g);
	if (unlikely(!non_stall_intr_val))
		return IRQ_NONE;

	g->ops.mc.intr_nonstall_pause(g);

	ops = g->ops.mc.isr_nonstall(g);
	if (ops) {
		/*
		 * OR the new ops into the shared word with a cmpxchg loop
		 * so concurrent updates from the work callback are not lost.
		 */
		do {
			ops_old = atomic_read(&l->nonstall_ops);
			ops_new = ops_old | ops;
		} while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
						ops_old, ops_new));

		queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
	}

	hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count);

	/* sync handled irq counter before re-enabling interrupts */
	atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count);

	g->ops.mc.intr_nonstall_resume(g);

	nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq);

	return IRQ_HANDLED;
}
109 | |||
/*
 * Workqueue callback that drains l->nonstall_ops.
 *
 * Atomically swaps the accumulated ops word for zero and processes it,
 * looping in case nvgpu_intr_nonstall() queued more ops while we were
 * handling the previous batch.
 */
void nvgpu_intr_nonstall_cb(struct work_struct *work)
{
	struct nvgpu_os_linux *l =
		container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
	struct gk20a *g = &l->g;

	do {
		u32 ops;

		ops = atomic_xchg(&l->nonstall_ops, 0);
		mc_gk20a_handle_intr_nonstall(g, ops);
	} while (atomic_read(&l->nonstall_ops) != 0);
}
diff --git a/drivers/gpu/nvgpu/os/linux/intr.h b/drivers/gpu/nvgpu/os/linux/intr.h new file mode 100644 index 00000000..d43cdccb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/intr.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #ifndef __NVGPU_LINUX_INTR_H__ | ||
15 | #define __NVGPU_LINUX_INTR_H__ | ||
16 | struct gk20a; | ||
17 | |||
18 | irqreturn_t nvgpu_intr_stall(struct gk20a *g); | ||
19 | irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g); | ||
20 | irqreturn_t nvgpu_intr_nonstall(struct gk20a *g); | ||
21 | void nvgpu_intr_nonstall_cb(struct work_struct *work); | ||
22 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/io.c b/drivers/gpu/nvgpu/os/linux/io.c new file mode 100644 index 00000000..c06512a5 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/io.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/io.h> | ||
15 | #include <nvgpu/types.h> | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | #include "gk20a/gk20a.h" | ||
19 | |||
/*
 * Write 32-bit value @v to BAR0 register offset @r.
 *
 * Uses writel_relaxed() followed by a write barrier. If BAR0 is not
 * mapped (l->regs == NULL) the write is dropped and only logged.
 */
void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		writel_relaxed(v, l->regs + r);
		nvgpu_wmb();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}
33 | |||
/*
 * Read BAR0 register offset @r, triggering a GPU state check when the
 * read returns all ones (which is also what __nvgpu_readl() returns
 * when the register space is unmapped).
 */
u32 nvgpu_readl(struct gk20a *g, u32 r)
{
	u32 v = __nvgpu_readl(g, r);

	if (v == 0xffffffff)
		__nvgpu_check_gpu_state(g);

	return v;
}

/*
 * Raw BAR0 register read without the all-ones state check.
 * Returns 0xffffffff if BAR0 is not mapped.
 */
u32 __nvgpu_readl(struct gk20a *g, u32 r)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 v = 0xffffffff;

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		v = readl(l->regs + r);
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}

	return v;
}
59 | |||
/*
 * Write @v to BAR0 register @r and re-write until a read-back returns @v.
 *
 * NOTE(review): this spins forever if the register never reads back as
 * @v (e.g. write-only or self-clearing registers) — only use on plain
 * read/write registers.
 */
void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		nvgpu_wmb();
		do {
			writel_relaxed(v, l->regs + r);
		} while (readl(l->regs + r) != v);
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}
75 | |||
/* Write 32-bit value @v at BAR1 offset @b; dropped and logged if unmapped. */
void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->bar1)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		nvgpu_wmb();
		writel_relaxed(v, l->bar1 + b);
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}
}

/* Read 32 bits at BAR1 offset @b; returns 0xffffffff if BAR1 is unmapped. */
u32 nvgpu_bar1_readl(struct gk20a *g, u32 b)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 v = 0xffffffff;

	if (unlikely(!l->bar1)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		v = readl(l->bar1 + b);
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}

	return v;
}
105 | |||
/* True when the BAR0 register aperture is mapped. */
bool nvgpu_io_exists(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	return l->regs != NULL;
}

/*
 * True when offset @r falls inside the mapped register aperture.
 *
 * NOTE(review): resource_size() normally takes a struct resource, but
 * l->regs appears to be an ioremapped pointer elsewhere in this file —
 * confirm l->regs is the intended argument here.
 */
bool nvgpu_io_valid_reg(struct gk20a *g, u32 r)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	return r < resource_size(l->regs);
}
diff --git a/drivers/gpu/nvgpu/os/linux/io_usermode.c b/drivers/gpu/nvgpu/os/linux/io_usermode.c new file mode 100644 index 00000000..ce7c9e75 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/io_usermode.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/io.h> | ||
15 | #include <nvgpu/types.h> | ||
16 | |||
17 | #include "os_linux.h" | ||
18 | #include "gk20a/gk20a.h" | ||
19 | |||
20 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
21 | |||
/*
 * Write 32-bit value @v to usermode register @r.
 *
 * @r is a full register offset; it is rebased against usermode_cfg0_r()
 * because l->usermode_regs maps only the usermode region.
 */
void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r());

	writel_relaxed(v, reg);
	nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
}
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.c b/drivers/gpu/nvgpu/os/linux/ioctl.c new file mode 100644 index 00000000..359e5103 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.c | |||
@@ -0,0 +1,296 @@ | |||
1 | /* | ||
2 | * NVGPU IOCTLs | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/file.h> | ||
20 | |||
21 | #include <nvgpu/nvgpu_common.h> | ||
22 | #include <nvgpu/ctxsw_trace.h> | ||
23 | |||
24 | #include "gk20a/gk20a.h" | ||
25 | #include "gk20a/dbg_gpu_gk20a.h" | ||
26 | |||
27 | #include "ioctl_channel.h" | ||
28 | #include "ioctl_ctrl.h" | ||
29 | #include "ioctl_as.h" | ||
30 | #include "ioctl_tsg.h" | ||
31 | #include "ioctl_dbg.h" | ||
32 | #include "module.h" | ||
33 | #include "os_linux.h" | ||
34 | #include "ctxsw_trace.h" | ||
35 | #include "platform_gk20a.h" | ||
36 | |||
37 | #define GK20A_NUM_CDEVS 7 | ||
38 | |||
/* fops for the base (channel) node created with an empty cdev_name. */
const struct file_operations gk20a_channel_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_channel_release,
	.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_channel_ioctl,
#endif
	.unlocked_ioctl = gk20a_channel_ioctl,
};

/* fops for the "-ctrl" node. */
static const struct file_operations gk20a_ctrl_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctrl_dev_release,
	.open = gk20a_ctrl_dev_open,
	.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};

/* fops for the "-dbg" node. */
static const struct file_operations gk20a_dbg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_dbg_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

/* fops for the "-as" (address space) node. */
static const struct file_operations gk20a_as_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_as_dev_release,
	.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_as_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_as_dev_ioctl,
};

/*
 * Note: We use a different 'open' to trigger handling of the profiler session.
 * Most of the code is shared between them... Though, at some point if the
 * code does get too tangled trying to handle each in the same path we can
 * separate them cleanly.
 */
static const struct file_operations gk20a_prof_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_prof_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

/* fops for the "-tsg" (time slice group) node. */
static const struct file_operations gk20a_tsg_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_ioctl_tsg_dev_release,
	.open = nvgpu_ioctl_tsg_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
#endif
	.unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
/* fops for the "-ctxsw" (context switch trace) node. */
static const struct file_operations gk20a_ctxsw_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctxsw_dev_release,
	.open = gk20a_ctxsw_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctxsw_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
	.poll = gk20a_ctxsw_dev_poll,
	.read = gk20a_ctxsw_dev_read,
	.mmap = gk20a_ctxsw_dev_mmap,
};
#endif

/* fops for the "-sched" node. */
static const struct file_operations gk20a_sched_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_sched_dev_release,
	.open = gk20a_sched_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_sched_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_sched_dev_ioctl,
	.poll = gk20a_sched_dev_poll,
	.read = gk20a_sched_dev_read,
};
132 | |||
133 | static int gk20a_create_device( | ||
134 | struct device *dev, int devno, | ||
135 | const char *interface_name, const char *cdev_name, | ||
136 | struct cdev *cdev, struct device **out, | ||
137 | const struct file_operations *ops, | ||
138 | struct class *class) | ||
139 | { | ||
140 | struct device *subdev; | ||
141 | int err; | ||
142 | struct gk20a *g = gk20a_from_dev(dev); | ||
143 | |||
144 | nvgpu_log_fn(g, " "); | ||
145 | |||
146 | cdev_init(cdev, ops); | ||
147 | cdev->owner = THIS_MODULE; | ||
148 | |||
149 | err = cdev_add(cdev, devno, 1); | ||
150 | if (err) { | ||
151 | dev_err(dev, "failed to add %s cdev\n", cdev_name); | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | subdev = device_create(class, NULL, devno, NULL, | ||
156 | interface_name, cdev_name); | ||
157 | |||
158 | if (IS_ERR(subdev)) { | ||
159 | err = PTR_ERR(dev); | ||
160 | cdev_del(cdev); | ||
161 | dev_err(dev, "failed to create %s device for %s\n", | ||
162 | cdev_name, dev_name(dev)); | ||
163 | return err; | ||
164 | } | ||
165 | |||
166 | *out = subdev; | ||
167 | return 0; | ||
168 | } | ||
169 | |||
/*
 * Tear down the user-visible device nodes created by gk20a_user_init().
 *
 * Each node pointer is checked individually, so this is safe to call on
 * a partially initialized state (e.g. from gk20a_user_init()'s failure
 * path).
 */
void gk20a_user_deinit(struct device *dev, struct class *class)
{
	struct gk20a *g = gk20a_from_dev(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (l->channel.node) {
		device_destroy(class, l->channel.cdev.dev);
		cdev_del(&l->channel.cdev);
	}

	if (l->as_dev.node) {
		device_destroy(class, l->as_dev.cdev.dev);
		cdev_del(&l->as_dev.cdev);
	}

	if (l->ctrl.node) {
		device_destroy(class, l->ctrl.cdev.dev);
		cdev_del(&l->ctrl.cdev);
	}

	if (l->dbg.node) {
		device_destroy(class, l->dbg.cdev.dev);
		cdev_del(&l->dbg.cdev);
	}

	if (l->prof.node) {
		device_destroy(class, l->prof.cdev.dev);
		cdev_del(&l->prof.cdev);
	}

	if (l->tsg.node) {
		device_destroy(class, l->tsg.cdev.dev);
		cdev_del(&l->tsg.cdev);
	}

	if (l->ctxsw.node) {
		device_destroy(class, l->ctxsw.cdev.dev);
		cdev_del(&l->ctxsw.cdev);
	}

	if (l->sched.node) {
		device_destroy(class, l->sched.cdev.dev);
		cdev_del(&l->sched.cdev);
	}

	if (l->cdev_region)
		unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS);
}
218 | |||
219 | int gk20a_user_init(struct device *dev, const char *interface_name, | ||
220 | struct class *class) | ||
221 | { | ||
222 | int err; | ||
223 | dev_t devno; | ||
224 | struct gk20a *g = gk20a_from_dev(dev); | ||
225 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
226 | |||
227 | err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev)); | ||
228 | if (err) { | ||
229 | dev_err(dev, "failed to allocate devno\n"); | ||
230 | goto fail; | ||
231 | } | ||
232 | l->cdev_region = devno; | ||
233 | |||
234 | err = gk20a_create_device(dev, devno++, interface_name, "", | ||
235 | &l->channel.cdev, &l->channel.node, | ||
236 | &gk20a_channel_ops, | ||
237 | class); | ||
238 | if (err) | ||
239 | goto fail; | ||
240 | |||
241 | err = gk20a_create_device(dev, devno++, interface_name, "-as", | ||
242 | &l->as_dev.cdev, &l->as_dev.node, | ||
243 | &gk20a_as_ops, | ||
244 | class); | ||
245 | if (err) | ||
246 | goto fail; | ||
247 | |||
248 | err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", | ||
249 | &l->ctrl.cdev, &l->ctrl.node, | ||
250 | &gk20a_ctrl_ops, | ||
251 | class); | ||
252 | if (err) | ||
253 | goto fail; | ||
254 | |||
255 | err = gk20a_create_device(dev, devno++, interface_name, "-dbg", | ||
256 | &l->dbg.cdev, &l->dbg.node, | ||
257 | &gk20a_dbg_ops, | ||
258 | class); | ||
259 | if (err) | ||
260 | goto fail; | ||
261 | |||
262 | err = gk20a_create_device(dev, devno++, interface_name, "-prof", | ||
263 | &l->prof.cdev, &l->prof.node, | ||
264 | &gk20a_prof_ops, | ||
265 | class); | ||
266 | if (err) | ||
267 | goto fail; | ||
268 | |||
269 | err = gk20a_create_device(dev, devno++, interface_name, "-tsg", | ||
270 | &l->tsg.cdev, &l->tsg.node, | ||
271 | &gk20a_tsg_ops, | ||
272 | class); | ||
273 | if (err) | ||
274 | goto fail; | ||
275 | |||
276 | #if defined(CONFIG_GK20A_CTXSW_TRACE) | ||
277 | err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", | ||
278 | &l->ctxsw.cdev, &l->ctxsw.node, | ||
279 | &gk20a_ctxsw_ops, | ||
280 | class); | ||
281 | if (err) | ||
282 | goto fail; | ||
283 | #endif | ||
284 | |||
285 | err = gk20a_create_device(dev, devno++, interface_name, "-sched", | ||
286 | &l->sched.cdev, &l->sched.node, | ||
287 | &gk20a_sched_ops, | ||
288 | class); | ||
289 | if (err) | ||
290 | goto fail; | ||
291 | |||
292 | return 0; | ||
293 | fail: | ||
294 | gk20a_user_deinit(dev, &nvgpu_class); | ||
295 | return err; | ||
296 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl.h b/drivers/gpu/nvgpu/os/linux/ioctl.h new file mode 100644 index 00000000..7bf16711 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
/* Creation and teardown of the per-GPU user-visible character devices. */
#ifndef __NVGPU_IOCTL_H__
#define __NVGPU_IOCTL_H__

struct device;
struct class;

/* Create the "", "-as", "-ctrl", ... device nodes; 0 or negative errno. */
int gk20a_user_init(struct device *dev, const char *interface_name,
		struct class *class);
/* Destroy every node gk20a_user_init() created; safe on partial init. */
void gk20a_user_deinit(struct device *dev, struct class *class);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c new file mode 100644 index 00000000..47f612cc --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * GK20A Address Spaces | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/cdev.h> | ||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/fs.h> | ||
19 | |||
20 | #include <trace/events/gk20a.h> | ||
21 | |||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include <nvgpu/gmmu.h> | ||
25 | #include <nvgpu/vm_area.h> | ||
26 | #include <nvgpu/log2.h> | ||
27 | |||
28 | #include <nvgpu/linux/vm.h> | ||
29 | |||
30 | #include "gk20a/gk20a.h" | ||
31 | #include "platform_gk20a.h" | ||
32 | #include "ioctl_as.h" | ||
33 | #include "os_linux.h" | ||
34 | |||
35 | static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags) | ||
36 | { | ||
37 | u32 core_flags = 0; | ||
38 | |||
39 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | ||
40 | core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET; | ||
41 | if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) | ||
42 | core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE; | ||
43 | |||
44 | return core_flags; | ||
45 | } | ||
46 | |||
47 | static int gk20a_as_ioctl_bind_channel( | ||
48 | struct gk20a_as_share *as_share, | ||
49 | struct nvgpu_as_bind_channel_args *args) | ||
50 | { | ||
51 | int err = 0; | ||
52 | struct channel_gk20a *ch; | ||
53 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
54 | |||
55 | nvgpu_log_fn(g, " "); | ||
56 | |||
57 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
58 | if (!ch) | ||
59 | return -EINVAL; | ||
60 | |||
61 | if (gk20a_channel_as_bound(ch)) { | ||
62 | err = -EINVAL; | ||
63 | goto out; | ||
64 | } | ||
65 | |||
66 | /* this will set channel_gk20a->vm */ | ||
67 | err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch); | ||
68 | |||
69 | out: | ||
70 | gk20a_channel_put(ch); | ||
71 | return err; | ||
72 | } | ||
73 | |||
74 | static int gk20a_as_ioctl_alloc_space( | ||
75 | struct gk20a_as_share *as_share, | ||
76 | struct nvgpu_as_alloc_space_args *args) | ||
77 | { | ||
78 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
79 | |||
80 | nvgpu_log_fn(g, " "); | ||
81 | return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size, | ||
82 | &args->o_a.offset, | ||
83 | gk20a_as_translate_as_alloc_space_flags(g, | ||
84 | args->flags)); | ||
85 | } | ||
86 | |||
87 | static int gk20a_as_ioctl_free_space( | ||
88 | struct gk20a_as_share *as_share, | ||
89 | struct nvgpu_as_free_space_args *args) | ||
90 | { | ||
91 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
92 | |||
93 | nvgpu_log_fn(g, " "); | ||
94 | return nvgpu_vm_area_free(as_share->vm, args->offset); | ||
95 | } | ||
96 | |||
97 | static int gk20a_as_ioctl_map_buffer_ex( | ||
98 | struct gk20a_as_share *as_share, | ||
99 | struct nvgpu_as_map_buffer_ex_args *args) | ||
100 | { | ||
101 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
102 | |||
103 | nvgpu_log_fn(g, " "); | ||
104 | |||
105 | /* unsupported, direct kind control must be used */ | ||
106 | if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) { | ||
107 | struct gk20a *g = as_share->vm->mm->g; | ||
108 | nvgpu_log_info(g, "Direct kind control must be requested"); | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | |||
112 | return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, | ||
113 | &args->offset, args->flags, | ||
114 | args->compr_kind, | ||
115 | args->incompr_kind, | ||
116 | args->buffer_offset, | ||
117 | args->mapping_size, | ||
118 | NULL); | ||
119 | } | ||
120 | |||
121 | static int gk20a_as_ioctl_unmap_buffer( | ||
122 | struct gk20a_as_share *as_share, | ||
123 | struct nvgpu_as_unmap_buffer_args *args) | ||
124 | { | ||
125 | struct gk20a *g = gk20a_from_vm(as_share->vm); | ||
126 | |||
127 | nvgpu_log_fn(g, " "); | ||
128 | |||
129 | nvgpu_vm_unmap(as_share->vm, args->offset, NULL); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
/*
 * NVGPU_AS_IOCTL_MAP_BUFFER_BATCH handler: perform all requested unmaps,
 * then all requested maps, under a single mapping batch so the expensive
 * finish work (presumably TLB maintenance — confirm in the VM layer) runs
 * once at the end.  On error, args->num_unmaps / args->num_maps are
 * rewritten to report how far processing got.
 */
static int gk20a_as_ioctl_map_buffer_batch(
	struct gk20a_as_share *as_share,
	struct nvgpu_as_map_buffer_batch_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);
	u32 i;
	int err = 0;

	/* args->unmaps / args->maps are u64-encoded userspace pointers */
	struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
		(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
		args->unmaps;
	struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
		(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
		args->maps;

	struct vm_gk20a_mapping_batch batch;

	nvgpu_log_fn(g, " ");

	/* bound both counts before touching user memory */
	if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
	    args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
		return -EINVAL;

	nvgpu_vm_mapping_batch_start(&batch);

	/* phase 1: unmaps, copied in one element at a time */
	for (i = 0; i < args->num_unmaps; ++i) {
		struct nvgpu_as_unmap_buffer_args unmap_args;

		if (copy_from_user(&unmap_args, &user_unmap_args[i],
				   sizeof(unmap_args))) {
			err = -EFAULT;
			break;
		}

		nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
	}

	if (err) {
		/* finish the batch even on failure so completed unmaps land */
		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);

		/* report partial progress: i unmaps done, no maps attempted */
		args->num_unmaps = i;
		args->num_maps = 0;
		return err;
	}

	/* phase 2: maps; direct kind control is mandatory here too */
	for (i = 0; i < args->num_maps; ++i) {
		s16 compressible_kind;
		s16 incompressible_kind;

		struct nvgpu_as_map_buffer_ex_args map_args;
		memset(&map_args, 0, sizeof(map_args));

		if (copy_from_user(&map_args, &user_map_args[i],
				   sizeof(map_args))) {
			err = -EFAULT;
			break;
		}

		if (map_args.flags &
		    NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
			compressible_kind = map_args.compr_kind;
			incompressible_kind = map_args.incompr_kind;
		} else {
			/* direct kind control must be used */
			err = -EINVAL;
			break;
		}

		err = nvgpu_vm_map_buffer(
			as_share->vm, map_args.dmabuf_fd,
			&map_args.offset, map_args.flags,
			compressible_kind, incompressible_kind,
			map_args.buffer_offset,
			map_args.mapping_size,
			&batch);
		if (err)
			break;
	}

	nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);

	if (err)
		args->num_maps = i;
	/* note: args->num_unmaps will be unmodified, which is ok
	 * since all unmaps are done */

	return err;
}
222 | |||
/*
 * NVGPU_AS_IOCTL_GET_VA_REGIONS handler: describe the VA regions (one
 * per GMMU page size in use) to userspace.  Writes at most as many
 * entries as fit in the caller's buffer, then reports the full required
 * size back through args->buf_size.
 */
static int gk20a_as_ioctl_get_va_regions(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_get_va_regions_args *args)
{
	unsigned int i;
	unsigned int write_entries;
	struct nvgpu_as_va_region __user *user_region_ptr;
	struct vm_gk20a *vm = as_share->vm;
	struct gk20a *g = gk20a_from_vm(vm);
	unsigned int page_sizes = gmmu_page_size_kernel;

	nvgpu_log_fn(g, " ");

	/* without big pages there is one fewer reportable region */
	if (!vm->big_pages)
		page_sizes--;

	/* clamp to what the userspace buffer can hold */
	write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
	if (write_entries > page_sizes)
		write_entries = page_sizes;

	user_region_ptr =
		(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;

	for (i = 0; i < write_entries; ++i) {
		struct nvgpu_as_va_region region;
		struct nvgpu_allocator *vma = vm->vma[i];

		memset(&region, 0, sizeof(struct nvgpu_as_va_region));

		region.page_size = vm->gmmu_page_sizes[i];
		region.offset = nvgpu_alloc_base(vma);
		/* No __aeabi_uldivmod() on some platforms... */
		region.pages = (nvgpu_alloc_end(vma) -
			nvgpu_alloc_base(vma)) >> ilog2(region.page_size);

		if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
			return -EFAULT;
	}

	/* tell the caller the size needed to receive every region */
	args->buf_size =
		page_sizes * sizeof(struct nvgpu_as_va_region);

	return 0;
}
267 | |||
/*
 * NVGPU_AS_IOCTL_GET_SYNC_RO_MAP handler: report the GPU VA and size of
 * the read-only syncpoint map for this address space.  Only available
 * when built with nvhost support, the HAL provides get_sync_ro_map, and
 * the platform has syncpoints; -EINVAL otherwise.
 */
static int nvgpu_as_ioctl_get_sync_ro_map(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_get_sync_ro_map_args *args)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct vm_gk20a *vm = as_share->vm;
	struct gk20a *g = gk20a_from_vm(vm);
	u64 base_gpuva;
	u32 sync_size;
	int err = 0;

	if (!g->ops.fifo.get_sync_ro_map)
		return -EINVAL;

	if (!gk20a_platform_has_syncpoints(g))
		return -EINVAL;

	err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size);
	if (err)
		return err;

	/* copy results out through the ioctl arg struct */
	args->base_gpuva = base_gpuva;
	args->sync_size = sync_size;

	return err;
#else
	return -EINVAL;
#endif
}
297 | |||
/*
 * open() for the -as device node: allocate a fresh address-space share
 * and stash it in filp->private_data for the ioctl/release handlers.
 */
int gk20a_as_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a_as_share *as_share;
	struct gk20a *g;
	int err;

	/* recover the driver instance from the cdev embedded in it */
	l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev);
	g = &l->g;

	nvgpu_log_fn(g, " ");

	err = gk20a_as_alloc_share(g, 0, 0, &as_share);
	if (err) {
		nvgpu_log_fn(g, "failed to alloc share");
		return err;
	}

	filp->private_data = as_share;
	return 0;
}
319 | |||
320 | int gk20a_as_dev_release(struct inode *inode, struct file *filp) | ||
321 | { | ||
322 | struct gk20a_as_share *as_share = filp->private_data; | ||
323 | |||
324 | if (!as_share) | ||
325 | return 0; | ||
326 | |||
327 | return gk20a_as_release_share(as_share); | ||
328 | } | ||
329 | |||
/*
 * ioctl() dispatcher for the -as device node.  Validates the command,
 * copies the argument struct into a kernel buffer, powers the GPU up
 * around the operation, and copies results back for _IOC_READ commands.
 */
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int err = 0;
	struct gk20a_as_share *as_share = filp->private_data;
	struct gk20a *g = gk20a_from_as(as_share->as);

	/* on-stack staging buffer large enough for any AS ioctl arg */
	u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];

	nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));

	/* reject commands outside this driver's AS ioctl space */
	if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* keep the GPU powered for the duration of the operation */
	err = gk20a_busy(g);
	if (err)
		return err;

	switch (cmd) {
	case NVGPU_AS_IOCTL_BIND_CHANNEL:
		trace_gk20a_as_ioctl_bind_channel(g->name);
		err = gk20a_as_ioctl_bind_channel(as_share,
			       (struct nvgpu_as_bind_channel_args *)buf);

		break;
	case NVGPU32_AS_IOCTL_ALLOC_SPACE:
	{
		/* 32-bit compat variant: widen args, call, narrow result */
		struct nvgpu32_as_alloc_space_args *args32 =
			(struct nvgpu32_as_alloc_space_args *)buf;
		struct nvgpu_as_alloc_space_args args;

		args.pages = args32->pages;
		args.page_size = args32->page_size;
		args.flags = args32->flags;
		args.o_a.offset = args32->o_a.offset;
		trace_gk20a_as_ioctl_alloc_space(g->name);
		err = gk20a_as_ioctl_alloc_space(as_share, &args);
		args32->o_a.offset = args.o_a.offset;
		break;
	}
	case NVGPU_AS_IOCTL_ALLOC_SPACE:
		trace_gk20a_as_ioctl_alloc_space(g->name);
		err = gk20a_as_ioctl_alloc_space(as_share,
				(struct nvgpu_as_alloc_space_args *)buf);
		break;
	case NVGPU_AS_IOCTL_FREE_SPACE:
		trace_gk20a_as_ioctl_free_space(g->name);
		err = gk20a_as_ioctl_free_space(as_share,
				(struct nvgpu_as_free_space_args *)buf);
		break;
	case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
		trace_gk20a_as_ioctl_map_buffer(g->name);
		err = gk20a_as_ioctl_map_buffer_ex(as_share,
				(struct nvgpu_as_map_buffer_ex_args *)buf);
		break;
	case NVGPU_AS_IOCTL_UNMAP_BUFFER:
		trace_gk20a_as_ioctl_unmap_buffer(g->name);
		err = gk20a_as_ioctl_unmap_buffer(as_share,
				(struct nvgpu_as_unmap_buffer_args *)buf);
		break;
	case NVGPU_AS_IOCTL_GET_VA_REGIONS:
		trace_gk20a_as_ioctl_get_va_regions(g->name);
		err = gk20a_as_ioctl_get_va_regions(as_share,
				(struct nvgpu_as_get_va_regions_args *)buf);
		break;
	case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
		err = gk20a_as_ioctl_map_buffer_batch(as_share,
				(struct nvgpu_as_map_buffer_batch_args *)buf);
		break;
	case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
		err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
			(struct nvgpu_as_get_sync_ro_map_args *)buf);
		break;
	default:
		err = -ENOTTY;
		break;
	}

	gk20a_idle(g);

	/* copy results back only on success for read-direction commands */
	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;

	return err;
}
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.h b/drivers/gpu/nvgpu/os/linux/ioctl_as.h new file mode 100644 index 00000000..b3de3782 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * GK20A Address Spaces | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
#ifndef __NVGPU_COMMON_LINUX_AS_H__
#define __NVGPU_COMMON_LINUX_AS_H__

struct inode;
struct file;

/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
 * num_maps */
#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256

/* struct file_operations driver interface for the -as device node */
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c new file mode 100644 index 00000000..b04bb9de --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c | |||
@@ -0,0 +1,1388 @@ | |||
1 | /* | ||
2 | * GK20A Graphics channel | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <trace/events/gk20a.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/anon_inodes.h> | ||
22 | #include <linux/dma-buf.h> | ||
23 | #include <linux/poll.h> | ||
24 | #include <uapi/linux/nvgpu.h> | ||
25 | |||
26 | #include <nvgpu/semaphore.h> | ||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/kmem.h> | ||
29 | #include <nvgpu/log.h> | ||
30 | #include <nvgpu/list.h> | ||
31 | #include <nvgpu/debug.h> | ||
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/error_notifier.h> | ||
34 | #include <nvgpu/barrier.h> | ||
35 | #include <nvgpu/nvhost.h> | ||
36 | #include <nvgpu/os_sched.h> | ||
37 | |||
38 | #include "gk20a/gk20a.h" | ||
39 | #include "gk20a/dbg_gpu_gk20a.h" | ||
40 | #include "gk20a/fence_gk20a.h" | ||
41 | |||
42 | #include "platform_gk20a.h" | ||
43 | #include "ioctl_channel.h" | ||
44 | #include "channel.h" | ||
45 | #include "os_linux.h" | ||
46 | #include "ctxsw_trace.h" | ||
47 | |||
48 | /* the minimal size of client buffer */ | ||
49 | #define CSS_MIN_CLIENT_SNAPSHOT_SIZE \ | ||
50 | (sizeof(struct gk20a_cs_snapshot_fifo) + \ | ||
51 | sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256) | ||
52 | |||
53 | static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode) | ||
54 | { | ||
55 | switch (graphics_preempt_mode) { | ||
56 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
57 | return "WFI"; | ||
58 | default: | ||
59 | return "?"; | ||
60 | } | ||
61 | } | ||
62 | |||
63 | static const char *gr_gk20a_compute_preempt_mode_name(u32 compute_preempt_mode) | ||
64 | { | ||
65 | switch (compute_preempt_mode) { | ||
66 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
67 | return "WFI"; | ||
68 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
69 | return "CTA"; | ||
70 | default: | ||
71 | return "?"; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static void gk20a_channel_trace_sched_param( | ||
76 | void (*trace)(int chid, int tsgid, pid_t pid, u32 timeslice, | ||
77 | u32 timeout, const char *interleave, | ||
78 | const char *graphics_preempt_mode, | ||
79 | const char *compute_preempt_mode), | ||
80 | struct channel_gk20a *ch) | ||
81 | { | ||
82 | struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); | ||
83 | |||
84 | if (!tsg) | ||
85 | return; | ||
86 | |||
87 | (trace)(ch->chid, ch->tsgid, ch->pid, | ||
88 | tsg_gk20a_from_ch(ch)->timeslice_us, | ||
89 | ch->timeout_ms_max, | ||
90 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | ||
91 | gr_gk20a_graphics_preempt_mode_name( | ||
92 | tsg->gr_ctx.graphics_preempt_mode), | ||
93 | gr_gk20a_compute_preempt_mode_name( | ||
94 | tsg->gr_ctx.compute_preempt_mode)); | ||
95 | } | ||
96 | |||
/*
 * Although channels do have pointers back to the gk20a struct that they were
 * created under in cases where the driver is killed that pointer can be bad.
 * The channel memory can be freed before the release() function for a given
 * channel is called. This happens when the driver dies and userspace doesn't
 * get a chance to call release() until after the entire gk20a driver data is
 * unloaded and freed.
 */
struct channel_priv {
	struct gk20a *g;	/* driver instance; refcounted via gk20a_get() */
	struct channel_gk20a *c;	/* the channel this fd refers to */
};
109 | |||
110 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
111 | |||
/*
 * Unmap and release the channel's cyclestats dma-buf, if one is attached.
 * Safe to call when no buffer is attached (handler is NULL).
 */
void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
{
	struct nvgpu_channel_linux *priv = ch->os_priv;

	/* disable existing cyclestats buffer */
	nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
	if (priv->cyclestate_buffer_handler) {
		dma_buf_vunmap(priv->cyclestate_buffer_handler,
			       ch->cyclestate.cyclestate_buffer);
		dma_buf_put(priv->cyclestate_buffer_handler);
		priv->cyclestate_buffer_handler = NULL;
		ch->cyclestate.cyclestate_buffer = NULL;
		ch->cyclestate.cyclestate_buffer_size = 0;
	}
	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
}
128 | |||
129 | static int gk20a_channel_cycle_stats(struct channel_gk20a *ch, | ||
130 | struct nvgpu_cycle_stats_args *args) | ||
131 | { | ||
132 | struct dma_buf *dmabuf; | ||
133 | void *virtual_address; | ||
134 | struct nvgpu_channel_linux *priv = ch->os_priv; | ||
135 | |||
136 | /* is it allowed to handle calls for current GPU? */ | ||
137 | if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS)) | ||
138 | return -ENOSYS; | ||
139 | |||
140 | if (args->dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
141 | |||
142 | /* set up new cyclestats buffer */ | ||
143 | dmabuf = dma_buf_get(args->dmabuf_fd); | ||
144 | if (IS_ERR(dmabuf)) | ||
145 | return PTR_ERR(dmabuf); | ||
146 | virtual_address = dma_buf_vmap(dmabuf); | ||
147 | if (!virtual_address) | ||
148 | return -ENOMEM; | ||
149 | |||
150 | priv->cyclestate_buffer_handler = dmabuf; | ||
151 | ch->cyclestate.cyclestate_buffer = virtual_address; | ||
152 | ch->cyclestate.cyclestate_buffer_size = dmabuf->size; | ||
153 | return 0; | ||
154 | |||
155 | } else if (!args->dmabuf_fd && priv->cyclestate_buffer_handler) { | ||
156 | gk20a_channel_free_cycle_stats_buffer(ch); | ||
157 | return 0; | ||
158 | |||
159 | } else if (!args->dmabuf_fd && !priv->cyclestate_buffer_handler) { | ||
160 | /* no requst from GL */ | ||
161 | return 0; | ||
162 | |||
163 | } else { | ||
164 | pr_err("channel already has cyclestats buffer\n"); | ||
165 | return -EINVAL; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch) | ||
170 | { | ||
171 | int ret; | ||
172 | |||
173 | nvgpu_mutex_acquire(&ch->cs_client_mutex); | ||
174 | if (ch->cs_client) | ||
175 | ret = gr_gk20a_css_flush(ch, ch->cs_client); | ||
176 | else | ||
177 | ret = -EBADF; | ||
178 | nvgpu_mutex_release(&ch->cs_client_mutex); | ||
179 | |||
180 | return ret; | ||
181 | } | ||
182 | |||
/*
 * Attach a cyclestats snapshot client to @ch, backed by the dma-buf in
 * @dmabuf_fd.  The buffer must be at least CSS_MIN_CLIENT_SNAPSHOT_SIZE
 * bytes.  Only one client per channel: -EEXIST if one is already attached.
 * On success the allocated perfmon range is reported via @perfmon_id_start.
 */
static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
				u32 dmabuf_fd,
				u32 perfmon_id_count,
				u32 *perfmon_id_start)
{
	int ret = 0;
	struct gk20a *g = ch->g;
	struct gk20a_cs_snapshot_client_linux *client_linux;
	struct gk20a_cs_snapshot_client *client;

	nvgpu_mutex_acquire(&ch->cs_client_mutex);
	if (ch->cs_client) {
		nvgpu_mutex_release(&ch->cs_client_mutex);
		return -EEXIST;
	}

	client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
	if (!client_linux) {
		ret = -ENOMEM;
		goto err;
	}

	client_linux->dmabuf_fd   = dmabuf_fd;
	client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
	if (IS_ERR(client_linux->dma_handler)) {
		ret = PTR_ERR(client_linux->dma_handler);
		/* clear so the error path doesn't dma_buf_put() an ERR_PTR */
		client_linux->dma_handler = NULL;
		goto err_free;
	}

	client = &client_linux->cs_client;
	client->snapshot_size = client_linux->dma_handler->size;
	if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
		ret = -ENOMEM;
		goto err_put;
	}

	client->snapshot = (struct gk20a_cs_snapshot_fifo *)
			dma_buf_vmap(client_linux->dma_handler);
	if (!client->snapshot) {
		ret = -ENOMEM;
		goto err_put;
	}

	ch->cs_client = client;

	ret = gr_gk20a_css_attach(ch,
				perfmon_id_count,
				perfmon_id_start,
				ch->cs_client);

	nvgpu_mutex_release(&ch->cs_client_mutex);

	return ret;

	/* goto-chain cleanup: release in reverse acquisition order */
err_put:
	dma_buf_put(client_linux->dma_handler);
err_free:
	nvgpu_kfree(g, client_linux);
err:
	nvgpu_mutex_release(&ch->cs_client_mutex);
	return ret;
}
246 | |||
/*
 * Detach and free the channel's cyclestats snapshot client, unmapping
 * and releasing its backing dma-buf.  No-op (returns 0) when no client
 * is attached.  Returns the result of gr_gk20a_css_detach().
 */
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
	int ret;
	struct gk20a_cs_snapshot_client_linux *client_linux;

	nvgpu_mutex_acquire(&ch->cs_client_mutex);
	if (!ch->cs_client) {
		nvgpu_mutex_release(&ch->cs_client_mutex);
		return 0;
	}

	/* recover the Linux wrapper around the generic client */
	client_linux = container_of(ch->cs_client,
				struct gk20a_cs_snapshot_client_linux,
				cs_client);

	ret = gr_gk20a_css_detach(ch, ch->cs_client);

	if (client_linux->dma_handler) {
		if (ch->cs_client->snapshot)
			dma_buf_vunmap(client_linux->dma_handler,
				       ch->cs_client->snapshot);
		dma_buf_put(client_linux->dma_handler);
	}

	ch->cs_client = NULL;
	nvgpu_kfree(ch->g, client_linux);

	nvgpu_mutex_release(&ch->cs_client_mutex);

	return ret;
}
278 | |||
/*
 * NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT handler: dispatch the
 * flush/attach/detach sub-commands.  args->extra carries the perfmon
 * count on attach input and the allocated perfmon start on output.
 */
static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
			struct nvgpu_cycle_stats_snapshot_args *args)
{
	int ret;

	/* is it allowed to handle calls for current GPU? */
	if (!nvgpu_is_enabled(ch->g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT))
		return -ENOSYS;

	if (!args->dmabuf_fd)
		return -EINVAL;

	/* handle the command (most frequent cases first) */
	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
		ret = gk20a_flush_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
		ret = gk20a_attach_cycle_stats_snapshot(ch,
						args->dmabuf_fd,
						args->extra,
						&args->extra);
		break;

	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
		ret = gk20a_channel_free_cycle_stats_snapshot(ch);
		args->extra = 0;
		break;

	default:
		pr_err("cyclestats: unknown command %u\n", args->cmd);
		ret = -EINVAL;
		break;
	}

	return ret;
}
318 | #endif | ||
319 | |||
320 | static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, | ||
321 | struct nvgpu_channel_wdt_args *args) | ||
322 | { | ||
323 | u32 status = args->wdt_status & (NVGPU_IOCTL_CHANNEL_DISABLE_WDT | | ||
324 | NVGPU_IOCTL_CHANNEL_ENABLE_WDT); | ||
325 | |||
326 | if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT) | ||
327 | ch->timeout.enabled = false; | ||
328 | else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT) | ||
329 | ch->timeout.enabled = true; | ||
330 | else | ||
331 | return -EINVAL; | ||
332 | |||
333 | if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT) | ||
334 | ch->timeout.limit_ms = args->timeout_ms; | ||
335 | |||
336 | ch->timeout.debug_dump = (args->wdt_status & | ||
337 | NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0; | ||
338 | |||
339 | return 0; | ||
340 | } | ||
341 | |||
/*
 * Unmap and release the channel's error-notifier dma-buf, if set.
 * Clears all related pointers under the notifier mutex so concurrent
 * writers see either a fully valid or fully absent notifier.
 */
static void gk20a_channel_free_error_notifiers(struct channel_gk20a *ch)
{
	struct nvgpu_channel_linux *priv = ch->os_priv;

	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
	if (priv->error_notifier.dmabuf) {
		dma_buf_vunmap(priv->error_notifier.dmabuf, priv->error_notifier.vaddr);
		dma_buf_put(priv->error_notifier.dmabuf);
		priv->error_notifier.dmabuf = NULL;
		priv->error_notifier.notification = NULL;
		priv->error_notifier.vaddr = NULL;
	}
	nvgpu_mutex_release(&priv->error_notifier.mutex);
}
356 | |||
/*
 * NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER handler: map the dma-buf in
 * args->mem and point the channel's error notifier at args->offset
 * within it.  Any previously installed notifier is released first.
 */
static int gk20a_init_error_notifier(struct channel_gk20a *ch,
		struct nvgpu_set_error_notifier *args)
{
	struct dma_buf *dmabuf;
	void *va;
	/* end of the notification record within the buffer */
	u64 end = args->offset + sizeof(struct nvgpu_notification);
	struct nvgpu_channel_linux *priv = ch->os_priv;

	if (!args->mem) {
		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
		return -EINVAL;
	}

	dmabuf = dma_buf_get(args->mem);

	gk20a_channel_free_error_notifiers(ch);

	if (IS_ERR(dmabuf)) {
		pr_err("Invalid handle: %d\n", args->mem);
		return -EINVAL;
	}

	/* second test also catches overflow of the u64 addition above */
	if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
		dma_buf_put(dmabuf);
		nvgpu_err(ch->g, "gk20a_init_error_notifier: invalid offset");
		return -EINVAL;
	}

	/* offset was bounds-checked with user-controlled data above */
	nvgpu_speculation_barrier();

	/* map handle */
	va = dma_buf_vmap(dmabuf);
	if (!va) {
		dma_buf_put(dmabuf);
		pr_err("Cannot map notifier handle\n");
		return -ENOMEM;
	}

	priv->error_notifier.notification = va + args->offset;
	priv->error_notifier.vaddr = va;
	memset(priv->error_notifier.notification, 0,
	       sizeof(struct nvgpu_notification));

	/* set channel notifiers pointer */
	nvgpu_mutex_acquire(&priv->error_notifier.mutex);
	priv->error_notifier.dmabuf = dmabuf;
	nvgpu_mutex_release(&priv->error_notifier.mutex);

	return 0;
}
407 | |||
/*
 * This returns the channel with a reference. The caller must
 * gk20a_channel_put() the ref back after use.
 *
 * NULL is returned if the channel was not found.
 */
struct channel_gk20a *gk20a_get_channel_from_file(int fd)
{
	struct channel_gk20a *ch;
	struct channel_priv *priv;
	struct file *f = fget(fd);

	if (!f)
		return NULL;

	/* verify the fd really is an nvgpu channel fd before trusting
	 * its private_data */
	if (f->f_op != &gk20a_channel_ops) {
		fput(f);
		return NULL;
	}

	/* take a channel ref while still holding the file ref */
	priv = (struct channel_priv *)f->private_data;
	ch = gk20a_channel_get(priv->c);
	fput(f);
	return ch;
}
433 | |||
/*
 * release() for a channel fd: close the channel, free its error
 * notifiers, and drop the gk20a reference taken at open time.
 * Always returns 0 — fput() ignores release errors anyway.
 */
int gk20a_channel_release(struct inode *inode, struct file *filp)
{
	struct channel_priv *priv = filp->private_data;
	struct channel_gk20a *ch;
	struct gk20a *g;

	int err;

	/* We could still end up here even if the channel_open failed, e.g.
	 * if we ran out of hw channel IDs.
	 */
	if (!priv)
		return 0;

	ch = priv->c;
	g = priv->g;

	/* power up the GPU for the teardown work; if that fails, skip the
	 * hardware-touching close and just free the software state */
	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to release a channel!");
		goto channel_release;
	}

	trace_gk20a_channel_release(dev_name(dev_from_gk20a(g)));

	gk20a_channel_close(ch);
	gk20a_channel_free_error_notifiers(ch);

	gk20a_idle(g);

channel_release:
	/* balances the gk20a_get() done when the fd was opened */
	gk20a_put(g);
	nvgpu_kfree(g, filp->private_data);
	filp->private_data = NULL;
	return 0;
}
470 | |||
471 | /* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */ | ||
472 | static int __gk20a_channel_open(struct gk20a *g, | ||
473 | struct file *filp, s32 runlist_id) | ||
474 | { | ||
475 | int err; | ||
476 | struct channel_gk20a *ch; | ||
477 | struct channel_priv *priv; | ||
478 | |||
479 | nvgpu_log_fn(g, " "); | ||
480 | |||
481 | g = gk20a_get(g); | ||
482 | if (!g) | ||
483 | return -ENODEV; | ||
484 | |||
485 | trace_gk20a_channel_open(dev_name(dev_from_gk20a(g))); | ||
486 | |||
487 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
488 | if (!priv) { | ||
489 | err = -ENOMEM; | ||
490 | goto free_ref; | ||
491 | } | ||
492 | |||
493 | err = gk20a_busy(g); | ||
494 | if (err) { | ||
495 | nvgpu_err(g, "failed to power on, %d", err); | ||
496 | goto fail_busy; | ||
497 | } | ||
498 | /* All the user space channel should be non privilege */ | ||
499 | ch = gk20a_open_new_channel(g, runlist_id, false, | ||
500 | nvgpu_current_pid(g), nvgpu_current_tid(g)); | ||
501 | gk20a_idle(g); | ||
502 | if (!ch) { | ||
503 | nvgpu_err(g, | ||
504 | "failed to get f"); | ||
505 | err = -ENOMEM; | ||
506 | goto fail_busy; | ||
507 | } | ||
508 | |||
509 | gk20a_channel_trace_sched_param( | ||
510 | trace_gk20a_channel_sched_defaults, ch); | ||
511 | |||
512 | priv->g = g; | ||
513 | priv->c = ch; | ||
514 | |||
515 | filp->private_data = priv; | ||
516 | return 0; | ||
517 | |||
518 | fail_busy: | ||
519 | nvgpu_kfree(g, priv); | ||
520 | free_ref: | ||
521 | gk20a_put(g); | ||
522 | return err; | ||
523 | } | ||
524 | |||
525 | int gk20a_channel_open(struct inode *inode, struct file *filp) | ||
526 | { | ||
527 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
528 | struct nvgpu_os_linux, channel.cdev); | ||
529 | struct gk20a *g = &l->g; | ||
530 | int ret; | ||
531 | |||
532 | nvgpu_log_fn(g, "start"); | ||
533 | ret = __gk20a_channel_open(g, filp, -1); | ||
534 | |||
535 | nvgpu_log_fn(g, "end"); | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | int gk20a_channel_open_ioctl(struct gk20a *g, | ||
540 | struct nvgpu_channel_open_args *args) | ||
541 | { | ||
542 | int err; | ||
543 | int fd; | ||
544 | struct file *file; | ||
545 | char name[64]; | ||
546 | s32 runlist_id = args->in.runlist_id; | ||
547 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
548 | |||
549 | err = get_unused_fd_flags(O_RDWR); | ||
550 | if (err < 0) | ||
551 | return err; | ||
552 | fd = err; | ||
553 | |||
554 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", | ||
555 | dev_name(dev_from_gk20a(g)), fd); | ||
556 | |||
557 | file = anon_inode_getfile(name, l->channel.cdev.ops, NULL, O_RDWR); | ||
558 | if (IS_ERR(file)) { | ||
559 | err = PTR_ERR(file); | ||
560 | goto clean_up; | ||
561 | } | ||
562 | |||
563 | err = __gk20a_channel_open(g, file, runlist_id); | ||
564 | if (err) | ||
565 | goto clean_up_file; | ||
566 | |||
567 | fd_install(fd, file); | ||
568 | args->out.channel_fd = fd; | ||
569 | return 0; | ||
570 | |||
571 | clean_up_file: | ||
572 | fput(file); | ||
573 | clean_up: | ||
574 | put_unused_fd(fd); | ||
575 | return err; | ||
576 | } | ||
577 | |||
578 | static u32 nvgpu_gpfifo_user_flags_to_common_flags(u32 user_flags) | ||
579 | { | ||
580 | u32 flags = 0; | ||
581 | |||
582 | if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) | ||
583 | flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_VPR; | ||
584 | |||
585 | if (user_flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) | ||
586 | flags |= NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC; | ||
587 | |||
588 | if (user_flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE) | ||
589 | flags |= NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE; | ||
590 | |||
591 | return flags; | ||
592 | } | ||
593 | |||
594 | static void nvgpu_get_gpfifo_ex_args( | ||
595 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args, | ||
596 | struct nvgpu_gpfifo_args *gpfifo_args) | ||
597 | { | ||
598 | gpfifo_args->num_entries = alloc_gpfifo_ex_args->num_entries; | ||
599 | gpfifo_args->num_inflight_jobs = alloc_gpfifo_ex_args->num_inflight_jobs; | ||
600 | gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( | ||
601 | alloc_gpfifo_ex_args->flags); | ||
602 | } | ||
603 | |||
604 | static void nvgpu_get_gpfifo_args( | ||
605 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args, | ||
606 | struct nvgpu_gpfifo_args *gpfifo_args) | ||
607 | { | ||
608 | /* | ||
609 | * Kernel can insert one extra gpfifo entry before user | ||
610 | * submitted gpfifos and another one after, for internal usage. | ||
611 | * Triple the requested size. | ||
612 | */ | ||
613 | gpfifo_args->num_entries = alloc_gpfifo_args->num_entries * 3; | ||
614 | gpfifo_args->num_inflight_jobs = 0; | ||
615 | gpfifo_args->flags = nvgpu_gpfifo_user_flags_to_common_flags( | ||
616 | alloc_gpfifo_args->flags); | ||
617 | } | ||
618 | |||
619 | static void nvgpu_get_fence_args( | ||
620 | struct nvgpu_fence *fence_args_in, | ||
621 | struct nvgpu_channel_fence *fence_args_out) | ||
622 | { | ||
623 | fence_args_out->id = fence_args_in->id; | ||
624 | fence_args_out->value = fence_args_in->value; | ||
625 | } | ||
626 | |||
627 | static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch, | ||
628 | ulong id, u32 offset, | ||
629 | u32 payload, u32 timeout) | ||
630 | { | ||
631 | struct dma_buf *dmabuf; | ||
632 | void *data; | ||
633 | u32 *semaphore; | ||
634 | int ret = 0; | ||
635 | |||
636 | /* do not wait if channel has timed out */ | ||
637 | if (ch->has_timedout) | ||
638 | return -ETIMEDOUT; | ||
639 | |||
640 | dmabuf = dma_buf_get(id); | ||
641 | if (IS_ERR(dmabuf)) { | ||
642 | nvgpu_err(ch->g, "invalid notifier nvmap handle 0x%lx", id); | ||
643 | return -EINVAL; | ||
644 | } | ||
645 | |||
646 | data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT); | ||
647 | if (!data) { | ||
648 | nvgpu_err(ch->g, "failed to map notifier memory"); | ||
649 | ret = -EINVAL; | ||
650 | goto cleanup_put; | ||
651 | } | ||
652 | |||
653 | semaphore = data + (offset & ~PAGE_MASK); | ||
654 | |||
655 | ret = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
656 | &ch->semaphore_wq, | ||
657 | *semaphore == payload || ch->has_timedout, | ||
658 | timeout); | ||
659 | |||
660 | dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data); | ||
661 | cleanup_put: | ||
662 | dma_buf_put(dmabuf); | ||
663 | return ret; | ||
664 | } | ||
665 | |||
666 | static int gk20a_channel_wait(struct channel_gk20a *ch, | ||
667 | struct nvgpu_wait_args *args) | ||
668 | { | ||
669 | struct dma_buf *dmabuf; | ||
670 | struct gk20a *g = ch->g; | ||
671 | struct notification *notif; | ||
672 | struct timespec tv; | ||
673 | u64 jiffies; | ||
674 | ulong id; | ||
675 | u32 offset; | ||
676 | int remain, ret = 0; | ||
677 | u64 end; | ||
678 | |||
679 | nvgpu_log_fn(g, " "); | ||
680 | |||
681 | if (ch->has_timedout) | ||
682 | return -ETIMEDOUT; | ||
683 | |||
684 | switch (args->type) { | ||
685 | case NVGPU_WAIT_TYPE_NOTIFIER: | ||
686 | id = args->condition.notifier.dmabuf_fd; | ||
687 | offset = args->condition.notifier.offset; | ||
688 | end = offset + sizeof(struct notification); | ||
689 | |||
690 | dmabuf = dma_buf_get(id); | ||
691 | if (IS_ERR(dmabuf)) { | ||
692 | nvgpu_err(g, "invalid notifier nvmap handle 0x%lx", | ||
693 | id); | ||
694 | return -EINVAL; | ||
695 | } | ||
696 | |||
697 | if (end > dmabuf->size || end < sizeof(struct notification)) { | ||
698 | dma_buf_put(dmabuf); | ||
699 | nvgpu_err(g, "invalid notifier offset"); | ||
700 | return -EINVAL; | ||
701 | } | ||
702 | |||
703 | nvgpu_speculation_barrier(); | ||
704 | |||
705 | notif = dma_buf_vmap(dmabuf); | ||
706 | if (!notif) { | ||
707 | nvgpu_err(g, "failed to map notifier memory"); | ||
708 | return -ENOMEM; | ||
709 | } | ||
710 | |||
711 | notif = (struct notification *)((uintptr_t)notif + offset); | ||
712 | |||
713 | /* user should set status pending before | ||
714 | * calling this ioctl */ | ||
715 | remain = NVGPU_COND_WAIT_INTERRUPTIBLE( | ||
716 | &ch->notifier_wq, | ||
717 | notif->status == 0 || ch->has_timedout, | ||
718 | args->timeout); | ||
719 | |||
720 | if (remain == 0 && notif->status != 0) { | ||
721 | ret = -ETIMEDOUT; | ||
722 | goto notif_clean_up; | ||
723 | } else if (remain < 0) { | ||
724 | ret = -EINTR; | ||
725 | goto notif_clean_up; | ||
726 | } | ||
727 | |||
728 | /* TBD: fill in correct information */ | ||
729 | jiffies = get_jiffies_64(); | ||
730 | jiffies_to_timespec(jiffies, &tv); | ||
731 | notif->timestamp.nanoseconds[0] = tv.tv_nsec; | ||
732 | notif->timestamp.nanoseconds[1] = tv.tv_sec; | ||
733 | notif->info32 = 0xDEADBEEF; /* should be object name */ | ||
734 | notif->info16 = ch->chid; /* should be method offset */ | ||
735 | |||
736 | notif_clean_up: | ||
737 | dma_buf_vunmap(dmabuf, notif); | ||
738 | return ret; | ||
739 | |||
740 | case NVGPU_WAIT_TYPE_SEMAPHORE: | ||
741 | ret = gk20a_channel_wait_semaphore(ch, | ||
742 | args->condition.semaphore.dmabuf_fd, | ||
743 | args->condition.semaphore.offset, | ||
744 | args->condition.semaphore.payload, | ||
745 | args->timeout); | ||
746 | |||
747 | break; | ||
748 | |||
749 | default: | ||
750 | ret = -EINVAL; | ||
751 | break; | ||
752 | } | ||
753 | |||
754 | return ret; | ||
755 | } | ||
756 | |||
757 | static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, | ||
758 | struct nvgpu_zcull_bind_args *args) | ||
759 | { | ||
760 | struct gk20a *g = ch->g; | ||
761 | struct gr_gk20a *gr = &g->gr; | ||
762 | |||
763 | nvgpu_log_fn(gr->g, " "); | ||
764 | |||
765 | return g->ops.gr.bind_ctxsw_zcull(g, gr, ch, | ||
766 | args->gpu_va, args->mode); | ||
767 | } | ||
768 | |||
769 | static int gk20a_ioctl_channel_submit_gpfifo( | ||
770 | struct channel_gk20a *ch, | ||
771 | struct nvgpu_submit_gpfifo_args *args) | ||
772 | { | ||
773 | struct nvgpu_channel_fence fence; | ||
774 | struct gk20a_fence *fence_out; | ||
775 | struct fifo_profile_gk20a *profile = NULL; | ||
776 | u32 submit_flags = 0; | ||
777 | int fd = -1; | ||
778 | struct gk20a *g = ch->g; | ||
779 | |||
780 | int ret = 0; | ||
781 | nvgpu_log_fn(g, " "); | ||
782 | |||
783 | profile = gk20a_fifo_profile_acquire(ch->g); | ||
784 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY); | ||
785 | |||
786 | if (ch->has_timedout) | ||
787 | return -ETIMEDOUT; | ||
788 | |||
789 | nvgpu_get_fence_args(&args->fence, &fence); | ||
790 | submit_flags = | ||
791 | nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); | ||
792 | |||
793 | /* Try and allocate an fd here*/ | ||
794 | if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | ||
795 | && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { | ||
796 | fd = get_unused_fd_flags(O_RDWR); | ||
797 | if (fd < 0) | ||
798 | return fd; | ||
799 | } | ||
800 | |||
801 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, | ||
802 | submit_flags, &fence, | ||
803 | &fence_out, profile); | ||
804 | |||
805 | if (ret) { | ||
806 | if (fd != -1) | ||
807 | put_unused_fd(fd); | ||
808 | goto clean_up; | ||
809 | } | ||
810 | |||
811 | /* Convert fence_out to something we can pass back to user space. */ | ||
812 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { | ||
813 | if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | ||
814 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
815 | if (ret) | ||
816 | put_unused_fd(fd); | ||
817 | else | ||
818 | args->fence.id = fd; | ||
819 | } else { | ||
820 | args->fence.id = fence_out->syncpt_id; | ||
821 | args->fence.value = fence_out->syncpt_value; | ||
822 | } | ||
823 | } | ||
824 | gk20a_fence_put(fence_out); | ||
825 | |||
826 | gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT); | ||
827 | if (profile) | ||
828 | gk20a_fifo_profile_release(ch->g, profile); | ||
829 | |||
830 | clean_up: | ||
831 | return ret; | ||
832 | } | ||
833 | |||
834 | /* | ||
835 | * Convert linux specific runlist level of the form NVGPU_RUNLIST_INTERLEAVE_LEVEL_* | ||
836 | * to common runlist level of the form NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_* | ||
837 | */ | ||
838 | u32 nvgpu_get_common_runlist_level(u32 level) | ||
839 | { | ||
840 | switch (level) { | ||
841 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: | ||
842 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | ||
843 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: | ||
844 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM; | ||
845 | case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: | ||
846 | return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH; | ||
847 | default: | ||
848 | pr_err("%s: incorrect runlist level\n", __func__); | ||
849 | } | ||
850 | |||
851 | return level; | ||
852 | } | ||
853 | |||
854 | static u32 nvgpu_obj_ctx_user_flags_to_common_flags(u32 user_flags) | ||
855 | { | ||
856 | u32 flags = 0; | ||
857 | |||
858 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_GFXP) | ||
859 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP; | ||
860 | |||
861 | if (user_flags & NVGPU_ALLOC_OBJ_FLAGS_CILP) | ||
862 | flags |= NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP; | ||
863 | |||
864 | return flags; | ||
865 | } | ||
866 | |||
867 | static int nvgpu_ioctl_channel_alloc_obj_ctx(struct channel_gk20a *ch, | ||
868 | u32 class_num, u32 user_flags) | ||
869 | { | ||
870 | return ch->g->ops.gr.alloc_obj_ctx(ch, class_num, | ||
871 | nvgpu_obj_ctx_user_flags_to_common_flags(user_flags)); | ||
872 | } | ||
873 | |||
874 | /* | ||
875 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
876 | * into linux preemption mode flags of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
877 | */ | ||
878 | u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags) | ||
879 | { | ||
880 | u32 flags = 0; | ||
881 | |||
882 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_WFI) | ||
883 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
884 | if (graphics_preempt_mode_flags & NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) | ||
885 | flags |= NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
886 | |||
887 | return flags; | ||
888 | } | ||
889 | |||
890 | /* | ||
891 | * Convert common preemption mode flags of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
892 | * into linux preemption mode flags of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
893 | */ | ||
894 | u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags) | ||
895 | { | ||
896 | u32 flags = 0; | ||
897 | |||
898 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_WFI) | ||
899 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
900 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CTA) | ||
901 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
902 | if (compute_preempt_mode_flags & NVGPU_PREEMPTION_MODE_COMPUTE_CILP) | ||
903 | flags |= NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
904 | |||
905 | return flags; | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
910 | * into linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
911 | */ | ||
912 | u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
913 | { | ||
914 | switch (graphics_preempt_mode) { | ||
915 | case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI: | ||
916 | return NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
917 | case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP: | ||
918 | return NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | ||
919 | } | ||
920 | |||
921 | return graphics_preempt_mode; | ||
922 | } | ||
923 | |||
924 | /* | ||
925 | * Convert common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
926 | * into linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
927 | */ | ||
928 | u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) | ||
929 | { | ||
930 | switch (compute_preempt_mode) { | ||
931 | case NVGPU_PREEMPTION_MODE_COMPUTE_WFI: | ||
932 | return NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
933 | case NVGPU_PREEMPTION_MODE_COMPUTE_CTA: | ||
934 | return NVGPU_COMPUTE_PREEMPTION_MODE_CTA; | ||
935 | case NVGPU_PREEMPTION_MODE_COMPUTE_CILP: | ||
936 | return NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | ||
937 | } | ||
938 | |||
939 | return compute_preempt_mode; | ||
940 | } | ||
941 | |||
942 | /* | ||
943 | * Convert linux preemption modes of the form NVGPU_GRAPHICS_PREEMPTION_MODE_* | ||
944 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_GRAPHICS_* | ||
945 | */ | ||
946 | static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) | ||
947 | { | ||
948 | switch (graphics_preempt_mode) { | ||
949 | case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: | ||
950 | return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; | ||
951 | case NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP: | ||
952 | return NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | ||
953 | } | ||
954 | |||
955 | return graphics_preempt_mode; | ||
956 | } | ||
957 | |||
958 | /* | ||
959 | * Convert linux preemption modes of the form NVGPU_COMPUTE_PREEMPTION_MODE_* | ||
960 | * into common preemption modes of the form NVGPU_PREEMPTION_MODE_COMPUTE_* | ||
961 | */ | ||
962 | static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) | ||
963 | { | ||
964 | switch (compute_preempt_mode) { | ||
965 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: | ||
966 | return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; | ||
967 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: | ||
968 | return NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | ||
969 | case NVGPU_COMPUTE_PREEMPTION_MODE_CILP: | ||
970 | return NVGPU_PREEMPTION_MODE_COMPUTE_CILP; | ||
971 | } | ||
972 | |||
973 | return compute_preempt_mode; | ||
974 | } | ||
975 | |||
976 | static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, | ||
977 | u32 graphics_preempt_mode, u32 compute_preempt_mode) | ||
978 | { | ||
979 | int err; | ||
980 | |||
981 | if (ch->g->ops.gr.set_preemption_mode) { | ||
982 | err = gk20a_busy(ch->g); | ||
983 | if (err) { | ||
984 | nvgpu_err(ch->g, "failed to power on, %d", err); | ||
985 | return err; | ||
986 | } | ||
987 | err = ch->g->ops.gr.set_preemption_mode(ch, | ||
988 | nvgpu_get_common_graphics_preempt_mode(graphics_preempt_mode), | ||
989 | nvgpu_get_common_compute_preempt_mode(compute_preempt_mode)); | ||
990 | gk20a_idle(ch->g); | ||
991 | } else { | ||
992 | err = -EINVAL; | ||
993 | } | ||
994 | |||
995 | return err; | ||
996 | } | ||
997 | |||
/*
 * NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: expose a syncpoint that
 * userspace can operate directly, creating the channel's user_sync
 * object on first use. Only available when user syncpoints are
 * enabled, the platform has syncpoints, and aggressive sync destroy
 * is disabled; -EINVAL otherwise (or when NVHOST support is compiled
 * out entirely).
 */
static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch,
		struct nvgpu_get_user_syncpoint_args *args)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a *g = ch->g;
	int err;

	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
		nvgpu_err(g, "user syncpoints not supported");
		return -EINVAL;
	}

	if (!gk20a_platform_has_syncpoints(g)) {
		nvgpu_err(g, "syncpoints not supported");
		return -EINVAL;
	}

	/* with aggressive destroy a syncpoint handed to userspace could
	 * be freed out from under it */
	if (g->aggressive_sync_destroy_thresh) {
		nvgpu_err(g, "sufficient syncpoints not available");
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&ch->sync_lock);
	if (ch->user_sync) {
		/* already created by an earlier call; reuse it */
		nvgpu_mutex_release(&ch->sync_lock);
	} else {
		ch->user_sync = gk20a_channel_sync_create(ch, true);
		if (!ch->user_sync) {
			nvgpu_mutex_release(&ch->sync_lock);
			return -ENOMEM;
		}
		nvgpu_mutex_release(&ch->sync_lock);

		/* first-time creation may require the channel's RAMFC to
		 * be set up again for the new sync object */
		if (g->ops.fifo.resetup_ramfc) {
			err = g->ops.fifo.resetup_ramfc(ch);
			if (err)
				return err;
		}
	}

	/* NOTE(review): ch->user_sync is dereferenced here outside
	 * sync_lock; presumably safe because user_sync is not torn down
	 * while the channel fd is open -- confirm against channel
	 * teardown. */
	args->syncpoint_id = ch->user_sync->syncpt_id(ch->user_sync);
	args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev,
			args->syncpoint_id);
	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS))
		args->gpu_va = ch->user_sync->syncpt_address(ch->user_sync);
	else
		args->gpu_va = 0;

	return 0;
#else
	return -EINVAL;
#endif
}
1051 | |||
1052 | long gk20a_channel_ioctl(struct file *filp, | ||
1053 | unsigned int cmd, unsigned long arg) | ||
1054 | { | ||
1055 | struct channel_priv *priv = filp->private_data; | ||
1056 | struct channel_gk20a *ch = priv->c; | ||
1057 | struct device *dev = dev_from_gk20a(ch->g); | ||
1058 | u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0}; | ||
1059 | int err = 0; | ||
1060 | struct gk20a *g = ch->g; | ||
1061 | |||
1062 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
1063 | |||
1064 | if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) || | ||
1065 | (_IOC_NR(cmd) == 0) || | ||
1066 | (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) || | ||
1067 | (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE)) | ||
1068 | return -EINVAL; | ||
1069 | |||
1070 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1071 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
1072 | return -EFAULT; | ||
1073 | } | ||
1074 | |||
1075 | /* take a ref or return timeout if channel refs can't be taken */ | ||
1076 | ch = gk20a_channel_get(ch); | ||
1077 | if (!ch) | ||
1078 | return -ETIMEDOUT; | ||
1079 | |||
1080 | /* protect our sanity for threaded userspace - most of the channel is | ||
1081 | * not thread safe */ | ||
1082 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
1083 | |||
1084 | /* this ioctl call keeps a ref to the file which keeps a ref to the | ||
1085 | * channel */ | ||
1086 | |||
1087 | switch (cmd) { | ||
1088 | case NVGPU_IOCTL_CHANNEL_OPEN: | ||
1089 | err = gk20a_channel_open_ioctl(ch->g, | ||
1090 | (struct nvgpu_channel_open_args *)buf); | ||
1091 | break; | ||
1092 | case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD: | ||
1093 | break; | ||
1094 | case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: | ||
1095 | { | ||
1096 | struct nvgpu_alloc_obj_ctx_args *args = | ||
1097 | (struct nvgpu_alloc_obj_ctx_args *)buf; | ||
1098 | |||
1099 | err = gk20a_busy(ch->g); | ||
1100 | if (err) { | ||
1101 | dev_err(dev, | ||
1102 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1103 | __func__, cmd); | ||
1104 | break; | ||
1105 | } | ||
1106 | err = nvgpu_ioctl_channel_alloc_obj_ctx(ch, args->class_num, args->flags); | ||
1107 | gk20a_idle(ch->g); | ||
1108 | break; | ||
1109 | } | ||
1110 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: | ||
1111 | { | ||
1112 | struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args = | ||
1113 | (struct nvgpu_alloc_gpfifo_ex_args *)buf; | ||
1114 | struct nvgpu_gpfifo_args gpfifo_args; | ||
1115 | |||
1116 | nvgpu_get_gpfifo_ex_args(alloc_gpfifo_ex_args, &gpfifo_args); | ||
1117 | |||
1118 | err = gk20a_busy(ch->g); | ||
1119 | if (err) { | ||
1120 | dev_err(dev, | ||
1121 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1122 | __func__, cmd); | ||
1123 | break; | ||
1124 | } | ||
1125 | |||
1126 | if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { | ||
1127 | err = -EINVAL; | ||
1128 | gk20a_idle(ch->g); | ||
1129 | break; | ||
1130 | } | ||
1131 | err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); | ||
1132 | gk20a_idle(ch->g); | ||
1133 | break; | ||
1134 | } | ||
1135 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
1136 | { | ||
1137 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = | ||
1138 | (struct nvgpu_alloc_gpfifo_args *)buf; | ||
1139 | struct nvgpu_gpfifo_args gpfifo_args; | ||
1140 | |||
1141 | nvgpu_get_gpfifo_args(alloc_gpfifo_args, &gpfifo_args); | ||
1142 | |||
1143 | err = gk20a_busy(ch->g); | ||
1144 | if (err) { | ||
1145 | dev_err(dev, | ||
1146 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1147 | __func__, cmd); | ||
1148 | break; | ||
1149 | } | ||
1150 | |||
1151 | err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args); | ||
1152 | gk20a_idle(ch->g); | ||
1153 | break; | ||
1154 | } | ||
1155 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: | ||
1156 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | ||
1157 | (struct nvgpu_submit_gpfifo_args *)buf); | ||
1158 | break; | ||
1159 | case NVGPU_IOCTL_CHANNEL_WAIT: | ||
1160 | err = gk20a_busy(ch->g); | ||
1161 | if (err) { | ||
1162 | dev_err(dev, | ||
1163 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1164 | __func__, cmd); | ||
1165 | break; | ||
1166 | } | ||
1167 | |||
1168 | /* waiting is thread-safe, not dropping this mutex could | ||
1169 | * deadlock in certain conditions */ | ||
1170 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
1171 | |||
1172 | err = gk20a_channel_wait(ch, | ||
1173 | (struct nvgpu_wait_args *)buf); | ||
1174 | |||
1175 | nvgpu_mutex_acquire(&ch->ioctl_lock); | ||
1176 | |||
1177 | gk20a_idle(ch->g); | ||
1178 | break; | ||
1179 | case NVGPU_IOCTL_CHANNEL_ZCULL_BIND: | ||
1180 | err = gk20a_busy(ch->g); | ||
1181 | if (err) { | ||
1182 | dev_err(dev, | ||
1183 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1184 | __func__, cmd); | ||
1185 | break; | ||
1186 | } | ||
1187 | err = gk20a_channel_zcull_bind(ch, | ||
1188 | (struct nvgpu_zcull_bind_args *)buf); | ||
1189 | gk20a_idle(ch->g); | ||
1190 | break; | ||
1191 | case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER: | ||
1192 | err = gk20a_busy(ch->g); | ||
1193 | if (err) { | ||
1194 | dev_err(dev, | ||
1195 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1196 | __func__, cmd); | ||
1197 | break; | ||
1198 | } | ||
1199 | err = gk20a_init_error_notifier(ch, | ||
1200 | (struct nvgpu_set_error_notifier *)buf); | ||
1201 | gk20a_idle(ch->g); | ||
1202 | break; | ||
1203 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
1204 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS: | ||
1205 | err = gk20a_busy(ch->g); | ||
1206 | if (err) { | ||
1207 | dev_err(dev, | ||
1208 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1209 | __func__, cmd); | ||
1210 | break; | ||
1211 | } | ||
1212 | err = gk20a_channel_cycle_stats(ch, | ||
1213 | (struct nvgpu_cycle_stats_args *)buf); | ||
1214 | gk20a_idle(ch->g); | ||
1215 | break; | ||
1216 | #endif | ||
1217 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT: | ||
1218 | { | ||
1219 | u32 timeout = | ||
1220 | (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; | ||
1221 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
1222 | timeout, ch->chid); | ||
1223 | ch->timeout_ms_max = timeout; | ||
1224 | gk20a_channel_trace_sched_param( | ||
1225 | trace_gk20a_channel_set_timeout, ch); | ||
1226 | break; | ||
1227 | } | ||
1228 | case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: | ||
1229 | { | ||
1230 | u32 timeout = | ||
1231 | (u32)((struct nvgpu_set_timeout_args *)buf)->timeout; | ||
1232 | bool timeout_debug_dump = !((u32) | ||
1233 | ((struct nvgpu_set_timeout_ex_args *)buf)->flags & | ||
1234 | (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP)); | ||
1235 | nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", | ||
1236 | timeout, ch->chid); | ||
1237 | ch->timeout_ms_max = timeout; | ||
1238 | ch->timeout_debug_dump = timeout_debug_dump; | ||
1239 | gk20a_channel_trace_sched_param( | ||
1240 | trace_gk20a_channel_set_timeout, ch); | ||
1241 | break; | ||
1242 | } | ||
1243 | case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: | ||
1244 | ((struct nvgpu_get_param_args *)buf)->value = | ||
1245 | ch->has_timedout; | ||
1246 | break; | ||
1247 | case NVGPU_IOCTL_CHANNEL_ENABLE: | ||
1248 | err = gk20a_busy(ch->g); | ||
1249 | if (err) { | ||
1250 | dev_err(dev, | ||
1251 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1252 | __func__, cmd); | ||
1253 | break; | ||
1254 | } | ||
1255 | if (ch->g->ops.fifo.enable_channel) | ||
1256 | ch->g->ops.fifo.enable_channel(ch); | ||
1257 | else | ||
1258 | err = -ENOSYS; | ||
1259 | gk20a_idle(ch->g); | ||
1260 | break; | ||
1261 | case NVGPU_IOCTL_CHANNEL_DISABLE: | ||
1262 | err = gk20a_busy(ch->g); | ||
1263 | if (err) { | ||
1264 | dev_err(dev, | ||
1265 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1266 | __func__, cmd); | ||
1267 | break; | ||
1268 | } | ||
1269 | if (ch->g->ops.fifo.disable_channel) | ||
1270 | ch->g->ops.fifo.disable_channel(ch); | ||
1271 | else | ||
1272 | err = -ENOSYS; | ||
1273 | gk20a_idle(ch->g); | ||
1274 | break; | ||
1275 | case NVGPU_IOCTL_CHANNEL_PREEMPT: | ||
1276 | err = gk20a_busy(ch->g); | ||
1277 | if (err) { | ||
1278 | dev_err(dev, | ||
1279 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1280 | __func__, cmd); | ||
1281 | break; | ||
1282 | } | ||
1283 | err = gk20a_fifo_preempt(ch->g, ch); | ||
1284 | gk20a_idle(ch->g); | ||
1285 | break; | ||
1286 | case NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST: | ||
1287 | if (!capable(CAP_SYS_NICE)) { | ||
1288 | err = -EPERM; | ||
1289 | break; | ||
1290 | } | ||
1291 | if (!ch->g->ops.fifo.reschedule_runlist) { | ||
1292 | err = -ENOSYS; | ||
1293 | break; | ||
1294 | } | ||
1295 | err = gk20a_busy(ch->g); | ||
1296 | if (err) { | ||
1297 | dev_err(dev, | ||
1298 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1299 | __func__, cmd); | ||
1300 | break; | ||
1301 | } | ||
1302 | err = ch->g->ops.fifo.reschedule_runlist(ch, | ||
1303 | NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT & | ||
1304 | ((struct nvgpu_reschedule_runlist_args *)buf)->flags); | ||
1305 | gk20a_idle(ch->g); | ||
1306 | break; | ||
1307 | case NVGPU_IOCTL_CHANNEL_FORCE_RESET: | ||
1308 | err = gk20a_busy(ch->g); | ||
1309 | if (err) { | ||
1310 | dev_err(dev, | ||
1311 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1312 | __func__, cmd); | ||
1313 | break; | ||
1314 | } | ||
1315 | err = ch->g->ops.fifo.force_reset_ch(ch, | ||
1316 | NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true); | ||
1317 | gk20a_idle(ch->g); | ||
1318 | break; | ||
1319 | #ifdef CONFIG_GK20A_CYCLE_STATS | ||
1320 | case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT: | ||
1321 | err = gk20a_busy(ch->g); | ||
1322 | if (err) { | ||
1323 | dev_err(dev, | ||
1324 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1325 | __func__, cmd); | ||
1326 | break; | ||
1327 | } | ||
1328 | err = gk20a_channel_cycle_stats_snapshot(ch, | ||
1329 | (struct nvgpu_cycle_stats_snapshot_args *)buf); | ||
1330 | gk20a_idle(ch->g); | ||
1331 | break; | ||
1332 | #endif | ||
1333 | case NVGPU_IOCTL_CHANNEL_WDT: | ||
1334 | err = gk20a_channel_set_wdt_status(ch, | ||
1335 | (struct nvgpu_channel_wdt_args *)buf); | ||
1336 | break; | ||
1337 | case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE: | ||
1338 | err = nvgpu_ioctl_channel_set_preemption_mode(ch, | ||
1339 | ((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode, | ||
1340 | ((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode); | ||
1341 | break; | ||
1342 | case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: | ||
1343 | if (ch->g->ops.gr.set_boosted_ctx) { | ||
1344 | bool boost = | ||
1345 | ((struct nvgpu_boosted_ctx_args *)buf)->boost; | ||
1346 | |||
1347 | err = gk20a_busy(ch->g); | ||
1348 | if (err) { | ||
1349 | dev_err(dev, | ||
1350 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1351 | __func__, cmd); | ||
1352 | break; | ||
1353 | } | ||
1354 | err = ch->g->ops.gr.set_boosted_ctx(ch, boost); | ||
1355 | gk20a_idle(ch->g); | ||
1356 | } else { | ||
1357 | err = -EINVAL; | ||
1358 | } | ||
1359 | break; | ||
1360 | case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: | ||
1361 | err = gk20a_busy(ch->g); | ||
1362 | if (err) { | ||
1363 | dev_err(dev, | ||
1364 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
1365 | __func__, cmd); | ||
1366 | break; | ||
1367 | } | ||
1368 | err = nvgpu_ioctl_channel_get_user_syncpoint(ch, | ||
1369 | (struct nvgpu_get_user_syncpoint_args *)buf); | ||
1370 | gk20a_idle(ch->g); | ||
1371 | break; | ||
1372 | default: | ||
1373 | dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); | ||
1374 | err = -ENOTTY; | ||
1375 | break; | ||
1376 | } | ||
1377 | |||
1378 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
1379 | err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); | ||
1380 | |||
1381 | nvgpu_mutex_release(&ch->ioctl_lock); | ||
1382 | |||
1383 | gk20a_channel_put(ch); | ||
1384 | |||
1385 | nvgpu_log_fn(g, "end"); | ||
1386 | |||
1387 | return err; | ||
1388 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.h b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h new file mode 100644 index 00000000..48cff1ea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
#ifndef __NVGPU_IOCTL_CHANNEL_H__
#define __NVGPU_IOCTL_CHANNEL_H__

#include <linux/fs.h>

#include "gk20a/css_gr_gk20a.h"

struct inode;
struct file;
struct gk20a;
struct nvgpu_channel_open_args;

/*
 * Linux-specific wrapper around the OS-independent cyclestats snapshot
 * client: pairs the common client state with the userspace dma_buf that
 * backs the snapshot buffer.
 */
struct gk20a_cs_snapshot_client_linux {
	struct gk20a_cs_snapshot_client cs_client;

	/* dma_buf fd passed in from userspace */
	u32 dmabuf_fd;
	/* dma_buf resolved from dmabuf_fd; presumably held for the
	 * client's lifetime — confirm against the ioctl implementation */
	struct dma_buf *dma_handler;
};

/* file_operations entry points for the channel device node */
int gk20a_channel_open(struct inode *inode, struct file *filp);
int gk20a_channel_release(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg);
int gk20a_channel_open_ioctl(struct gk20a *g,
	struct nvgpu_channel_open_args *args);

/* cyclestats snapshot / buffer teardown helpers */
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);

extern const struct file_operations gk20a_channel_ops;

/* translate IOCTL-level encodings to/from the common-code encodings */
u32 nvgpu_get_common_runlist_level(u32 level);

u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c new file mode 100644 index 00000000..501b5f93 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | |||
@@ -0,0 +1,562 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/cdev.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/anon_inodes.h> | ||
20 | #include <linux/uaccess.h> | ||
21 | #include <linux/poll.h> | ||
22 | #ifdef CONFIG_DEBUG_FS | ||
23 | #include <linux/debugfs.h> | ||
24 | #endif | ||
25 | #include <uapi/linux/nvgpu.h> | ||
26 | |||
27 | #include <nvgpu/bitops.h> | ||
28 | #include <nvgpu/lock.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/atomic.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/kref.h> | ||
33 | #include <nvgpu/log.h> | ||
34 | #include <nvgpu/barrier.h> | ||
35 | #include <nvgpu/cond.h> | ||
36 | #include <nvgpu/list.h> | ||
37 | #include <nvgpu/clk_arb.h> | ||
38 | |||
39 | #include "gk20a/gk20a.h" | ||
40 | #include "clk/clk.h" | ||
41 | #include "pstate/pstate.h" | ||
42 | #include "lpwr/lpwr.h" | ||
43 | #include "volt/volt.h" | ||
44 | |||
45 | #ifdef CONFIG_DEBUG_FS | ||
46 | #include "os_linux.h" | ||
47 | #endif | ||
48 | |||
49 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | ||
50 | struct file *filp) | ||
51 | { | ||
52 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
53 | struct nvgpu_clk_session *session = dev->session; | ||
54 | |||
55 | |||
56 | clk_arb_dbg(session->g, " "); | ||
57 | |||
58 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
59 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask) | ||
64 | { | ||
65 | unsigned int poll_mask = 0; | ||
66 | |||
67 | if (nvgpu_poll_mask & NVGPU_POLLIN) | ||
68 | poll_mask |= POLLIN; | ||
69 | if (nvgpu_poll_mask & NVGPU_POLLPRI) | ||
70 | poll_mask |= POLLPRI; | ||
71 | if (nvgpu_poll_mask & NVGPU_POLLOUT) | ||
72 | poll_mask |= POLLOUT; | ||
73 | if (nvgpu_poll_mask & NVGPU_POLLRDNORM) | ||
74 | poll_mask |= POLLRDNORM; | ||
75 | if (nvgpu_poll_mask & NVGPU_POLLHUP) | ||
76 | poll_mask |= POLLHUP; | ||
77 | |||
78 | return poll_mask; | ||
79 | } | ||
80 | |||
/*
 * poll() handler shared by the event and completion fds.  Registers the
 * caller on the readout wait queue and reports the pending poll flags,
 * translated to Linux POLL* bits.
 */
static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
{
	struct nvgpu_clk_dev *dev = filp->private_data;

	clk_arb_dbg(dev->session->g, " ");

	poll_wait(filp, &dev->readout_wq.wq, wait);
	/* atomic xchg clears poll_mask so each event is reported only once */
	return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
}
90 | |||
/* Wake any poll()ers / blocked readers waiting on this event fd. */
void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
{
	nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
}
95 | |||
/*
 * release() for a clock-arbiter event fd: unlink the dev from the
 * arbiter's user list, free its notification queue, and drop the
 * references held on the session and on the dev itself.
 */
static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
		struct file *filp)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_clk_session *session = dev->session;
	struct nvgpu_clk_arb *arb;

	arb = session->g->clk_arb;

	clk_arb_dbg(session->g, " ");

	/* arb may be NULL (e.g. arbiter torn down); then only drop refs */
	if (arb) {
		/* users_lock guards the arbiter's list of event fds */
		nvgpu_spinlock_acquire(&arb->users_lock);
		nvgpu_list_del(&dev->link);
		nvgpu_spinlock_release(&arb->users_lock);
		nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
	}

	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);

	return 0;
}
119 | |||
120 | static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event) | ||
121 | { | ||
122 | u32 nvgpu_gpu_event; | ||
123 | |||
124 | switch (nvgpu_event) { | ||
125 | case NVGPU_EVENT_VF_UPDATE: | ||
126 | nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE; | ||
127 | break; | ||
128 | case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE: | ||
129 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE; | ||
130 | break; | ||
131 | case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE: | ||
132 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE; | ||
133 | break; | ||
134 | case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED: | ||
135 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED; | ||
136 | break; | ||
137 | case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED: | ||
138 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED; | ||
139 | break; | ||
140 | case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD: | ||
141 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD; | ||
142 | break; | ||
143 | case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD: | ||
144 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD; | ||
145 | break; | ||
146 | case NVGPU_EVENT_ALARM_GPU_LOST: | ||
147 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST; | ||
148 | break; | ||
149 | default: | ||
150 | /* Control shouldn't come here */ | ||
151 | nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1; | ||
152 | break; | ||
153 | } | ||
154 | return nvgpu_gpu_event; | ||
155 | } | ||
156 | |||
/*
 * Pop the oldest pending notification for this event fd, if any.
 *
 * On success fills *info with the converted event id and timestamp,
 * advances the per-fd head, and returns the converted event bitmask;
 * returns 0 when nothing is pending (or info is NULL).
 */
static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
		struct nvgpu_gpu_event_info *info) {

	u32 tail, head;
	u32 events = 0;
	struct nvgpu_clk_notification *p_notif;

	tail = nvgpu_atomic_read(&dev->queue.tail);
	head = nvgpu_atomic_read(&dev->queue.head);

	/* If the reader fell more than queue.size entries behind, skip
	 * forward so at most queue.size notifications are replayed. */
	head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;

	/* _WRAPGTEQ: presumably a wrap-around-safe "tail ahead of head"
	 * test — confirm against its definition in the clk_arb headers */
	if (_WRAPGTEQ(tail, head) && info) {
		head++;
		p_notif = &dev->queue.notifications[head % dev->queue.size];
		events |= nvgpu_convert_gpu_event(p_notif->notification);
		info->event_id = ffs(events) - 1;
		info->timestamp = p_notif->timestamp;
		nvgpu_atomic_set(&dev->queue.head, head);
	}

	return events;
}
180 | |||
/*
 * read() handler for the event fd: delivers one struct
 * nvgpu_gpu_event_info per call, blocking unless O_NONBLOCK is set.
 * Returns sizeof(info) on success, 0 if the buffer is too small,
 * -EAGAIN / -EFAULT / wait error otherwise.
 */
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
		size_t size, loff_t *off)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_gpu_event_info info;
	ssize_t err;

	clk_arb_dbg(dev->session->g,
			"filp=%p, buf=%p, size=%zu", filp, buf, size);

	/* NOTE(review): if *off > size this unsigned subtraction wraps and
	 * the check passes; also *off is never advanced after a read —
	 * verify callers always read from offset 0 */
	if ((size - *off) < sizeof(info))
		return 0;

	memset(&info, 0, sizeof(info));
	/* Get the oldest event from the queue */
	while (!__pending_event(dev, &info)) {
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		/* sleep until a notification is posted, then re-check */
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
				__pending_event(dev, &info), 0);
		if (err)
			return err;
		if (info.timestamp)
			break;
	}

	if (copy_to_user(buf + *off, &info, sizeof(info)))
		return -EFAULT;

	return sizeof(info);
}
212 | |||
213 | static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, | ||
214 | struct nvgpu_gpu_set_event_filter_args *args) | ||
215 | { | ||
216 | struct gk20a *g = dev->session->g; | ||
217 | u32 mask; | ||
218 | |||
219 | nvgpu_log(g, gpu_dbg_fn, " "); | ||
220 | |||
221 | if (args->flags) | ||
222 | return -EINVAL; | ||
223 | |||
224 | if (args->size != 1) | ||
225 | return -EINVAL; | ||
226 | |||
227 | if (copy_from_user(&mask, (void __user *) args->buffer, | ||
228 | args->size * sizeof(u32))) | ||
229 | return -EFAULT; | ||
230 | |||
231 | /* update alarm mask */ | ||
232 | nvgpu_atomic_set(&dev->enabled_mask, mask); | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
238 | unsigned long arg) | ||
239 | { | ||
240 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
241 | struct gk20a *g = dev->session->g; | ||
242 | u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; | ||
243 | int err = 0; | ||
244 | |||
245 | nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); | ||
246 | |||
247 | if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
248 | || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) | ||
249 | return -EINVAL; | ||
250 | |||
251 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); | ||
252 | |||
253 | memset(buf, 0, sizeof(buf)); | ||
254 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
255 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
256 | return -EFAULT; | ||
257 | } | ||
258 | |||
259 | switch (cmd) { | ||
260 | case NVGPU_EVENT_IOCTL_SET_FILTER: | ||
261 | err = nvgpu_clk_arb_set_event_filter(dev, | ||
262 | (struct nvgpu_gpu_set_event_filter_args *)buf); | ||
263 | break; | ||
264 | default: | ||
265 | nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); | ||
266 | err = -ENOTTY; | ||
267 | } | ||
268 | |||
269 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
270 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | ||
271 | |||
272 | return err; | ||
273 | } | ||
274 | |||
/*
 * fops for the completion ("request") fd: userspace only poll()s it to
 * learn that a committed target request finished; no read/ioctl.
 */
static const struct file_operations completion_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_completion_dev,
	.poll = nvgpu_clk_arb_poll_dev,
};
280 | |||
/*
 * fops for the event fd: poll/read deliver VF-update and alarm
 * notifications; the same ioctl handler serves native and compat
 * callers (argument layout is 32/64-bit invariant).
 */
static const struct file_operations event_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_event_dev,
	.poll = nvgpu_clk_arb_poll_dev,
	.read = nvgpu_clk_arb_read_event_dev,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
#endif
	.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
};
291 | |||
292 | static int nvgpu_clk_arb_install_fd(struct gk20a *g, | ||
293 | struct nvgpu_clk_session *session, | ||
294 | const struct file_operations *fops, | ||
295 | struct nvgpu_clk_dev **_dev) | ||
296 | { | ||
297 | struct file *file; | ||
298 | int fd; | ||
299 | int err; | ||
300 | int status; | ||
301 | char name[64]; | ||
302 | struct nvgpu_clk_dev *dev; | ||
303 | |||
304 | clk_arb_dbg(g, " "); | ||
305 | |||
306 | dev = nvgpu_kzalloc(g, sizeof(*dev)); | ||
307 | if (!dev) | ||
308 | return -ENOMEM; | ||
309 | |||
310 | status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, | ||
311 | DEFAULT_EVENT_NUMBER); | ||
312 | if (status < 0) { | ||
313 | err = status; | ||
314 | goto fail; | ||
315 | } | ||
316 | |||
317 | fd = get_unused_fd_flags(O_RDWR); | ||
318 | if (fd < 0) { | ||
319 | err = fd; | ||
320 | goto fail; | ||
321 | } | ||
322 | |||
323 | snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); | ||
324 | file = anon_inode_getfile(name, fops, dev, O_RDWR); | ||
325 | if (IS_ERR(file)) { | ||
326 | err = PTR_ERR(file); | ||
327 | goto fail_fd; | ||
328 | } | ||
329 | |||
330 | fd_install(fd, file); | ||
331 | |||
332 | nvgpu_cond_init(&dev->readout_wq); | ||
333 | |||
334 | nvgpu_atomic_set(&dev->poll_mask, 0); | ||
335 | |||
336 | dev->session = session; | ||
337 | nvgpu_ref_init(&dev->refcount); | ||
338 | |||
339 | nvgpu_ref_get(&session->refcount); | ||
340 | |||
341 | *_dev = dev; | ||
342 | |||
343 | return fd; | ||
344 | |||
345 | fail_fd: | ||
346 | put_unused_fd(fd); | ||
347 | fail: | ||
348 | nvgpu_kfree(g, dev); | ||
349 | |||
350 | return err; | ||
351 | } | ||
352 | |||
/*
 * Create an event fd on the given session and register it with the
 * arbiter so VF-update/alarm notifications are delivered to it.
 * On success *event_fd holds the new fd and 0 is returned.
 */
int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
	struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	int fd;

	clk_arb_dbg(g, " ");

	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
	if (fd < 0)
		return fd;

	/* TODO: alarm mask needs to be set to default value to prevent
	 * failures of legacy tests. This will be removed when sanity is
	 * updated
	 */
	if (alarm_mask)
		nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
	else
		nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));

	/* start consuming notifications from the arbiter's current head */
	dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);

	/* make the fd visible to the arbiter's notification fan-out */
	nvgpu_spinlock_acquire(&arb->users_lock);
	nvgpu_list_add_tail(&dev->link, &arb->users);
	nvgpu_spinlock_release(&arb->users_lock);

	*event_fd = fd;

	return 0;
}
385 | |||
386 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
387 | struct nvgpu_clk_session *session, int *request_fd) | ||
388 | { | ||
389 | struct nvgpu_clk_dev *dev; | ||
390 | int fd; | ||
391 | |||
392 | clk_arb_dbg(g, " "); | ||
393 | |||
394 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
395 | if (fd < 0) | ||
396 | return fd; | ||
397 | |||
398 | *request_fd = fd; | ||
399 | |||
400 | return 0; | ||
401 | } | ||
402 | |||
/*
 * Commit a previously-configured request fd: queue its target on the
 * session and kick the arbiter worker to apply it.  Returns 0 on
 * success, -EINVAL if the fd is not a request fd of this session.
 */
int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
	struct nvgpu_clk_session *session, int request_fd)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	struct fd fd;
	int err = 0;

	clk_arb_dbg(g, " ");

	fd = fdget(request_fd);
	if (!fd.file)
		return -EINVAL;

	/* reject fds that are not clock-arbiter request fds */
	if (fd.file->f_op != &completion_dev_ops) {
		err = -EINVAL;
		goto fdput_fd;
	}

	dev = (struct nvgpu_clk_dev *) fd.file->private_data;

	if (!dev || dev->session != session) {
		err = -EINVAL;
		goto fdput_fd;
	}
	/* extra dev ref is held while the request sits on session->targets;
	 * presumably dropped by the worker when the request completes */
	nvgpu_ref_get(&dev->refcount);
	nvgpu_spinlock_acquire(&session->session_lock);
	nvgpu_list_add(&dev->node, &session->targets);
	nvgpu_spinlock_release(&session->session_lock);
	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);

fdput_fd:
	fdput(fd);
	return err;
}
438 | |||
439 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, | ||
440 | int request_fd, u32 api_domain, u16 target_mhz) | ||
441 | { | ||
442 | struct nvgpu_clk_dev *dev; | ||
443 | struct fd fd; | ||
444 | int err = 0; | ||
445 | |||
446 | clk_arb_dbg(session->g, | ||
447 | "domain=0x%08x target_mhz=%u", api_domain, target_mhz); | ||
448 | |||
449 | fd = fdget(request_fd); | ||
450 | if (!fd.file) | ||
451 | return -EINVAL; | ||
452 | |||
453 | if (fd.file->f_op != &completion_dev_ops) { | ||
454 | err = -EINVAL; | ||
455 | goto fdput_fd; | ||
456 | } | ||
457 | |||
458 | dev = fd.file->private_data; | ||
459 | if (!dev || dev->session != session) { | ||
460 | err = -EINVAL; | ||
461 | goto fdput_fd; | ||
462 | } | ||
463 | |||
464 | switch (api_domain) { | ||
465 | case NVGPU_CLK_DOMAIN_MCLK: | ||
466 | dev->mclk_target_mhz = target_mhz; | ||
467 | break; | ||
468 | |||
469 | case NVGPU_CLK_DOMAIN_GPCCLK: | ||
470 | dev->gpc2clk_target_mhz = target_mhz * 2ULL; | ||
471 | break; | ||
472 | |||
473 | default: | ||
474 | err = -EINVAL; | ||
475 | } | ||
476 | |||
477 | fdput_fd: | ||
478 | fdput(fd); | ||
479 | return err; | ||
480 | } | ||
481 | |||
482 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | ||
483 | { | ||
484 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | ||
485 | u32 api_domains = 0; | ||
486 | |||
487 | if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) | ||
488 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); | ||
489 | |||
490 | if (clk_domains & CTRL_CLK_DOMAIN_MCLK) | ||
491 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); | ||
492 | |||
493 | return api_domains; | ||
494 | } | ||
495 | |||
496 | #ifdef CONFIG_DEBUG_FS | ||
497 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
498 | { | ||
499 | struct gk20a *g = s->private; | ||
500 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
501 | struct nvgpu_clk_arb_debug *debug; | ||
502 | |||
503 | u64 num; | ||
504 | s64 tmp, avg, std, max, min; | ||
505 | |||
506 | debug = NV_ACCESS_ONCE(arb->debug); | ||
507 | /* Make copy of structure and ensure no reordering */ | ||
508 | nvgpu_smp_rmb(); | ||
509 | if (!debug) | ||
510 | return -EINVAL; | ||
511 | |||
512 | std = debug->switch_std; | ||
513 | avg = debug->switch_avg; | ||
514 | max = debug->switch_max; | ||
515 | min = debug->switch_min; | ||
516 | num = debug->switch_num; | ||
517 | |||
518 | tmp = std; | ||
519 | do_div(tmp, num); | ||
520 | seq_printf(s, "Number of transitions: %lld\n", | ||
521 | num); | ||
522 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
523 | max, min); | ||
524 | seq_printf(s, "avg / std : %lld / %ld usec\n", | ||
525 | avg, int_sqrt(tmp)); | ||
526 | |||
527 | return 0; | ||
528 | } | ||
529 | |||
/* open() shim binding the seq_file single-show to the gk20a instance */
static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
}
534 | |||
/* seq_file plumbing for the read-only "arb_stats" debugfs node */
static const struct file_operations nvgpu_clk_arb_stats_fops = {
	.open = nvgpu_clk_arb_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
541 | |||
542 | |||
543 | int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
544 | { | ||
545 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
546 | struct dentry *gpu_root = l->debugfs; | ||
547 | struct dentry *d; | ||
548 | |||
549 | nvgpu_log(g, gpu_dbg_info, "g=%p", g); | ||
550 | |||
551 | d = debugfs_create_file( | ||
552 | "arb_stats", | ||
553 | S_IRUGO, | ||
554 | gpu_root, | ||
555 | g, | ||
556 | &nvgpu_clk_arb_stats_fops); | ||
557 | if (!d) | ||
558 | return -ENOMEM; | ||
559 | |||
560 | return 0; | ||
561 | } | ||
562 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c new file mode 100644 index 00000000..73a8131d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | |||
@@ -0,0 +1,1962 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/cdev.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/anon_inodes.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include <nvgpu/bitops.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/bug.h> | ||
27 | #include <nvgpu/ptimer.h> | ||
28 | #include <nvgpu/vidmem.h> | ||
29 | #include <nvgpu/log.h> | ||
30 | #include <nvgpu/enabled.h> | ||
31 | #include <nvgpu/sizes.h> | ||
32 | |||
33 | #include <nvgpu/linux/vidmem.h> | ||
34 | |||
35 | #include "ioctl_ctrl.h" | ||
36 | #include "ioctl_dbg.h" | ||
37 | #include "ioctl_as.h" | ||
38 | #include "ioctl_tsg.h" | ||
39 | #include "ioctl_channel.h" | ||
40 | #include "gk20a/gk20a.h" | ||
41 | #include "gk20a/fence_gk20a.h" | ||
42 | |||
43 | #include "platform_gk20a.h" | ||
44 | #include "os_linux.h" | ||
45 | #include "dmabuf.h" | ||
46 | #include "channel.h" | ||
47 | |||
/*
 * HZ_TO_MHZ: approximate Hz -> MHz conversion avoiding a 64-bit divide.
 * Inputs above 0xF414F9CD7 Hz clamp to 0xffff MHz; other values wider
 * than 32 bits use multiply-by-0x10C8 then shift-right-32 (0x10C8 is
 * approximately 2^32 / 10^6 — NOTE(review): verify the constant's
 * rounding before reusing elsewhere); 32-bit values divide directly.
 */
#define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \
	(u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
#define MHZ_TO_HZ(a) ((u64)a * MHZ)

/* Per-open-file state for the ctrl device node. */
struct gk20a_ctrl_priv {
	struct device *dev;
	/* gk20a kept directly (not derived from dev) so arbiter fds can
	 * outlive driver teardown for GPU_LOST delivery; see dev_open */
	struct gk20a *g;
	/* clk-arb session created at open; NULL-checked at release */
	struct nvgpu_clk_session *clk_session;
};
57 | |||
58 | static u32 gk20a_as_translate_as_alloc_flags(struct gk20a *g, u32 flags) | ||
59 | { | ||
60 | u32 core_flags = 0; | ||
61 | |||
62 | if (flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) | ||
63 | core_flags |= NVGPU_AS_ALLOC_USERSPACE_MANAGED; | ||
64 | |||
65 | return core_flags; | ||
66 | } | ||
67 | |||
68 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | ||
69 | { | ||
70 | struct nvgpu_os_linux *l; | ||
71 | struct gk20a *g; | ||
72 | struct gk20a_ctrl_priv *priv; | ||
73 | int err = 0; | ||
74 | |||
75 | l = container_of(inode->i_cdev, | ||
76 | struct nvgpu_os_linux, ctrl.cdev); | ||
77 | g = gk20a_get(&l->g); | ||
78 | if (!g) | ||
79 | return -ENODEV; | ||
80 | |||
81 | nvgpu_log_fn(g, " "); | ||
82 | |||
83 | priv = nvgpu_kzalloc(g, sizeof(struct gk20a_ctrl_priv)); | ||
84 | if (!priv) { | ||
85 | err = -ENOMEM; | ||
86 | goto free_ref; | ||
87 | } | ||
88 | filp->private_data = priv; | ||
89 | priv->dev = dev_from_gk20a(g); | ||
90 | /* | ||
91 | * We dont close the arbiter fd's after driver teardown to support | ||
92 | * GPU_LOST events, so we store g here, instead of dereferencing the | ||
93 | * dev structure on teardown | ||
94 | */ | ||
95 | priv->g = g; | ||
96 | |||
97 | if (!g->sw_ready) { | ||
98 | err = gk20a_busy(g); | ||
99 | if (err) | ||
100 | goto free_ref; | ||
101 | gk20a_idle(g); | ||
102 | } | ||
103 | |||
104 | err = nvgpu_clk_arb_init_session(g, &priv->clk_session); | ||
105 | free_ref: | ||
106 | if (err) | ||
107 | gk20a_put(g); | ||
108 | return err; | ||
109 | } | ||
/*
 * release() for the ctrl device node: tear down the clk-arb session
 * created at open (if any) and drop the gk20a reference taken there.
 */
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_ctrl_priv *priv = filp->private_data;
	struct gk20a *g = priv->g;

	nvgpu_log_fn(g, " ");

	if (priv->clk_session)
		nvgpu_clk_arb_release_session(g, priv->clk_session);

	gk20a_put(g);
	nvgpu_kfree(g, priv);

	return 0;
}
125 | |||
/*
 * Mapping between a NVGPU_GPU_FLAGS_* bit reported to userspace through
 * the GPU-characteristics ioctl and the driver's internal
 * nvgpu_is_enabled() flag index that gates it.
 */
struct nvgpu_flags_mapping {
	u64 ioctl_flag;		/* bit in nvgpu_gpu_characteristics.flags */
	int enabled_flag;	/* internal enabled-flag index */
};

static struct nvgpu_flags_mapping flags_mapping[] = {
	{NVGPU_GPU_FLAGS_HAS_SYNCPOINTS,
		NVGPU_HAS_SYNCPOINTS},
	{NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS,
		NVGPU_SUPPORT_PARTIAL_MAPPINGS},
	{NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS,
		NVGPU_SUPPORT_SPARSE_ALLOCS},
	{NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS,
		NVGPU_SUPPORT_SYNC_FENCE_FDS},
	{NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS,
		NVGPU_SUPPORT_CYCLE_STATS},
	{NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT,
		NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT},
	{NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS,
		NVGPU_SUPPORT_USERSPACE_MANAGED_AS},
	{NVGPU_GPU_FLAGS_SUPPORT_TSG,
		NVGPU_SUPPORT_TSG},
	{NVGPU_GPU_FLAGS_SUPPORT_CLOCK_CONTROLS,
		NVGPU_SUPPORT_CLOCK_CONTROLS},
	{NVGPU_GPU_FLAGS_SUPPORT_GET_VOLTAGE,
		NVGPU_SUPPORT_GET_VOLTAGE},
	{NVGPU_GPU_FLAGS_SUPPORT_GET_CURRENT,
		NVGPU_SUPPORT_GET_CURRENT},
	{NVGPU_GPU_FLAGS_SUPPORT_GET_POWER,
		NVGPU_SUPPORT_GET_POWER},
	{NVGPU_GPU_FLAGS_SUPPORT_GET_TEMPERATURE,
		NVGPU_SUPPORT_GET_TEMPERATURE},
	{NVGPU_GPU_FLAGS_SUPPORT_SET_THERM_ALERT_LIMIT,
		NVGPU_SUPPORT_SET_THERM_ALERT_LIMIT},
	{NVGPU_GPU_FLAGS_SUPPORT_DEVICE_EVENTS,
		NVGPU_SUPPORT_DEVICE_EVENTS},
	{NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE,
		NVGPU_SUPPORT_FECS_CTXSW_TRACE},
	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING,
		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
		NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
	{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
		NVGPU_SUPPORT_DETERMINISTIC_OPTS},
	{NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS,
		NVGPU_SUPPORT_SYNCPOINT_ADDRESS},
	{NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT,
		NVGPU_SUPPORT_USER_SYNCPOINT},
	{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
		NVGPU_SUPPORT_IO_COHERENCE},
	{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
		NVGPU_SUPPORT_RESCHEDULE_RUNLIST},
	{NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL,
		NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL},
	{NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF,
		NVGPU_ECC_ENABLED_SM_LRF},
	{NVGPU_GPU_FLAGS_ECC_ENABLED_SM_SHM,
		NVGPU_ECC_ENABLED_SM_SHM},
	{NVGPU_GPU_FLAGS_ECC_ENABLED_TEX,
		NVGPU_ECC_ENABLED_TEX},
	{NVGPU_GPU_FLAGS_ECC_ENABLED_LTC,
		NVGPU_ECC_ENABLED_LTC},
	{NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS,
		NVGPU_SUPPORT_TSG_SUBCONTEXTS},
	{NVGPU_GPU_FLAGS_SUPPORT_SCG,
		NVGPU_SUPPORT_SCG},
	{NVGPU_GPU_FLAGS_SUPPORT_VPR,
		NVGPU_SUPPORT_VPR},
};
195 | |||
196 | static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) | ||
197 | { | ||
198 | unsigned int i; | ||
199 | u64 ioctl_flags = 0; | ||
200 | |||
201 | for (i = 0; i < sizeof(flags_mapping)/sizeof(*flags_mapping); i++) { | ||
202 | if (nvgpu_is_enabled(g, flags_mapping[i].enabled_flag)) | ||
203 | ioctl_flags |= flags_mapping[i].ioctl_flag; | ||
204 | } | ||
205 | |||
206 | return ioctl_flags; | ||
207 | } | ||
208 | |||
/*
 * Fill the preemption-mode fields of the GPU characteristics from the
 * HAL's internal preemption-mode record, translating each value to its
 * UAPI encoding.
 */
static void nvgpu_set_preemption_mode_flags(struct gk20a *g,
	struct nvgpu_gpu_characteristics *gpu)
{
	struct nvgpu_preemption_modes_rec preemption_mode_rec;

	g->ops.gr.get_preemption_mode_flags(g, &preemption_mode_rec);

	/* supported-mode bitmasks */
	gpu->graphics_preemption_mode_flags =
		nvgpu_get_ioctl_graphics_preempt_mode_flags(
			preemption_mode_rec.graphics_preemption_mode_flags);
	gpu->compute_preemption_mode_flags =
		nvgpu_get_ioctl_compute_preempt_mode_flags(
			preemption_mode_rec.compute_preemption_mode_flags);

	/* single default mode values */
	gpu->default_graphics_preempt_mode =
		nvgpu_get_ioctl_graphics_preempt_mode(
			preemption_mode_rec.default_graphics_preempt_mode);
	gpu->default_compute_preempt_mode =
		nvgpu_get_ioctl_compute_preempt_mode(
			preemption_mode_rec.default_compute_preempt_mode);
}
230 | |||
231 | static long | ||
232 | gk20a_ctrl_ioctl_gpu_characteristics( | ||
233 | struct gk20a *g, | ||
234 | struct nvgpu_gpu_get_characteristics *request) | ||
235 | { | ||
236 | struct nvgpu_gpu_characteristics gpu; | ||
237 | long err = 0; | ||
238 | |||
239 | if (gk20a_busy(g)) { | ||
240 | nvgpu_err(g, "failed to power on gpu"); | ||
241 | return -EINVAL; | ||
242 | } | ||
243 | |||
244 | memset(&gpu, 0, sizeof(gpu)); | ||
245 | |||
246 | gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); | ||
247 | gpu.on_board_video_memory_size = 0; /* integrated GPU */ | ||
248 | |||
249 | gpu.num_gpc = g->gr.gpc_count; | ||
250 | gpu.max_gpc_count = g->gr.max_gpc_count; | ||
251 | |||
252 | gpu.num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; | ||
253 | |||
254 | gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ | ||
255 | |||
256 | gpu.compression_page_size = g->ops.fb.compression_page_size(g); | ||
257 | |||
258 | gpu.gpc_mask = (1 << g->gr.gpc_count)-1; | ||
259 | |||
260 | gpu.flags = nvgpu_ctrl_ioctl_gpu_characteristics_flags(g); | ||
261 | |||
262 | gpu.arch = g->params.gpu_arch; | ||
263 | gpu.impl = g->params.gpu_impl; | ||
264 | gpu.rev = g->params.gpu_rev; | ||
265 | gpu.reg_ops_limit = NVGPU_IOCTL_DBG_REG_OPS_LIMIT; | ||
266 | gpu.map_buffer_batch_limit = nvgpu_is_enabled(g, NVGPU_SUPPORT_MAP_BUFFER_BATCH) ? | ||
267 | NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT : 0; | ||
268 | gpu.twod_class = g->ops.get_litter_value(g, GPU_LIT_TWOD_CLASS); | ||
269 | gpu.threed_class = g->ops.get_litter_value(g, GPU_LIT_THREED_CLASS); | ||
270 | gpu.compute_class = g->ops.get_litter_value(g, GPU_LIT_COMPUTE_CLASS); | ||
271 | gpu.gpfifo_class = g->ops.get_litter_value(g, GPU_LIT_GPFIFO_CLASS); | ||
272 | gpu.inline_to_memory_class = | ||
273 | g->ops.get_litter_value(g, GPU_LIT_I2M_CLASS); | ||
274 | gpu.dma_copy_class = | ||
275 | g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); | ||
276 | |||
277 | gpu.vbios_version = g->bios.vbios_version; | ||
278 | gpu.vbios_oem_version = g->bios.vbios_oem_version; | ||
279 | |||
280 | gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); | ||
281 | gpu.pde_coverage_bit_count = | ||
282 | g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; | ||
283 | gpu.available_big_page_sizes = nvgpu_mm_get_available_big_page_sizes(g); | ||
284 | |||
285 | gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; | ||
286 | gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; | ||
287 | gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; | ||
288 | |||
289 | gpu.max_css_buffer_size = g->gr.max_css_buffer_size; | ||
290 | |||
291 | gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; | ||
292 | gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; | ||
293 | gpu.dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; | ||
294 | gpu.ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; | ||
295 | gpu.as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; | ||
296 | gpu.event_ioctl_nr_last = NVGPU_EVENT_IOCTL_LAST; | ||
297 | gpu.gpu_va_bit_count = 40; | ||
298 | |||
299 | strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname)); | ||
300 | gpu.max_fbps_count = g->ops.gr.get_max_fbps_count(g); | ||
301 | gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); | ||
302 | gpu.max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); | ||
303 | gpu.max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); | ||
304 | gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; | ||
305 | gpu.gr_gobs_per_comptagline_per_slice = | ||
306 | g->gr.gobs_per_comptagline_per_slice; | ||
307 | gpu.num_ltc = g->ltc_count; | ||
308 | gpu.lts_per_ltc = g->gr.slices_per_ltc; | ||
309 | gpu.cbc_cache_line_size = g->gr.cacheline_size; | ||
310 | gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; | ||
311 | |||
312 | if (g->ops.clk.get_maxrate) | ||
313 | gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
314 | |||
315 | gpu.local_video_memory_size = g->mm.vidmem.size; | ||
316 | |||
317 | gpu.pci_vendor_id = g->pci_vendor_id; | ||
318 | gpu.pci_device_id = g->pci_device_id; | ||
319 | gpu.pci_subsystem_vendor_id = g->pci_subsystem_vendor_id; | ||
320 | gpu.pci_subsystem_device_id = g->pci_subsystem_device_id; | ||
321 | gpu.pci_class = g->pci_class; | ||
322 | gpu.pci_revision = g->pci_revision; | ||
323 | |||
324 | nvgpu_set_preemption_mode_flags(g, &gpu); | ||
325 | |||
326 | if (request->gpu_characteristics_buf_size > 0) { | ||
327 | size_t write_size = sizeof(gpu); | ||
328 | |||
329 | if (write_size > request->gpu_characteristics_buf_size) | ||
330 | write_size = request->gpu_characteristics_buf_size; | ||
331 | |||
332 | err = copy_to_user((void __user *)(uintptr_t) | ||
333 | request->gpu_characteristics_buf_addr, | ||
334 | &gpu, write_size); | ||
335 | } | ||
336 | |||
337 | if (err == 0) | ||
338 | request->gpu_characteristics_buf_size = sizeof(gpu); | ||
339 | |||
340 | gk20a_idle(g); | ||
341 | |||
342 | return err; | ||
343 | } | ||
344 | |||
345 | static int gk20a_ctrl_prepare_compressible_read( | ||
346 | struct gk20a *g, | ||
347 | struct nvgpu_gpu_prepare_compressible_read_args *args) | ||
348 | { | ||
349 | int ret = -ENOSYS; | ||
350 | |||
351 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
352 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
353 | struct nvgpu_channel_fence fence; | ||
354 | struct gk20a_fence *fence_out = NULL; | ||
355 | int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( | ||
356 | args->submit_flags); | ||
357 | int fd = -1; | ||
358 | |||
359 | fence.id = args->fence.syncpt_id; | ||
360 | fence.value = args->fence.syncpt_value; | ||
361 | |||
362 | /* Try and allocate an fd here*/ | ||
363 | if ((submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
364 | && (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) { | ||
365 | fd = get_unused_fd_flags(O_RDWR); | ||
366 | if (fd < 0) | ||
367 | return fd; | ||
368 | } | ||
369 | |||
370 | ret = gk20a_prepare_compressible_read(l, args->handle, | ||
371 | args->request_compbits, args->offset, | ||
372 | args->compbits_hoffset, args->compbits_voffset, | ||
373 | args->scatterbuffer_offset, | ||
374 | args->width, args->height, args->block_height_log2, | ||
375 | submit_flags, &fence, &args->valid_compbits, | ||
376 | &args->zbc_color, &fence_out); | ||
377 | |||
378 | if (ret) { | ||
379 | if (fd != -1) | ||
380 | put_unused_fd(fd); | ||
381 | return ret; | ||
382 | } | ||
383 | |||
384 | /* Convert fence_out to something we can pass back to user space. */ | ||
385 | if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { | ||
386 | if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
387 | if (fence_out) { | ||
388 | ret = gk20a_fence_install_fd(fence_out, fd); | ||
389 | if (ret) | ||
390 | put_unused_fd(fd); | ||
391 | else | ||
392 | args->fence.fd = fd; | ||
393 | } else { | ||
394 | args->fence.fd = -1; | ||
395 | put_unused_fd(fd); | ||
396 | } | ||
397 | } else { | ||
398 | if (fence_out) { | ||
399 | args->fence.syncpt_id = fence_out->syncpt_id; | ||
400 | args->fence.syncpt_value = | ||
401 | fence_out->syncpt_value; | ||
402 | } else { | ||
403 | args->fence.syncpt_id = -1; | ||
404 | args->fence.syncpt_value = 0; | ||
405 | } | ||
406 | } | ||
407 | } | ||
408 | gk20a_fence_put(fence_out); | ||
409 | #endif | ||
410 | |||
411 | return ret; | ||
412 | } | ||
413 | |||
414 | static int gk20a_ctrl_mark_compressible_write( | ||
415 | struct gk20a *g, | ||
416 | struct nvgpu_gpu_mark_compressible_write_args *args) | ||
417 | { | ||
418 | int ret = -ENOSYS; | ||
419 | |||
420 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
421 | ret = gk20a_mark_compressible_write(g, args->handle, | ||
422 | args->valid_compbits, args->offset, args->zbc_color); | ||
423 | #endif | ||
424 | |||
425 | return ret; | ||
426 | } | ||
427 | |||
428 | static int gk20a_ctrl_alloc_as( | ||
429 | struct gk20a *g, | ||
430 | struct nvgpu_alloc_as_args *args) | ||
431 | { | ||
432 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
433 | struct gk20a_as_share *as_share; | ||
434 | int err; | ||
435 | int fd; | ||
436 | struct file *file; | ||
437 | char name[64]; | ||
438 | |||
439 | err = get_unused_fd_flags(O_RDWR); | ||
440 | if (err < 0) | ||
441 | return err; | ||
442 | fd = err; | ||
443 | |||
444 | snprintf(name, sizeof(name), "nvhost-%s-fd%d", g->name, fd); | ||
445 | |||
446 | file = anon_inode_getfile(name, l->as_dev.cdev.ops, NULL, O_RDWR); | ||
447 | if (IS_ERR(file)) { | ||
448 | err = PTR_ERR(file); | ||
449 | goto clean_up; | ||
450 | } | ||
451 | |||
452 | err = gk20a_as_alloc_share(g, args->big_page_size, | ||
453 | gk20a_as_translate_as_alloc_flags(g, | ||
454 | args->flags), | ||
455 | &as_share); | ||
456 | if (err) | ||
457 | goto clean_up_file; | ||
458 | |||
459 | fd_install(fd, file); | ||
460 | file->private_data = as_share; | ||
461 | |||
462 | args->as_fd = fd; | ||
463 | return 0; | ||
464 | |||
465 | clean_up_file: | ||
466 | fput(file); | ||
467 | clean_up: | ||
468 | put_unused_fd(fd); | ||
469 | return err; | ||
470 | } | ||
471 | |||
472 | static int gk20a_ctrl_open_tsg(struct gk20a *g, | ||
473 | struct nvgpu_gpu_open_tsg_args *args) | ||
474 | { | ||
475 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
476 | int err; | ||
477 | int fd; | ||
478 | struct file *file; | ||
479 | char name[64]; | ||
480 | |||
481 | err = get_unused_fd_flags(O_RDWR); | ||
482 | if (err < 0) | ||
483 | return err; | ||
484 | fd = err; | ||
485 | |||
486 | snprintf(name, sizeof(name), "nvgpu-%s-tsg%d", g->name, fd); | ||
487 | |||
488 | file = anon_inode_getfile(name, l->tsg.cdev.ops, NULL, O_RDWR); | ||
489 | if (IS_ERR(file)) { | ||
490 | err = PTR_ERR(file); | ||
491 | goto clean_up; | ||
492 | } | ||
493 | |||
494 | err = nvgpu_ioctl_tsg_open(g, file); | ||
495 | if (err) | ||
496 | goto clean_up_file; | ||
497 | |||
498 | fd_install(fd, file); | ||
499 | args->tsg_fd = fd; | ||
500 | return 0; | ||
501 | |||
502 | clean_up_file: | ||
503 | fput(file); | ||
504 | clean_up: | ||
505 | put_unused_fd(fd); | ||
506 | return err; | ||
507 | } | ||
508 | |||
509 | static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, | ||
510 | struct nvgpu_gpu_get_tpc_masks_args *args) | ||
511 | { | ||
512 | struct gr_gk20a *gr = &g->gr; | ||
513 | int err = 0; | ||
514 | const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count; | ||
515 | |||
516 | if (args->mask_buf_size > 0) { | ||
517 | size_t write_size = gpc_tpc_mask_size; | ||
518 | |||
519 | if (write_size > args->mask_buf_size) | ||
520 | write_size = args->mask_buf_size; | ||
521 | |||
522 | err = copy_to_user((void __user *)(uintptr_t) | ||
523 | args->mask_buf_addr, | ||
524 | gr->gpc_tpc_mask, write_size); | ||
525 | } | ||
526 | |||
527 | if (err == 0) | ||
528 | args->mask_buf_size = gpc_tpc_mask_size; | ||
529 | |||
530 | return err; | ||
531 | } | ||
532 | |||
533 | static int gk20a_ctrl_get_fbp_l2_masks( | ||
534 | struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) | ||
535 | { | ||
536 | struct gr_gk20a *gr = &g->gr; | ||
537 | int err = 0; | ||
538 | const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; | ||
539 | |||
540 | if (args->mask_buf_size > 0) { | ||
541 | size_t write_size = fbp_l2_mask_size; | ||
542 | |||
543 | if (write_size > args->mask_buf_size) | ||
544 | write_size = args->mask_buf_size; | ||
545 | |||
546 | err = copy_to_user((void __user *)(uintptr_t) | ||
547 | args->mask_buf_addr, | ||
548 | gr->fbp_rop_l2_en_mask, write_size); | ||
549 | } | ||
550 | |||
551 | if (err == 0) | ||
552 | args->mask_buf_size = fbp_l2_mask_size; | ||
553 | |||
554 | return err; | ||
555 | } | ||
556 | |||
557 | static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, | ||
558 | struct nvgpu_gpu_l2_fb_args *args) | ||
559 | { | ||
560 | int err = 0; | ||
561 | |||
562 | if ((!args->l2_flush && !args->fb_flush) || | ||
563 | (!args->l2_flush && args->l2_invalidate)) | ||
564 | return -EINVAL; | ||
565 | |||
566 | if (args->l2_flush) | ||
567 | g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); | ||
568 | |||
569 | if (args->fb_flush) | ||
570 | g->ops.mm.fb_flush(g); | ||
571 | |||
572 | return err; | ||
573 | } | ||
574 | |||
575 | /* Invalidate i-cache for kepler & maxwell */ | ||
576 | static int nvgpu_gpu_ioctl_inval_icache( | ||
577 | struct gk20a *g, | ||
578 | struct nvgpu_gpu_inval_icache_args *args) | ||
579 | { | ||
580 | struct channel_gk20a *ch; | ||
581 | int err; | ||
582 | |||
583 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
584 | if (!ch) | ||
585 | return -EINVAL; | ||
586 | |||
587 | /* Take the global lock, since we'll be doing global regops */ | ||
588 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
589 | err = g->ops.gr.inval_icache(g, ch); | ||
590 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
591 | |||
592 | gk20a_channel_put(ch); | ||
593 | return err; | ||
594 | } | ||
595 | |||
596 | static int nvgpu_gpu_ioctl_set_mmu_debug_mode( | ||
597 | struct gk20a *g, | ||
598 | struct nvgpu_gpu_mmu_debug_mode_args *args) | ||
599 | { | ||
600 | if (gk20a_busy(g)) { | ||
601 | nvgpu_err(g, "failed to power on gpu"); | ||
602 | return -EINVAL; | ||
603 | } | ||
604 | |||
605 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
606 | g->ops.fb.set_debug_mode(g, args->state == 1); | ||
607 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
608 | |||
609 | gk20a_idle(g); | ||
610 | return 0; | ||
611 | } | ||
612 | |||
613 | static int nvgpu_gpu_ioctl_set_debug_mode( | ||
614 | struct gk20a *g, | ||
615 | struct nvgpu_gpu_sm_debug_mode_args *args) | ||
616 | { | ||
617 | struct channel_gk20a *ch; | ||
618 | int err; | ||
619 | |||
620 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
621 | if (!ch) | ||
622 | return -EINVAL; | ||
623 | |||
624 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
625 | if (g->ops.gr.set_sm_debug_mode) | ||
626 | err = g->ops.gr.set_sm_debug_mode(g, ch, | ||
627 | args->sms, !!args->enable); | ||
628 | else | ||
629 | err = -ENOSYS; | ||
630 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
631 | |||
632 | gk20a_channel_put(ch); | ||
633 | return err; | ||
634 | } | ||
635 | |||
636 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) | ||
637 | { | ||
638 | int err; | ||
639 | |||
640 | err = gk20a_busy(g); | ||
641 | if (err) | ||
642 | return err; | ||
643 | |||
644 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
645 | err = g->ops.gr.trigger_suspend(g); | ||
646 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
647 | |||
648 | gk20a_idle(g); | ||
649 | |||
650 | return err; | ||
651 | } | ||
652 | |||
653 | static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | ||
654 | struct nvgpu_gpu_wait_pause_args *args) | ||
655 | { | ||
656 | int err; | ||
657 | struct warpstate *ioctl_w_state; | ||
658 | struct nvgpu_warpstate *w_state = NULL; | ||
659 | u32 sm_count, ioctl_size, size, sm_id; | ||
660 | |||
661 | sm_count = g->gr.gpc_count * g->gr.tpc_count; | ||
662 | |||
663 | ioctl_size = sm_count * sizeof(struct warpstate); | ||
664 | ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); | ||
665 | if (!ioctl_w_state) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | size = sm_count * sizeof(struct nvgpu_warpstate); | ||
669 | w_state = nvgpu_kzalloc(g, size); | ||
670 | if (!w_state) { | ||
671 | err = -ENOMEM; | ||
672 | goto out_free; | ||
673 | } | ||
674 | |||
675 | err = gk20a_busy(g); | ||
676 | if (err) | ||
677 | goto out_free; | ||
678 | |||
679 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
680 | g->ops.gr.wait_for_pause(g, w_state); | ||
681 | |||
682 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | ||
683 | ioctl_w_state[sm_id].valid_warps[0] = | ||
684 | w_state[sm_id].valid_warps[0]; | ||
685 | ioctl_w_state[sm_id].valid_warps[1] = | ||
686 | w_state[sm_id].valid_warps[1]; | ||
687 | ioctl_w_state[sm_id].trapped_warps[0] = | ||
688 | w_state[sm_id].trapped_warps[0]; | ||
689 | ioctl_w_state[sm_id].trapped_warps[1] = | ||
690 | w_state[sm_id].trapped_warps[1]; | ||
691 | ioctl_w_state[sm_id].paused_warps[0] = | ||
692 | w_state[sm_id].paused_warps[0]; | ||
693 | ioctl_w_state[sm_id].paused_warps[1] = | ||
694 | w_state[sm_id].paused_warps[1]; | ||
695 | } | ||
696 | /* Copy to user space - pointed by "args->pwarpstate" */ | ||
697 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, | ||
698 | w_state, ioctl_size)) { | ||
699 | nvgpu_log_fn(g, "copy_to_user failed!"); | ||
700 | err = -EFAULT; | ||
701 | } | ||
702 | |||
703 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
704 | |||
705 | gk20a_idle(g); | ||
706 | |||
707 | out_free: | ||
708 | nvgpu_kfree(g, w_state); | ||
709 | nvgpu_kfree(g, ioctl_w_state); | ||
710 | |||
711 | return err; | ||
712 | } | ||
713 | |||
714 | static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) | ||
715 | { | ||
716 | int err; | ||
717 | |||
718 | err = gk20a_busy(g); | ||
719 | if (err) | ||
720 | return err; | ||
721 | |||
722 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
723 | err = g->ops.gr.resume_from_pause(g); | ||
724 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
725 | |||
726 | gk20a_idle(g); | ||
727 | |||
728 | return err; | ||
729 | } | ||
730 | |||
731 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | ||
732 | { | ||
733 | int err; | ||
734 | |||
735 | err = gk20a_busy(g); | ||
736 | if (err) | ||
737 | return err; | ||
738 | |||
739 | err = g->ops.gr.clear_sm_errors(g); | ||
740 | |||
741 | gk20a_idle(g); | ||
742 | |||
743 | return err; | ||
744 | } | ||
745 | |||
746 | static int nvgpu_gpu_ioctl_has_any_exception( | ||
747 | struct gk20a *g, | ||
748 | struct nvgpu_gpu_tpc_exception_en_status_args *args) | ||
749 | { | ||
750 | u32 tpc_exception_en; | ||
751 | |||
752 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
753 | tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); | ||
754 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
755 | |||
756 | args->tpc_exception_en_sm_mask = tpc_exception_en; | ||
757 | |||
758 | return 0; | ||
759 | } | ||
760 | |||
761 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, | ||
762 | struct nvgpu_gpu_num_vsms *args) | ||
763 | { | ||
764 | struct gr_gk20a *gr = &g->gr; | ||
765 | args->num_vsms = gr->no_of_sm; | ||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | static int gk20a_ctrl_vsm_mapping(struct gk20a *g, | ||
770 | struct nvgpu_gpu_vsms_mapping *args) | ||
771 | { | ||
772 | int err = 0; | ||
773 | struct gr_gk20a *gr = &g->gr; | ||
774 | size_t write_size = gr->no_of_sm * | ||
775 | sizeof(struct nvgpu_gpu_vsms_mapping_entry); | ||
776 | struct nvgpu_gpu_vsms_mapping_entry *vsms_buf; | ||
777 | u32 i; | ||
778 | |||
779 | vsms_buf = nvgpu_kzalloc(g, write_size); | ||
780 | if (vsms_buf == NULL) | ||
781 | return -ENOMEM; | ||
782 | |||
783 | for (i = 0; i < gr->no_of_sm; i++) { | ||
784 | vsms_buf[i].gpc_index = gr->sm_to_cluster[i].gpc_index; | ||
785 | if (g->ops.gr.get_nonpes_aware_tpc) | ||
786 | vsms_buf[i].tpc_index = | ||
787 | g->ops.gr.get_nonpes_aware_tpc(g, | ||
788 | gr->sm_to_cluster[i].gpc_index, | ||
789 | gr->sm_to_cluster[i].tpc_index); | ||
790 | else | ||
791 | vsms_buf[i].tpc_index = | ||
792 | gr->sm_to_cluster[i].tpc_index; | ||
793 | } | ||
794 | |||
795 | err = copy_to_user((void __user *)(uintptr_t) | ||
796 | args->vsms_map_buf_addr, | ||
797 | vsms_buf, write_size); | ||
798 | nvgpu_kfree(g, vsms_buf); | ||
799 | |||
800 | return err; | ||
801 | } | ||
802 | |||
803 | static int nvgpu_gpu_get_cpu_time_correlation_info( | ||
804 | struct gk20a *g, | ||
805 | struct nvgpu_gpu_get_cpu_time_correlation_info_args *args) | ||
806 | { | ||
807 | struct nvgpu_cpu_time_correlation_sample *samples; | ||
808 | int err; | ||
809 | u32 i; | ||
810 | |||
811 | if (args->count > NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_MAX_COUNT || | ||
812 | args->source_id != NVGPU_GPU_GET_CPU_TIME_CORRELATION_INFO_SRC_ID_TSC) | ||
813 | return -EINVAL; | ||
814 | |||
815 | samples = nvgpu_kzalloc(g, args->count * | ||
816 | sizeof(struct nvgpu_cpu_time_correlation_sample)); | ||
817 | if (!samples) { | ||
818 | return -ENOMEM; | ||
819 | } | ||
820 | |||
821 | err = g->ops.ptimer.get_timestamps_zipper(g, | ||
822 | args->source_id, args->count, samples); | ||
823 | if (!err) { | ||
824 | for (i = 0; i < args->count; i++) { | ||
825 | args->samples[i].cpu_timestamp = samples[i].cpu_timestamp; | ||
826 | args->samples[i].gpu_timestamp = samples[i].gpu_timestamp; | ||
827 | } | ||
828 | } | ||
829 | |||
830 | nvgpu_kfree(g, samples); | ||
831 | |||
832 | return err; | ||
833 | } | ||
834 | |||
835 | static int nvgpu_gpu_get_gpu_time( | ||
836 | struct gk20a *g, | ||
837 | struct nvgpu_gpu_get_gpu_time_args *args) | ||
838 | { | ||
839 | u64 time; | ||
840 | int err; | ||
841 | |||
842 | err = gk20a_busy(g); | ||
843 | if (err) | ||
844 | return err; | ||
845 | |||
846 | err = g->ops.ptimer.read_ptimer(g, &time); | ||
847 | if (!err) | ||
848 | args->gpu_timestamp = time; | ||
849 | |||
850 | gk20a_idle(g); | ||
851 | return err; | ||
852 | } | ||
853 | |||
854 | static int nvgpu_gpu_get_engine_info( | ||
855 | struct gk20a *g, | ||
856 | struct nvgpu_gpu_get_engine_info_args *args) | ||
857 | { | ||
858 | int err = 0; | ||
859 | u32 engine_enum = ENGINE_INVAL_GK20A; | ||
860 | u32 report_index = 0; | ||
861 | u32 engine_id_idx; | ||
862 | const u32 max_buffer_engines = args->engine_info_buf_size / | ||
863 | sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
864 | struct nvgpu_gpu_get_engine_info_item __user *dst_item_list = | ||
865 | (void __user *)(uintptr_t)args->engine_info_buf_addr; | ||
866 | |||
867 | for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; | ||
868 | ++engine_id_idx) { | ||
869 | u32 active_engine_id = g->fifo.active_engines_list[engine_id_idx]; | ||
870 | const struct fifo_engine_info_gk20a *src_info = | ||
871 | &g->fifo.engine_info[active_engine_id]; | ||
872 | struct nvgpu_gpu_get_engine_info_item dst_info; | ||
873 | |||
874 | memset(&dst_info, 0, sizeof(dst_info)); | ||
875 | |||
876 | engine_enum = src_info->engine_enum; | ||
877 | |||
878 | switch (engine_enum) { | ||
879 | case ENGINE_GR_GK20A: | ||
880 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR; | ||
881 | break; | ||
882 | |||
883 | case ENGINE_GRCE_GK20A: | ||
884 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_GR_COPY; | ||
885 | break; | ||
886 | |||
887 | case ENGINE_ASYNC_CE_GK20A: | ||
888 | dst_info.engine_id = NVGPU_GPU_ENGINE_ID_ASYNC_COPY; | ||
889 | break; | ||
890 | |||
891 | default: | ||
892 | nvgpu_err(g, "Unmapped engine enum %u", | ||
893 | engine_enum); | ||
894 | continue; | ||
895 | } | ||
896 | |||
897 | dst_info.engine_instance = src_info->inst_id; | ||
898 | dst_info.runlist_id = src_info->runlist_id; | ||
899 | |||
900 | if (report_index < max_buffer_engines) { | ||
901 | err = copy_to_user(&dst_item_list[report_index], | ||
902 | &dst_info, sizeof(dst_info)); | ||
903 | if (err) | ||
904 | goto clean_up; | ||
905 | } | ||
906 | |||
907 | ++report_index; | ||
908 | } | ||
909 | |||
910 | args->engine_info_buf_size = | ||
911 | report_index * sizeof(struct nvgpu_gpu_get_engine_info_item); | ||
912 | |||
913 | clean_up: | ||
914 | return err; | ||
915 | } | ||
916 | |||
917 | static int nvgpu_gpu_alloc_vidmem(struct gk20a *g, | ||
918 | struct nvgpu_gpu_alloc_vidmem_args *args) | ||
919 | { | ||
920 | u32 align = args->in.alignment ? args->in.alignment : SZ_4K; | ||
921 | int fd; | ||
922 | |||
923 | nvgpu_log_fn(g, " "); | ||
924 | |||
925 | /* not yet supported */ | ||
926 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK)) | ||
927 | return -EINVAL; | ||
928 | |||
929 | /* not yet supported */ | ||
930 | if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | if (args->in.size & (SZ_4K - 1)) | ||
934 | return -EINVAL; | ||
935 | |||
936 | if (!args->in.size) | ||
937 | return -EINVAL; | ||
938 | |||
939 | if (align & (align - 1)) | ||
940 | return -EINVAL; | ||
941 | |||
942 | if (align > roundup_pow_of_two(args->in.size)) { | ||
943 | /* log this special case, buddy allocator detail */ | ||
944 | nvgpu_warn(g, | ||
945 | "alignment larger than buffer size rounded up to power of 2 is not supported"); | ||
946 | return -EINVAL; | ||
947 | } | ||
948 | |||
949 | fd = nvgpu_vidmem_export_linux(g, args->in.size); | ||
950 | if (fd < 0) | ||
951 | return fd; | ||
952 | |||
953 | args->out.dmabuf_fd = fd; | ||
954 | |||
955 | nvgpu_log_fn(g, "done, fd=%d", fd); | ||
956 | |||
957 | return 0; | ||
958 | } | ||
959 | |||
960 | static int nvgpu_gpu_get_memory_state(struct gk20a *g, | ||
961 | struct nvgpu_gpu_get_memory_state_args *args) | ||
962 | { | ||
963 | int err; | ||
964 | |||
965 | nvgpu_log_fn(g, " "); | ||
966 | |||
967 | if (args->reserved[0] || args->reserved[1] || | ||
968 | args->reserved[2] || args->reserved[3]) | ||
969 | return -EINVAL; | ||
970 | |||
971 | err = nvgpu_vidmem_get_space(g, &args->total_free_bytes); | ||
972 | |||
973 | nvgpu_log_fn(g, "done, err=%d, bytes=%lld", err, args->total_free_bytes); | ||
974 | |||
975 | return err; | ||
976 | } | ||
977 | |||
978 | static u32 nvgpu_gpu_convert_clk_domain(u32 clk_domain) | ||
979 | { | ||
980 | u32 domain = 0; | ||
981 | |||
982 | if (clk_domain == NVGPU_GPU_CLK_DOMAIN_MCLK) | ||
983 | domain = NVGPU_CLK_DOMAIN_MCLK; | ||
984 | else if (clk_domain == NVGPU_GPU_CLK_DOMAIN_GPCCLK) | ||
985 | domain = NVGPU_CLK_DOMAIN_GPCCLK; | ||
986 | else | ||
987 | domain = NVGPU_CLK_DOMAIN_MAX + 1; | ||
988 | |||
989 | return domain; | ||
990 | } | ||
991 | |||
992 | static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g, | ||
993 | struct gk20a_ctrl_priv *priv, | ||
994 | struct nvgpu_gpu_clk_vf_points_args *args) | ||
995 | { | ||
996 | struct nvgpu_gpu_clk_vf_point clk_point; | ||
997 | struct nvgpu_gpu_clk_vf_point __user *entry; | ||
998 | struct nvgpu_clk_session *session = priv->clk_session; | ||
999 | u32 clk_domains = 0; | ||
1000 | int err; | ||
1001 | u16 last_mhz; | ||
1002 | u16 *fpoints; | ||
1003 | u32 i; | ||
1004 | u32 max_points = 0; | ||
1005 | u32 num_points = 0; | ||
1006 | u16 min_mhz; | ||
1007 | u16 max_mhz; | ||
1008 | |||
1009 | nvgpu_log_fn(g, " "); | ||
1010 | |||
1011 | if (!session || args->flags) | ||
1012 | return -EINVAL; | ||
1013 | |||
1014 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1015 | args->num_entries = 0; | ||
1016 | |||
1017 | if (!nvgpu_clk_arb_is_valid_domain(g, | ||
1018 | nvgpu_gpu_convert_clk_domain(args->clk_domain))) | ||
1019 | return -EINVAL; | ||
1020 | |||
1021 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
1022 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1023 | &max_points, NULL); | ||
1024 | if (err) | ||
1025 | return err; | ||
1026 | |||
1027 | if (!args->max_entries) { | ||
1028 | args->max_entries = max_points; | ||
1029 | return 0; | ||
1030 | } | ||
1031 | |||
1032 | if (args->max_entries < max_points) | ||
1033 | return -EINVAL; | ||
1034 | |||
1035 | err = nvgpu_clk_arb_get_arbiter_clk_range(g, | ||
1036 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1037 | &min_mhz, &max_mhz); | ||
1038 | if (err) | ||
1039 | return err; | ||
1040 | |||
1041 | fpoints = nvgpu_kcalloc(g, max_points, sizeof(u16)); | ||
1042 | if (!fpoints) | ||
1043 | return -ENOMEM; | ||
1044 | |||
1045 | err = nvgpu_clk_arb_get_arbiter_clk_f_points(g, | ||
1046 | nvgpu_gpu_convert_clk_domain(args->clk_domain), | ||
1047 | &max_points, fpoints); | ||
1048 | if (err) | ||
1049 | goto fail; | ||
1050 | |||
1051 | entry = (struct nvgpu_gpu_clk_vf_point __user *) | ||
1052 | (uintptr_t)args->clk_vf_point_entries; | ||
1053 | |||
1054 | last_mhz = 0; | ||
1055 | num_points = 0; | ||
1056 | for (i = 0; (i < max_points) && !err; i++) { | ||
1057 | |||
1058 | /* filter out duplicate frequencies */ | ||
1059 | if (fpoints[i] == last_mhz) | ||
1060 | continue; | ||
1061 | |||
1062 | /* filter out out-of-range frequencies */ | ||
1063 | if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz)) | ||
1064 | continue; | ||
1065 | |||
1066 | last_mhz = fpoints[i]; | ||
1067 | clk_point.freq_hz = MHZ_TO_HZ(fpoints[i]); | ||
1068 | |||
1069 | err = copy_to_user((void __user *)entry, &clk_point, | ||
1070 | sizeof(clk_point)); | ||
1071 | |||
1072 | num_points++; | ||
1073 | entry++; | ||
1074 | } | ||
1075 | |||
1076 | args->num_entries = num_points; | ||
1077 | |||
1078 | fail: | ||
1079 | nvgpu_kfree(g, fpoints); | ||
1080 | return err; | ||
1081 | } | ||
1082 | |||
/*
 * NVGPU_GPU_IOCTL_CLK_GET_RANGE: report the min/max frequency of each
 * arbitrated clock domain.
 *
 * Two query modes:
 *  - flags == 0: enumerate every arbiter domain; num_entries == 0 is a
 *    size query, otherwise the caller's buffer must hold one entry per
 *    domain;
 *  - flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS: the caller names the
 *    domains to query in clk_range_entries.
 */
static int nvgpu_gpu_clk_get_range(struct gk20a *g,
		struct gk20a_ctrl_priv *priv,
		struct nvgpu_gpu_clk_range_args *args)
{
	struct nvgpu_gpu_clk_range clk_range;
	struct nvgpu_gpu_clk_range __user *entry;
	struct nvgpu_clk_session *session = priv->clk_session;

	u32 clk_domains = 0;
	u32 num_domains;
	u32 num_entries;
	u32 i;
	int bit;
	int err;
	u16 min_mhz, max_mhz;

	nvgpu_log_fn(g, " ");

	/* no clk session means the clk arbiter is not available here */
	if (!session)
		return -EINVAL;

	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
	num_domains = hweight_long(clk_domains);

	if (!args->flags) {
		if (!args->num_entries) {
			/* size query: tell the caller how many entries */
			args->num_entries = num_domains;
			return 0;
		}

		if (args->num_entries < num_domains)
			return -EINVAL;

		args->num_entries = 0;
		num_entries = num_domains;

	} else {
		if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS)
			return -EINVAL;

		num_entries = args->num_entries;
		if (num_entries > num_domains)
			return -EINVAL;
	}

	entry = (struct nvgpu_gpu_clk_range __user *)
			(uintptr_t)args->clk_range_entries;

	for (i = 0; i < num_entries; i++, entry++) {

		if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) {
			/* caller picked the domain: read it from user space */
			if (copy_from_user(&clk_range, (void __user *)entry,
					sizeof(clk_range)))
				return -EFAULT;
		} else {
			/* enumerate mode: next set bit selects the domain */
			bit = ffs(clk_domains) - 1;
			clk_range.clk_domain = bit;
			clk_domains &= ~BIT(bit);
		}

		clk_range.flags = 0;
		err = nvgpu_clk_arb_get_arbiter_clk_range(g,
				nvgpu_gpu_convert_clk_domain(clk_range.clk_domain),
				&min_mhz, &max_mhz);
		clk_range.min_hz = MHZ_TO_HZ(min_mhz);
		clk_range.max_hz = MHZ_TO_HZ(max_mhz);

		if (err)
			return err;

		err = copy_to_user(entry, &clk_range, sizeof(clk_range));
		if (err)
			return -EFAULT;
	}

	args->num_entries = num_entries;

	return 0;
}
1162 | |||
/*
 * NVGPU_GPU_IOCTL_CLK_SET_INFO: request new target frequencies for one
 * or more clock domains via the clk arbiter.  On success
 * args->completion_fd carries an fd that signals when the request has
 * been applied.
 */
static int nvgpu_gpu_clk_set_info(struct gk20a *g,
		struct gk20a_ctrl_priv *priv,
		struct nvgpu_gpu_clk_set_info_args *args)
{
	struct nvgpu_gpu_clk_info clk_info;
	struct nvgpu_gpu_clk_info __user *entry;
	struct nvgpu_clk_session *session = priv->clk_session;

	int fd;
	u32 clk_domains = 0;
	u16 freq_mhz;
	int i;
	int ret;

	nvgpu_log_fn(g, " ");

	/* requires a clk session; no flags are defined for this ioctl */
	if (!session || args->flags)
		return -EINVAL;

	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
	if (!clk_domains)
		return -EINVAL;

	entry = (struct nvgpu_gpu_clk_info __user *)
			(uintptr_t)args->clk_info_entries;

	/* first pass: validate every requested domain before committing */
	for (i = 0; i < args->num_entries; i++, entry++) {

		if (copy_from_user(&clk_info, entry, sizeof(clk_info)))
			return -EFAULT;

		if (!nvgpu_clk_arb_is_valid_domain(g,
				nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
			return -EINVAL;
	}

	entry = (struct nvgpu_gpu_clk_info __user *)
			(uintptr_t)args->clk_info_entries;

	ret = nvgpu_clk_arb_install_request_fd(g, session, &fd);
	if (ret < 0)
		return ret;

	/* second pass: record the target frequency per domain.
	 * NOTE(review): a copy_from_user failure here returns -EFAULT
	 * with the freshly installed request fd left open — looks like
	 * an fd leak; confirm against the clk arbiter fd lifetime. */
	for (i = 0; i < args->num_entries; i++, entry++) {

		if (copy_from_user(&clk_info, (void __user *)entry,
				sizeof(clk_info)))
			return -EFAULT;
		freq_mhz = HZ_TO_MHZ(clk_info.freq_hz);

		nvgpu_clk_arb_set_session_target_mhz(session, fd,
			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
	}

	ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
	if (ret < 0)
		return ret;

	args->completion_fd = fd;

	return ret;
}
1225 | |||
1226 | static int nvgpu_gpu_clk_get_info(struct gk20a *g, | ||
1227 | struct gk20a_ctrl_priv *priv, | ||
1228 | struct nvgpu_gpu_clk_get_info_args *args) | ||
1229 | { | ||
1230 | struct nvgpu_gpu_clk_info clk_info; | ||
1231 | struct nvgpu_gpu_clk_info __user *entry; | ||
1232 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1233 | u32 clk_domains = 0; | ||
1234 | u32 num_domains; | ||
1235 | u32 num_entries; | ||
1236 | u32 i; | ||
1237 | u16 freq_mhz; | ||
1238 | int err; | ||
1239 | int bit; | ||
1240 | |||
1241 | nvgpu_log_fn(g, " "); | ||
1242 | |||
1243 | if (!session) | ||
1244 | return -EINVAL; | ||
1245 | |||
1246 | clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); | ||
1247 | num_domains = hweight_long(clk_domains); | ||
1248 | |||
1249 | if (!args->flags) { | ||
1250 | if (!args->num_entries) { | ||
1251 | args->num_entries = num_domains; | ||
1252 | return 0; | ||
1253 | } | ||
1254 | |||
1255 | if (args->num_entries < num_domains) | ||
1256 | return -EINVAL; | ||
1257 | |||
1258 | args->num_entries = 0; | ||
1259 | num_entries = num_domains; | ||
1260 | |||
1261 | } else { | ||
1262 | if (args->flags != NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) | ||
1263 | return -EINVAL; | ||
1264 | |||
1265 | num_entries = args->num_entries; | ||
1266 | if (num_entries > num_domains * 3) | ||
1267 | return -EINVAL; | ||
1268 | } | ||
1269 | |||
1270 | entry = (struct nvgpu_gpu_clk_info __user *) | ||
1271 | (uintptr_t)args->clk_info_entries; | ||
1272 | |||
1273 | for (i = 0; i < num_entries; i++, entry++) { | ||
1274 | |||
1275 | if (args->flags == NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS) { | ||
1276 | if (copy_from_user(&clk_info, (void __user *)entry, | ||
1277 | sizeof(clk_info))) | ||
1278 | return -EFAULT; | ||
1279 | } else { | ||
1280 | bit = ffs(clk_domains) - 1; | ||
1281 | clk_info.clk_domain = bit; | ||
1282 | clk_domains &= ~BIT(bit); | ||
1283 | clk_info.clk_type = args->clk_type; | ||
1284 | } | ||
1285 | |||
1286 | switch (clk_info.clk_type) { | ||
1287 | case NVGPU_GPU_CLK_TYPE_TARGET: | ||
1288 | err = nvgpu_clk_arb_get_session_target_mhz(session, | ||
1289 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1290 | &freq_mhz); | ||
1291 | break; | ||
1292 | case NVGPU_GPU_CLK_TYPE_ACTUAL: | ||
1293 | err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, | ||
1294 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1295 | &freq_mhz); | ||
1296 | break; | ||
1297 | case NVGPU_GPU_CLK_TYPE_EFFECTIVE: | ||
1298 | err = nvgpu_clk_arb_get_arbiter_effective_mhz(g, | ||
1299 | nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), | ||
1300 | &freq_mhz); | ||
1301 | break; | ||
1302 | default: | ||
1303 | freq_mhz = 0; | ||
1304 | err = -EINVAL; | ||
1305 | break; | ||
1306 | } | ||
1307 | if (err) | ||
1308 | return err; | ||
1309 | |||
1310 | clk_info.flags = 0; | ||
1311 | clk_info.freq_hz = MHZ_TO_HZ(freq_mhz); | ||
1312 | |||
1313 | err = copy_to_user((void __user *)entry, &clk_info, | ||
1314 | sizeof(clk_info)); | ||
1315 | if (err) | ||
1316 | return -EFAULT; | ||
1317 | } | ||
1318 | |||
1319 | args->num_entries = num_entries; | ||
1320 | |||
1321 | return 0; | ||
1322 | } | ||
1323 | |||
1324 | static int nvgpu_gpu_get_event_fd(struct gk20a *g, | ||
1325 | struct gk20a_ctrl_priv *priv, | ||
1326 | struct nvgpu_gpu_get_event_fd_args *args) | ||
1327 | { | ||
1328 | struct nvgpu_clk_session *session = priv->clk_session; | ||
1329 | |||
1330 | nvgpu_log_fn(g, " "); | ||
1331 | |||
1332 | if (!session) | ||
1333 | return -EINVAL; | ||
1334 | |||
1335 | return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd, | ||
1336 | args->flags); | ||
1337 | } | ||
1338 | |||
1339 | static int nvgpu_gpu_get_voltage(struct gk20a *g, | ||
1340 | struct nvgpu_gpu_get_voltage_args *args) | ||
1341 | { | ||
1342 | int err = -EINVAL; | ||
1343 | |||
1344 | nvgpu_log_fn(g, " "); | ||
1345 | |||
1346 | if (args->reserved) | ||
1347 | return -EINVAL; | ||
1348 | |||
1349 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_VOLTAGE)) | ||
1350 | return -EINVAL; | ||
1351 | |||
1352 | err = gk20a_busy(g); | ||
1353 | if (err) | ||
1354 | return err; | ||
1355 | |||
1356 | switch (args->which) { | ||
1357 | case NVGPU_GPU_VOLTAGE_CORE: | ||
1358 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); | ||
1359 | break; | ||
1360 | case NVGPU_GPU_VOLTAGE_SRAM: | ||
1361 | err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_SRAM, &args->voltage); | ||
1362 | break; | ||
1363 | case NVGPU_GPU_VOLTAGE_BUS: | ||
1364 | err = pmgr_pwr_devices_get_voltage(g, &args->voltage); | ||
1365 | break; | ||
1366 | default: | ||
1367 | err = -EINVAL; | ||
1368 | } | ||
1369 | |||
1370 | gk20a_idle(g); | ||
1371 | |||
1372 | return err; | ||
1373 | } | ||
1374 | |||
1375 | static int nvgpu_gpu_get_current(struct gk20a *g, | ||
1376 | struct nvgpu_gpu_get_current_args *args) | ||
1377 | { | ||
1378 | int err; | ||
1379 | |||
1380 | nvgpu_log_fn(g, " "); | ||
1381 | |||
1382 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1383 | return -EINVAL; | ||
1384 | |||
1385 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_CURRENT)) | ||
1386 | return -EINVAL; | ||
1387 | |||
1388 | err = gk20a_busy(g); | ||
1389 | if (err) | ||
1390 | return err; | ||
1391 | |||
1392 | err = pmgr_pwr_devices_get_current(g, &args->currnt); | ||
1393 | |||
1394 | gk20a_idle(g); | ||
1395 | |||
1396 | return err; | ||
1397 | } | ||
1398 | |||
1399 | static int nvgpu_gpu_get_power(struct gk20a *g, | ||
1400 | struct nvgpu_gpu_get_power_args *args) | ||
1401 | { | ||
1402 | int err; | ||
1403 | |||
1404 | nvgpu_log_fn(g, " "); | ||
1405 | |||
1406 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1407 | return -EINVAL; | ||
1408 | |||
1409 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_POWER)) | ||
1410 | return -EINVAL; | ||
1411 | |||
1412 | err = gk20a_busy(g); | ||
1413 | if (err) | ||
1414 | return err; | ||
1415 | |||
1416 | err = pmgr_pwr_devices_get_power(g, &args->power); | ||
1417 | |||
1418 | gk20a_idle(g); | ||
1419 | |||
1420 | return err; | ||
1421 | } | ||
1422 | |||
1423 | static int nvgpu_gpu_get_temperature(struct gk20a *g, | ||
1424 | struct nvgpu_gpu_get_temperature_args *args) | ||
1425 | { | ||
1426 | int err; | ||
1427 | u32 temp_f24_8; | ||
1428 | |||
1429 | nvgpu_log_fn(g, " "); | ||
1430 | |||
1431 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1432 | return -EINVAL; | ||
1433 | |||
1434 | if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) | ||
1435 | return -EINVAL; | ||
1436 | |||
1437 | if (!g->ops.therm.get_internal_sensor_curr_temp) | ||
1438 | return -EINVAL; | ||
1439 | |||
1440 | err = gk20a_busy(g); | ||
1441 | if (err) | ||
1442 | return err; | ||
1443 | |||
1444 | err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); | ||
1445 | |||
1446 | gk20a_idle(g); | ||
1447 | |||
1448 | args->temp_f24_8 = (s32)temp_f24_8; | ||
1449 | |||
1450 | return err; | ||
1451 | } | ||
1452 | |||
1453 | static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, | ||
1454 | struct nvgpu_gpu_set_therm_alert_limit_args *args) | ||
1455 | { | ||
1456 | int err; | ||
1457 | |||
1458 | nvgpu_log_fn(g, " "); | ||
1459 | |||
1460 | if (args->reserved[0] || args->reserved[1] || args->reserved[2]) | ||
1461 | return -EINVAL; | ||
1462 | |||
1463 | if (!g->ops.therm.configure_therm_alert) | ||
1464 | return -EINVAL; | ||
1465 | |||
1466 | err = gk20a_busy(g); | ||
1467 | if (err) | ||
1468 | return err; | ||
1469 | |||
1470 | err = g->ops.therm.configure_therm_alert(g, args->temp_f24_8); | ||
1471 | |||
1472 | gk20a_idle(g); | ||
1473 | |||
1474 | return err; | ||
1475 | } | ||
1476 | |||
/*
 * Update the railgating policy of one deterministic channel.
 *
 * A deterministic channel normally holds a power reference (so the GPU
 * cannot railgate underneath it). ALLOW_RAILGATING drops that reference
 * via gk20a_idle(); DISALLOW_RAILGATING re-takes it via gk20a_busy().
 * The busy/idle calls below must stay exactly balanced with the
 * channel's deterministic_railgate_allowed state, which is why this
 * function only acts on an actual state transition.
 *
 * Returns 0 on success or no-op, -EINVAL if both flags are set, or the
 * error from gk20a_busy() (in which case the state is left unchanged).
 */
static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
		u32 flags)
{
	int err = 0;
	bool allow;
	bool disallow;

	allow = flags &
		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;

	disallow = flags &
		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;

	/* Can't be both at the same time */
	if (allow && disallow)
		return -EINVAL;

	/* Nothing to do */
	if (!allow && !disallow)
		return 0;

	/*
	 * Moving into explicit idle or back from it? A call that doesn't
	 * change the status is a no-op.
	 */
	if (!ch->deterministic_railgate_allowed &&
			allow) {
		/* Drop the channel's power ref; railgating now possible. */
		gk20a_idle(ch->g);
	} else if (ch->deterministic_railgate_allowed &&
			!allow) {
		/* Re-take the power ref to block railgating again. */
		err = gk20a_busy(ch->g);
		if (err) {
			nvgpu_warn(ch->g,
				"cannot busy to restore deterministic ch");
			return err;
		}
	}
	ch->deterministic_railgate_allowed = allow;

	return err;
}
1518 | |||
1519 | static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) | ||
1520 | { | ||
1521 | if (!ch->deterministic) | ||
1522 | return -EINVAL; | ||
1523 | |||
1524 | return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); | ||
1525 | } | ||
1526 | |||
/*
 * NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS handler.
 *
 * Applies args->flags to each channel fd in the user-supplied array
 * args->channels (args->num_channels entries). Stops at the first
 * failure; args->num_channels is updated on exit to the number of
 * channels successfully processed, so userspace can tell how far the
 * loop got.
 *
 * Returns 0 on success, -EINVAL on a bad count or non-deterministic
 * channel, -EFAULT on user memory faults, or the error from the
 * per-channel update.
 */
static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
		struct nvgpu_gpu_set_deterministic_opts_args *args)
{
	int __user *user_channels;
	u32 i = 0;
	int err = 0;

	nvgpu_log_fn(g, " ");

	user_channels = (int __user *)(uintptr_t)args->channels;

	/* Upper limit; prevent holding deterministic_busy for long */
	if (args->num_channels > g->fifo.num_channels) {
		err = -EINVAL;
		goto out;
	}

	/* Trivial sanity check first */
	if (!access_ok(VERIFY_READ, user_channels,
				args->num_channels * sizeof(int))) {
		err = -EFAULT;
		goto out;
	}

	/*
	 * Hold deterministic_busy (read) across all updates so job
	 * submission state can't change concurrently.
	 */
	nvgpu_rwsem_down_read(&g->deterministic_busy);

	/* note: we exit at the first failure */
	for (; i < args->num_channels; i++) {
		int ch_fd = 0;
		struct channel_gk20a *ch;

		if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
			/* User raced with above access_ok */
			err = -EFAULT;
			break;
		}

		/* Takes a channel reference; must be put before moving on. */
		ch = gk20a_get_channel_from_file(ch_fd);
		if (!ch) {
			err = -EINVAL;
			break;
		}

		err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);

		gk20a_channel_put(ch);

		if (err)
			break;
	}

	nvgpu_rwsem_up_read(&g->deterministic_busy);

out:
	args->num_channels = i;
	return err;
}
1584 | |||
1585 | static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g, | ||
1586 | struct nvgpu_gpu_read_single_sm_error_state_args *args) | ||
1587 | { | ||
1588 | struct gr_gk20a *gr = &g->gr; | ||
1589 | struct nvgpu_gr_sm_error_state *sm_error_state; | ||
1590 | struct nvgpu_gpu_sm_error_state_record sm_error_state_record; | ||
1591 | u32 sm_id; | ||
1592 | int err = 0; | ||
1593 | |||
1594 | sm_id = args->sm_id; | ||
1595 | if (sm_id >= gr->no_of_sm) | ||
1596 | return -EINVAL; | ||
1597 | |||
1598 | nvgpu_speculation_barrier(); | ||
1599 | |||
1600 | sm_error_state = gr->sm_error_states + sm_id; | ||
1601 | sm_error_state_record.global_esr = | ||
1602 | sm_error_state->hww_global_esr; | ||
1603 | sm_error_state_record.warp_esr = | ||
1604 | sm_error_state->hww_warp_esr; | ||
1605 | sm_error_state_record.warp_esr_pc = | ||
1606 | sm_error_state->hww_warp_esr_pc; | ||
1607 | sm_error_state_record.global_esr_report_mask = | ||
1608 | sm_error_state->hww_global_esr_report_mask; | ||
1609 | sm_error_state_record.warp_esr_report_mask = | ||
1610 | sm_error_state->hww_warp_esr_report_mask; | ||
1611 | |||
1612 | if (args->record_size > 0) { | ||
1613 | size_t write_size = sizeof(*sm_error_state); | ||
1614 | |||
1615 | if (write_size > args->record_size) | ||
1616 | write_size = args->record_size; | ||
1617 | |||
1618 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1619 | err = copy_to_user((void __user *)(uintptr_t) | ||
1620 | args->record_mem, | ||
1621 | &sm_error_state_record, | ||
1622 | write_size); | ||
1623 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1624 | if (err) { | ||
1625 | nvgpu_err(g, "copy_to_user failed!"); | ||
1626 | return err; | ||
1627 | } | ||
1628 | |||
1629 | args->record_size = write_size; | ||
1630 | } | ||
1631 | |||
1632 | return 0; | ||
1633 | } | ||
1634 | |||
/*
 * Main ioctl dispatcher for the gk20a control device.
 *
 * Flow: validate the ioctl magic/number/size, copy the argument struct
 * into the on-stack buf for _IOC_WRITE commands, force one-time SW
 * initialization via a gk20a_busy()/gk20a_idle() cycle if it has not
 * happened yet, dispatch to the per-command handler, and finally copy
 * buf back to userspace for _IOC_READ commands when the handler
 * succeeded.
 */
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct gk20a_ctrl_priv *priv = filp->private_data;
	struct gk20a *g = priv->g;
	struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
	struct nvgpu_gpu_zcull_get_info_args *get_info_args;
	struct nvgpu_gpu_zbc_set_table_args *set_table_args;
	struct nvgpu_gpu_zbc_query_table_args *query_table_args;
	u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
	struct gr_zcull_info *zcull_info;
	struct zbc_entry *zbc_val;
	struct zbc_query_params *zbc_tbl;
	int i, err = 0;

	nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));

	/* Reject foreign magics, out-of-range numbers and oversized args. */
	if ((_IOC_TYPE(cmd) != NVGPU_GPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_GPU_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_GPU_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* Trigger deferred SW init once via a busy/idle power cycle. */
	if (!g->sw_ready) {
		err = gk20a_busy(g);
		if (err)
			return err;

		gk20a_idle(g);
	}

	switch (cmd) {
	case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
		get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;

		get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);

		break;
	case NVGPU_GPU_IOCTL_ZCULL_GET_INFO:
		get_info_args = (struct nvgpu_gpu_zcull_get_info_args *)buf;

		memset(get_info_args, 0, sizeof(struct nvgpu_gpu_zcull_get_info_args));

		zcull_info = nvgpu_kzalloc(g, sizeof(struct gr_zcull_info));
		if (zcull_info == NULL)
			return -ENOMEM;

		err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
		if (err) {
			nvgpu_kfree(g, zcull_info);
			break;
		}

		get_info_args->width_align_pixels = zcull_info->width_align_pixels;
		get_info_args->height_align_pixels = zcull_info->height_align_pixels;
		get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
		get_info_args->aliquot_total = zcull_info->aliquot_total;
		get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
		get_info_args->region_header_size = zcull_info->region_header_size;
		get_info_args->subregion_header_size = zcull_info->subregion_header_size;
		get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
		get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
		get_info_args->subregion_count = zcull_info->subregion_count;

		nvgpu_kfree(g, zcull_info);
		break;
	case NVGPU_GPU_IOCTL_ZBC_SET_TABLE:
		set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf;

		zbc_val = nvgpu_kzalloc(g, sizeof(struct zbc_entry));
		if (zbc_val == NULL)
			return -ENOMEM;

		zbc_val->format = set_table_args->format;
		zbc_val->type = set_table_args->type;

		/* Copy the per-type payload before touching hardware. */
		switch (zbc_val->type) {
		case GK20A_ZBC_TYPE_COLOR:
			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
				zbc_val->color_ds[i] = set_table_args->color_ds[i];
				zbc_val->color_l2[i] = set_table_args->color_l2[i];
			}
			break;
		case GK20A_ZBC_TYPE_DEPTH:
		case T19X_ZBC:
			zbc_val->depth = set_table_args->depth;
			break;
		default:
			err = -EINVAL;
		}

		if (!err) {
			err = gk20a_busy(g);
			if (!err) {
				err = g->ops.gr.zbc_set_table(g, &g->gr,
							zbc_val);
				gk20a_idle(g);
			}
		}

		if (zbc_val)
			nvgpu_kfree(g, zbc_val);
		break;
	case NVGPU_GPU_IOCTL_ZBC_QUERY_TABLE:
		query_table_args = (struct nvgpu_gpu_zbc_query_table_args *)buf;

		zbc_tbl = nvgpu_kzalloc(g, sizeof(struct zbc_query_params));
		if (zbc_tbl == NULL)
			return -ENOMEM;

		zbc_tbl->type = query_table_args->type;
		zbc_tbl->index_size = query_table_args->index_size;

		err = g->ops.gr.zbc_query_table(g, &g->gr, zbc_tbl);

		if (!err) {
			switch (zbc_tbl->type) {
			case GK20A_ZBC_TYPE_COLOR:
				for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
					query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
					query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
				}
				break;
			case GK20A_ZBC_TYPE_DEPTH:
			case T19X_ZBC:
				query_table_args->depth = zbc_tbl->depth;
				break;
			case GK20A_ZBC_TYPE_INVALID:
				query_table_args->index_size = zbc_tbl->index_size;
				break;
			default:
				err = -EINVAL;
			}
			if (!err) {
				query_table_args->format = zbc_tbl->format;
				query_table_args->ref_cnt = zbc_tbl->ref_cnt;
			}
		}

		if (zbc_tbl)
			nvgpu_kfree(g, zbc_tbl);
		break;

	case NVGPU_GPU_IOCTL_GET_CHARACTERISTICS:
		err = gk20a_ctrl_ioctl_gpu_characteristics(
			g, (struct nvgpu_gpu_get_characteristics *)buf);
		break;
	case NVGPU_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ:
		err = gk20a_ctrl_prepare_compressible_read(g,
			(struct nvgpu_gpu_prepare_compressible_read_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE:
		err = gk20a_ctrl_mark_compressible_write(g,
			(struct nvgpu_gpu_mark_compressible_write_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_ALLOC_AS:
		err = gk20a_ctrl_alloc_as(g,
			(struct nvgpu_alloc_as_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_OPEN_TSG:
		err = gk20a_ctrl_open_tsg(g,
			(struct nvgpu_gpu_open_tsg_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
		err = gk20a_ctrl_get_tpc_masks(g,
			(struct nvgpu_gpu_get_tpc_masks_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS:
		err = gk20a_ctrl_get_fbp_l2_masks(g,
			(struct nvgpu_gpu_get_fbp_l2_masks_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
		/* this arg type here, but ..gpu_open_channel_args in nvgpu.h
		 * for consistency - they are the same */
		err = gk20a_channel_open_ioctl(g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_FLUSH_L2:
		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
			(struct nvgpu_gpu_l2_fb_args *)buf);
		break;
	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
		err = gr_gk20a_elpg_protected_call(g,
			nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
		break;

	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
			(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
		err = gr_gk20a_elpg_protected_call(g,
			nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
		break;

	case NVGPU_GPU_IOCTL_TRIGGER_SUSPEND:
		err = nvgpu_gpu_ioctl_trigger_suspend(g);
		break;

	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
		err = nvgpu_gpu_ioctl_wait_for_pause(g,
			(struct nvgpu_gpu_wait_pause_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_RESUME_FROM_PAUSE:
		err = nvgpu_gpu_ioctl_resume_from_pause(g);
		break;

	case NVGPU_GPU_IOCTL_CLEAR_SM_ERRORS:
		err = nvgpu_gpu_ioctl_clear_sm_errors(g);
		break;

	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
		err = nvgpu_gpu_ioctl_has_any_exception(g,
			(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_NUM_VSMS:
		err = gk20a_ctrl_get_num_vsms(g,
			(struct nvgpu_gpu_num_vsms *)buf);
		break;
	case NVGPU_GPU_IOCTL_VSMS_MAPPING:
		err = gk20a_ctrl_vsm_mapping(g,
			(struct nvgpu_gpu_vsms_mapping *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_CPU_TIME_CORRELATION_INFO:
		err = nvgpu_gpu_get_cpu_time_correlation_info(g,
			(struct nvgpu_gpu_get_cpu_time_correlation_info_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_GPU_TIME:
		err = nvgpu_gpu_get_gpu_time(g,
			(struct nvgpu_gpu_get_gpu_time_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_ENGINE_INFO:
		err = nvgpu_gpu_get_engine_info(g,
			(struct nvgpu_gpu_get_engine_info_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
		err = nvgpu_gpu_alloc_vidmem(g,
			(struct nvgpu_gpu_alloc_vidmem_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_MEMORY_STATE:
		err = nvgpu_gpu_get_memory_state(g,
			(struct nvgpu_gpu_get_memory_state_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_CLK_GET_RANGE:
		err = nvgpu_gpu_clk_get_range(g, priv,
			(struct nvgpu_gpu_clk_range_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS:
		err = nvgpu_gpu_clk_get_vf_points(g, priv,
			(struct nvgpu_gpu_clk_vf_points_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_CLK_SET_INFO:
		err = nvgpu_gpu_clk_set_info(g, priv,
			(struct nvgpu_gpu_clk_set_info_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_CLK_GET_INFO:
		err = nvgpu_gpu_clk_get_info(g, priv,
			(struct nvgpu_gpu_clk_get_info_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_EVENT_FD:
		err = nvgpu_gpu_get_event_fd(g, priv,
			(struct nvgpu_gpu_get_event_fd_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_VOLTAGE:
		err = nvgpu_gpu_get_voltage(g,
			(struct nvgpu_gpu_get_voltage_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_CURRENT:
		err = nvgpu_gpu_get_current(g,
			(struct nvgpu_gpu_get_current_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_POWER:
		err = nvgpu_gpu_get_power(g,
			(struct nvgpu_gpu_get_power_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_GET_TEMPERATURE:
		err = nvgpu_gpu_get_temperature(g,
			(struct nvgpu_gpu_get_temperature_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT:
		err = nvgpu_gpu_set_therm_alert_limit(g,
			(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
		err = nvgpu_gpu_set_deterministic_opts(g,
			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
		break;

	case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
		err = nvgpu_gpu_read_single_sm_error_state(g,
			(struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
		break;

	default:
		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	/* Copy the argument struct back for commands that return data. */
	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	return err;
}
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h new file mode 100644 index 00000000..8b4a5e59 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
#ifndef __NVGPU_IOCTL_CTRL_H__
#define __NVGPU_IOCTL_CTRL_H__

/* open/release/ioctl entry points for the gk20a control device node. */
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c new file mode 100644 index 00000000..31e7e2cb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | |||
@@ -0,0 +1,2003 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger/Profiler Driver | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/fs.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/cdev.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/dma-buf.h> | ||
24 | #include <uapi/linux/nvgpu.h> | ||
25 | |||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/log.h> | ||
28 | #include <nvgpu/vm.h> | ||
29 | #include <nvgpu/atomic.h> | ||
30 | #include <nvgpu/cond.h> | ||
31 | |||
32 | #include <nvgpu/linux/vidmem.h> | ||
33 | #include <nvgpu/linux/vm.h> | ||
34 | |||
35 | #include "gk20a/gk20a.h" | ||
36 | #include "gk20a/gr_gk20a.h" | ||
37 | #include "gk20a/regops_gk20a.h" | ||
38 | #include "gk20a/dbg_gpu_gk20a.h" | ||
39 | #include "os_linux.h" | ||
40 | #include "platform_gk20a.h" | ||
41 | #include "ioctl_dbg.h" | ||
42 | |||
43 | /* turn seriously unwieldy names -> something shorter */ | ||
44 | #define REGOP_LINUX(x) NVGPU_DBG_GPU_REG_OP_##x | ||
45 | |||
/* silly allocator - just increment id; ids start at 1 and are handed out
 * monotonically (no reuse until 32-bit wrap) for both session ids and
 * profiler handles */
static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
/* Return the next unique id (first caller gets 1). */
static int generate_unique_id(void)
{
	return nvgpu_atomic_add_return(1, &unique_id);
}
52 | |||
53 | static int alloc_profiler(struct gk20a *g, | ||
54 | struct dbg_profiler_object_data **_prof) | ||
55 | { | ||
56 | struct dbg_profiler_object_data *prof; | ||
57 | *_prof = NULL; | ||
58 | |||
59 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
60 | |||
61 | prof = nvgpu_kzalloc(g, sizeof(*prof)); | ||
62 | if (!prof) | ||
63 | return -ENOMEM; | ||
64 | |||
65 | prof->prof_handle = generate_unique_id(); | ||
66 | *_prof = prof; | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int alloc_session(struct gk20a *g, struct dbg_session_gk20a_linux **_dbg_s_linux) | ||
71 | { | ||
72 | struct dbg_session_gk20a_linux *dbg_s_linux; | ||
73 | *_dbg_s_linux = NULL; | ||
74 | |||
75 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
76 | |||
77 | dbg_s_linux = nvgpu_kzalloc(g, sizeof(*dbg_s_linux)); | ||
78 | if (!dbg_s_linux) | ||
79 | return -ENOMEM; | ||
80 | |||
81 | dbg_s_linux->dbg_s.id = generate_unique_id(); | ||
82 | *_dbg_s_linux = dbg_s_linux; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, | ||
87 | struct gr_gk20a *gr); | ||
88 | |||
89 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset); | ||
90 | |||
91 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
92 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args); | ||
93 | |||
94 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
95 | struct nvgpu_dbg_gpu_powergate_args *args); | ||
96 | |||
97 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
98 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args); | ||
99 | |||
100 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
101 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args); | ||
102 | |||
103 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
104 | struct dbg_session_gk20a *dbg_s, | ||
105 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args); | ||
106 | |||
107 | static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a_linux *dbg_s, | ||
108 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
109 | |||
110 | static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a_linux *dbg_s_linux, | ||
111 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args); | ||
112 | |||
113 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
114 | struct nvgpu_dbg_gpu_profiler_reserve_args *args); | ||
115 | |||
116 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
117 | struct nvgpu_dbg_gpu_perfbuf_map_args *args); | ||
118 | |||
119 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
120 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args); | ||
121 | |||
122 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
123 | int timeout_mode); | ||
124 | |||
125 | static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s, | ||
126 | u32 profiler_handle); | ||
127 | |||
128 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s); | ||
129 | |||
130 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s); | ||
131 | |||
132 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
133 | u32 profiler_handle); | ||
134 | |||
135 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s); | ||
136 | |||
137 | static int gk20a_dbg_gpu_do_dev_open(struct inode *inode, | ||
138 | struct file *filp, bool is_profiler); | ||
139 | |||
140 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait) | ||
141 | { | ||
142 | unsigned int mask = 0; | ||
143 | struct dbg_session_gk20a_linux *dbg_session_linux = filep->private_data; | ||
144 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
145 | struct gk20a *g = dbg_s->g; | ||
146 | |||
147 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
148 | |||
149 | poll_wait(filep, &dbg_s->dbg_events.wait_queue.wq, wait); | ||
150 | |||
151 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
152 | |||
153 | if (dbg_s->dbg_events.events_enabled && | ||
154 | dbg_s->dbg_events.num_pending_events > 0) { | ||
155 | nvgpu_log(g, gpu_dbg_gpu_dbg, "found pending event on session id %d", | ||
156 | dbg_s->id); | ||
157 | nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending", | ||
158 | dbg_s->dbg_events.num_pending_events); | ||
159 | mask = (POLLPRI | POLLIN); | ||
160 | } | ||
161 | |||
162 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
163 | |||
164 | return mask; | ||
165 | } | ||
166 | |||
/*
 * release() for a dbg/prof session fd. Unwinds everything the session
 * may have left behind - channel bindings, powergate/timeout disable
 * votes, perf buffer ownership, global profiler objects - then frees
 * the session and drops the gk20a ref taken at open time.
 */
int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
{
	struct dbg_session_gk20a_linux *dbg_session_linux = filp->private_data;
	struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s;
	struct gk20a *g = dbg_s->g;
	struct dbg_profiler_object_data *prof_obj, *tmp_obj;

	nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", g->name);

	/* unbind channels */
	dbg_unbind_all_channels_gk20a(dbg_s);

	/* Powergate/Timeout enable is called here as possibility of dbg_session
	 * which called powergate/timeout disable ioctl, to be killed without
	 * calling powergate/timeout enable ioctl
	 */
	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
	g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false);
	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);

	/* If this session owned the perf buffer, release it */
	if (g->perfbuf.owner == dbg_s)
		gk20a_perfbuf_release_locked(g, g->perfbuf.offset);

	/* Per-context profiler objects were released when we called
	 * dbg_unbind_all_channels. We could still have global ones.
	 */
	nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
				dbg_profiler_object_data, prof_obj_entry) {
		if (prof_obj->session_id == dbg_s->id) {
			if (prof_obj->has_reservation)
				g->ops.dbg_session_ops.
					release_profiler_reservation(dbg_s, prof_obj);
			nvgpu_list_del(&prof_obj->prof_obj_entry);
			nvgpu_kfree(g, prof_obj);
		}
	}
	nvgpu_mutex_release(&g->dbg_sessions_lock);

	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
	nvgpu_mutex_destroy(&dbg_s->ioctl_lock);

	nvgpu_kfree(g, dbg_session_linux);
	/* drop the reference taken in gk20a_dbg_gpu_do_dev_open() */
	gk20a_put(g);

	return 0;
}
214 | |||
215 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp) | ||
216 | { | ||
217 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
218 | struct nvgpu_os_linux, prof.cdev); | ||
219 | struct gk20a *g = &l->g; | ||
220 | |||
221 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
222 | return gk20a_dbg_gpu_do_dev_open(inode, filp, true /* is profiler */); | ||
223 | } | ||
224 | |||
225 | static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s, | ||
226 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
227 | { | ||
228 | int err; | ||
229 | struct gk20a *g = dbg_s->g; | ||
230 | |||
231 | nvgpu_log(g, gpu_dbg_fn, "timeout enable/disable = %d", args->enable); | ||
232 | |||
233 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
234 | err = nvgpu_dbg_timeout_enable(dbg_s, args->enable); | ||
235 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
236 | |||
237 | return err; | ||
238 | } | ||
239 | |||
240 | static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state( | ||
241 | struct dbg_session_gk20a *dbg_s, | ||
242 | struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args) | ||
243 | { | ||
244 | struct gk20a *g = dbg_s->g; | ||
245 | struct gr_gk20a *gr = &g->gr; | ||
246 | u32 sm_id; | ||
247 | struct channel_gk20a *ch; | ||
248 | struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; | ||
249 | struct nvgpu_gr_sm_error_state sm_error_state; | ||
250 | int err = 0; | ||
251 | |||
252 | /* Not currently supported in the virtual case */ | ||
253 | if (g->is_virtual) | ||
254 | return -ENOSYS; | ||
255 | |||
256 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
257 | if (!ch) | ||
258 | return -EINVAL; | ||
259 | |||
260 | sm_id = args->sm_id; | ||
261 | if (sm_id >= gr->no_of_sm) | ||
262 | return -EINVAL; | ||
263 | |||
264 | nvgpu_speculation_barrier(); | ||
265 | |||
266 | if (args->sm_error_state_record_size > 0) { | ||
267 | size_t read_size = sizeof(sm_error_state_record); | ||
268 | |||
269 | if (read_size > args->sm_error_state_record_size) | ||
270 | read_size = args->sm_error_state_record_size; | ||
271 | |||
272 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
273 | err = copy_from_user(&sm_error_state_record, | ||
274 | (void __user *)(uintptr_t) | ||
275 | args->sm_error_state_record_mem, | ||
276 | read_size); | ||
277 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
278 | if (err) | ||
279 | return -ENOMEM; | ||
280 | } | ||
281 | |||
282 | err = gk20a_busy(g); | ||
283 | if (err) | ||
284 | return err; | ||
285 | |||
286 | sm_error_state.hww_global_esr = | ||
287 | sm_error_state_record.hww_global_esr; | ||
288 | sm_error_state.hww_warp_esr = | ||
289 | sm_error_state_record.hww_warp_esr; | ||
290 | sm_error_state.hww_warp_esr_pc = | ||
291 | sm_error_state_record.hww_warp_esr_pc; | ||
292 | sm_error_state.hww_global_esr_report_mask = | ||
293 | sm_error_state_record.hww_global_esr_report_mask; | ||
294 | sm_error_state.hww_warp_esr_report_mask = | ||
295 | sm_error_state_record.hww_warp_esr_report_mask; | ||
296 | |||
297 | err = gr_gk20a_elpg_protected_call(g, | ||
298 | g->ops.gr.update_sm_error_state(g, ch, | ||
299 | sm_id, &sm_error_state)); | ||
300 | |||
301 | gk20a_idle(g); | ||
302 | |||
303 | return err; | ||
304 | } | ||
305 | |||
306 | |||
307 | static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( | ||
308 | struct dbg_session_gk20a *dbg_s, | ||
309 | struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) | ||
310 | { | ||
311 | struct gk20a *g = dbg_s->g; | ||
312 | struct gr_gk20a *gr = &g->gr; | ||
313 | struct nvgpu_gr_sm_error_state *sm_error_state; | ||
314 | struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; | ||
315 | u32 sm_id; | ||
316 | int err = 0; | ||
317 | |||
318 | sm_id = args->sm_id; | ||
319 | if (sm_id >= gr->no_of_sm) | ||
320 | return -EINVAL; | ||
321 | |||
322 | nvgpu_speculation_barrier(); | ||
323 | |||
324 | sm_error_state = gr->sm_error_states + sm_id; | ||
325 | sm_error_state_record.hww_global_esr = | ||
326 | sm_error_state->hww_global_esr; | ||
327 | sm_error_state_record.hww_warp_esr = | ||
328 | sm_error_state->hww_warp_esr; | ||
329 | sm_error_state_record.hww_warp_esr_pc = | ||
330 | sm_error_state->hww_warp_esr_pc; | ||
331 | sm_error_state_record.hww_global_esr_report_mask = | ||
332 | sm_error_state->hww_global_esr_report_mask; | ||
333 | sm_error_state_record.hww_warp_esr_report_mask = | ||
334 | sm_error_state->hww_warp_esr_report_mask; | ||
335 | |||
336 | if (args->sm_error_state_record_size > 0) { | ||
337 | size_t write_size = sizeof(*sm_error_state); | ||
338 | |||
339 | if (write_size > args->sm_error_state_record_size) | ||
340 | write_size = args->sm_error_state_record_size; | ||
341 | |||
342 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
343 | err = copy_to_user((void __user *)(uintptr_t) | ||
344 | args->sm_error_state_record_mem, | ||
345 | &sm_error_state_record, | ||
346 | write_size); | ||
347 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
348 | if (err) { | ||
349 | nvgpu_err(g, "copy_to_user failed!"); | ||
350 | return err; | ||
351 | } | ||
352 | |||
353 | args->sm_error_state_record_size = write_size; | ||
354 | } | ||
355 | |||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | |||
360 | static int nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type( | ||
361 | struct dbg_session_gk20a *dbg_s, | ||
362 | struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *args) | ||
363 | { | ||
364 | struct gk20a *g = dbg_s->g; | ||
365 | |||
366 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
367 | |||
368 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
369 | |||
370 | dbg_s->broadcast_stop_trigger = (args->broadcast != 0); | ||
371 | |||
372 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
373 | |||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, | ||
378 | int timeout_mode) | ||
379 | { | ||
380 | struct gk20a *g = dbg_s->g; | ||
381 | int err = 0; | ||
382 | |||
383 | nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", | ||
384 | timeout_mode); | ||
385 | |||
386 | switch (timeout_mode) { | ||
387 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: | ||
388 | if (dbg_s->is_timeout_disabled == true) | ||
389 | nvgpu_atomic_dec(&g->timeouts_disabled_refcount); | ||
390 | dbg_s->is_timeout_disabled = false; | ||
391 | break; | ||
392 | |||
393 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE: | ||
394 | if (dbg_s->is_timeout_disabled == false) | ||
395 | nvgpu_atomic_inc(&g->timeouts_disabled_refcount); | ||
396 | dbg_s->is_timeout_disabled = true; | ||
397 | break; | ||
398 | |||
399 | default: | ||
400 | nvgpu_err(g, | ||
401 | "unrecognized dbg gpu timeout mode : 0x%x", | ||
402 | timeout_mode); | ||
403 | err = -EINVAL; | ||
404 | break; | ||
405 | } | ||
406 | |||
407 | if (!err) | ||
408 | nvgpu_log(g, gpu_dbg_gpu_dbg, "dbg is timeout disabled %s, " | ||
409 | "timeouts disabled refcount %d", | ||
410 | dbg_s->is_timeout_disabled ? "true" : "false", | ||
411 | nvgpu_atomic_read(&g->timeouts_disabled_refcount)); | ||
412 | return err; | ||
413 | } | ||
414 | |||
/*
 * Shared open() path for the dbg and prof device nodes. Takes a ref on
 * the gk20a, allocates a dbg_session_gk20a_linux, initializes its locks
 * and lists, and stashes it in filp->private_data for later ioctls.
 * On error, unwinds via the goto chain and drops the gk20a ref.
 */
static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
		struct file *filp, bool is_profiler)
{
	struct nvgpu_os_linux *l;
	struct dbg_session_gk20a_linux *dbg_session_linux;
	struct dbg_session_gk20a *dbg_s;
	struct gk20a *g;

	struct device *dev;

	int err;

	/* the two nodes share this path; recover l from the matching cdev */
	if (!is_profiler)
		l = container_of(inode->i_cdev,
				 struct nvgpu_os_linux, dbg.cdev);
	else
		l = container_of(inode->i_cdev,
				 struct nvgpu_os_linux, prof.cdev);
	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;

	dev = dev_from_gk20a(g);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", g->name);

	err = alloc_session(g, &dbg_session_linux);
	if (err)
		goto free_ref;

	dbg_s = &dbg_session_linux->dbg_s;

	filp->private_data = dbg_session_linux;
	dbg_session_linux->dev   = dev;
	dbg_s->g                 = g;
	dbg_s->is_profiler       = is_profiler;
	dbg_s->is_pg_disabled    = false;
	dbg_s->is_timeout_disabled = false;

	nvgpu_cond_init(&dbg_s->dbg_events.wait_queue);
	nvgpu_init_list_node(&dbg_s->ch_list);
	err = nvgpu_mutex_init(&dbg_s->ch_list_lock);
	if (err)
		goto err_free_session;
	err = nvgpu_mutex_init(&dbg_s->ioctl_lock);
	if (err)
		goto err_destroy_lock;
	dbg_s->dbg_events.events_enabled = false;
	dbg_s->dbg_events.num_pending_events = 0;

	return 0;

err_destroy_lock:
	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
err_free_session:
	nvgpu_kfree(g, dbg_session_linux);
free_ref:
	gk20a_put(g);
	return err;
}
475 | |||
/* Wake any waiters (e.g. poll()ers) on this session's debug-event queue. */
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
{
	nvgpu_cond_broadcast_interruptible(&dbg_s->dbg_events.wait_queue);
}
480 | |||
/*
 * Drop one channel binding from a debug session: release any profiler
 * reservation tied to this session/channel pair, unlink the channel
 * and session bookkeeping, and drop the channel file reference taken
 * at bind time. Called with g->dbg_sessions_lock and dbg_s->ch_list_lock
 * held (see dbg_unbind_all_channels_gk20a).
 */
static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
			struct dbg_session_channel_data *ch_data)
{
	struct gk20a *g = dbg_s->g;
	int chid;
	struct dbg_session_data *session_data;
	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
	struct dbg_session_channel_data_linux *ch_data_linux;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");

	chid = ch_data->chid;

	/* If there's a profiler ctx reservation record associated with this
	 * session/channel pair, release it.
	 */
	nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
				dbg_profiler_object_data, prof_obj_entry) {
		if ((prof_obj->session_id == dbg_s->id) &&
				(prof_obj->ch->chid == chid)) {
			if (prof_obj->has_reservation) {
				g->ops.dbg_session_ops.
				  release_profiler_reservation(dbg_s, prof_obj);
			}
			nvgpu_list_del(&prof_obj->prof_obj_entry);
			nvgpu_kfree(g, prof_obj);
		}
	}

	nvgpu_list_del(&ch_data->ch_entry);

	session_data = ch_data->session_data;
	nvgpu_list_del(&session_data->dbg_s_entry);
	nvgpu_kfree(dbg_s->g, session_data);

	ch_data_linux = container_of(ch_data, struct dbg_session_channel_data_linux,
				ch_data);

	/* drop the file ref that dbg_bind_channel_gk20a took with fget() */
	fput(ch_data_linux->ch_f);
	nvgpu_kfree(dbg_s->g, ch_data_linux);

	return 0;
}
524 | |||
/*
 * NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL handler: attach a channel (by fd)
 * to this debug session. Holds a struct file reference on the channel
 * fd for the whole session lifetime, links the channel onto the
 * session's list and the session onto the channel's list.
 * Lock order: g->dbg_sessions_lock, then ch->dbg_s_lock.
 */
static int dbg_bind_channel_gk20a(struct dbg_session_gk20a *dbg_s,
			  struct nvgpu_dbg_gpu_bind_channel_args *args)
{
	struct file *f;
	struct gk20a *g = dbg_s->g;
	struct channel_gk20a *ch;
	struct dbg_session_channel_data_linux *ch_data_linux;
	struct dbg_session_data *session_data;
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d",
		   g->name, args->channel_fd);

	/*
	 * Although gk20a_get_channel_from_file gives us a channel ref, need to
	 * hold a ref to the file during the session lifetime. See comment in
	 * struct dbg_session_channel_data.
	 */
	f = fget(args->channel_fd);
	if (!f)
		return -ENODEV;

	ch = gk20a_get_channel_from_file(args->channel_fd);
	if (!ch) {
		nvgpu_log_fn(g, "no channel found for fd");
		err = -EINVAL;
		goto out_fput;
	}

	nvgpu_log_fn(g, "%s hwchid=%d", g->name, ch->chid);

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
	nvgpu_mutex_acquire(&ch->dbg_s_lock);

	ch_data_linux = nvgpu_kzalloc(g, sizeof(*ch_data_linux));
	if (!ch_data_linux) {
		err = -ENOMEM;
		goto out_chput;
	}
	ch_data_linux->ch_f = f;
	ch_data_linux->ch_data.channel_fd = args->channel_fd;
	ch_data_linux->ch_data.chid = ch->chid;
	/* unbind callback releases what this function set up */
	ch_data_linux->ch_data.unbind_single_channel = dbg_unbind_single_channel_gk20a;
	nvgpu_init_list_node(&ch_data_linux->ch_data.ch_entry);

	session_data = nvgpu_kzalloc(g, sizeof(*session_data));
	if (!session_data) {
		err = -ENOMEM;
		goto out_kfree;
	}
	session_data->dbg_s = dbg_s;
	nvgpu_init_list_node(&session_data->dbg_s_entry);
	ch_data_linux->ch_data.session_data = session_data;

	nvgpu_list_add(&session_data->dbg_s_entry, &ch->dbg_s_list);

	nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
	nvgpu_list_add_tail(&ch_data_linux->ch_data.ch_entry, &dbg_s->ch_list);
	nvgpu_mutex_release(&dbg_s->ch_list_lock);

	nvgpu_mutex_release(&ch->dbg_s_lock);
	nvgpu_mutex_release(&g->dbg_sessions_lock);

	/* the channel ref was only needed to set up the binding */
	gk20a_channel_put(ch);

	return 0;

out_kfree:
	nvgpu_kfree(g, ch_data_linux);
out_chput:
	gk20a_channel_put(ch);
	nvgpu_mutex_release(&ch->dbg_s_lock);
	nvgpu_mutex_release(&g->dbg_sessions_lock);
out_fput:
	fput(f);
	return err;
}
602 | |||
603 | static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s) | ||
604 | { | ||
605 | struct dbg_session_channel_data *ch_data, *tmp; | ||
606 | struct gk20a *g = dbg_s->g; | ||
607 | |||
608 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
609 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
610 | nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, | ||
611 | dbg_session_channel_data, ch_entry) | ||
612 | ch_data->unbind_single_channel(dbg_s, ch_data); | ||
613 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
614 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
615 | |||
616 | return 0; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Convert common regops op values of the form of NVGPU_DBG_REG_OP_* | ||
621 | * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* | ||
622 | */ | ||
623 | static u32 nvgpu_get_regops_op_values_linux(u32 regops_op) | ||
624 | { | ||
625 | switch (regops_op) { | ||
626 | case REGOP(READ_32): | ||
627 | return REGOP_LINUX(READ_32); | ||
628 | case REGOP(WRITE_32): | ||
629 | return REGOP_LINUX(WRITE_32); | ||
630 | case REGOP(READ_64): | ||
631 | return REGOP_LINUX(READ_64); | ||
632 | case REGOP(WRITE_64): | ||
633 | return REGOP_LINUX(WRITE_64); | ||
634 | case REGOP(READ_08): | ||
635 | return REGOP_LINUX(READ_08); | ||
636 | case REGOP(WRITE_08): | ||
637 | return REGOP_LINUX(WRITE_08); | ||
638 | } | ||
639 | |||
640 | return regops_op; | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_* | ||
645 | * into common regops op values of the form of NVGPU_DBG_REG_OP_* | ||
646 | */ | ||
647 | static u32 nvgpu_get_regops_op_values_common(u32 regops_op) | ||
648 | { | ||
649 | switch (regops_op) { | ||
650 | case REGOP_LINUX(READ_32): | ||
651 | return REGOP(READ_32); | ||
652 | case REGOP_LINUX(WRITE_32): | ||
653 | return REGOP(WRITE_32); | ||
654 | case REGOP_LINUX(READ_64): | ||
655 | return REGOP(READ_64); | ||
656 | case REGOP_LINUX(WRITE_64): | ||
657 | return REGOP(WRITE_64); | ||
658 | case REGOP_LINUX(READ_08): | ||
659 | return REGOP(READ_08); | ||
660 | case REGOP_LINUX(WRITE_08): | ||
661 | return REGOP(WRITE_08); | ||
662 | } | ||
663 | |||
664 | return regops_op; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Convert common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* | ||
669 | * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
670 | */ | ||
671 | static u32 nvgpu_get_regops_type_values_linux(u32 regops_type) | ||
672 | { | ||
673 | switch (regops_type) { | ||
674 | case REGOP(TYPE_GLOBAL): | ||
675 | return REGOP_LINUX(TYPE_GLOBAL); | ||
676 | case REGOP(TYPE_GR_CTX): | ||
677 | return REGOP_LINUX(TYPE_GR_CTX); | ||
678 | case REGOP(TYPE_GR_CTX_TPC): | ||
679 | return REGOP_LINUX(TYPE_GR_CTX_TPC); | ||
680 | case REGOP(TYPE_GR_CTX_SM): | ||
681 | return REGOP_LINUX(TYPE_GR_CTX_SM); | ||
682 | case REGOP(TYPE_GR_CTX_CROP): | ||
683 | return REGOP_LINUX(TYPE_GR_CTX_CROP); | ||
684 | case REGOP(TYPE_GR_CTX_ZROP): | ||
685 | return REGOP_LINUX(TYPE_GR_CTX_ZROP); | ||
686 | case REGOP(TYPE_GR_CTX_QUAD): | ||
687 | return REGOP_LINUX(TYPE_GR_CTX_QUAD); | ||
688 | } | ||
689 | |||
690 | return regops_type; | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Convert linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_TYPE_* | ||
695 | * into common regops type values of the form of NVGPU_DBG_REG_OP_TYPE_* | ||
696 | */ | ||
697 | static u32 nvgpu_get_regops_type_values_common(u32 regops_type) | ||
698 | { | ||
699 | switch (regops_type) { | ||
700 | case REGOP_LINUX(TYPE_GLOBAL): | ||
701 | return REGOP(TYPE_GLOBAL); | ||
702 | case REGOP_LINUX(TYPE_GR_CTX): | ||
703 | return REGOP(TYPE_GR_CTX); | ||
704 | case REGOP_LINUX(TYPE_GR_CTX_TPC): | ||
705 | return REGOP(TYPE_GR_CTX_TPC); | ||
706 | case REGOP_LINUX(TYPE_GR_CTX_SM): | ||
707 | return REGOP(TYPE_GR_CTX_SM); | ||
708 | case REGOP_LINUX(TYPE_GR_CTX_CROP): | ||
709 | return REGOP(TYPE_GR_CTX_CROP); | ||
710 | case REGOP_LINUX(TYPE_GR_CTX_ZROP): | ||
711 | return REGOP(TYPE_GR_CTX_ZROP); | ||
712 | case REGOP_LINUX(TYPE_GR_CTX_QUAD): | ||
713 | return REGOP(TYPE_GR_CTX_QUAD); | ||
714 | } | ||
715 | |||
716 | return regops_type; | ||
717 | } | ||
718 | |||
719 | /* | ||
720 | * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_* | ||
721 | * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
722 | */ | ||
723 | static u32 nvgpu_get_regops_status_values_linux(u32 regops_status) | ||
724 | { | ||
725 | switch (regops_status) { | ||
726 | case REGOP(STATUS_SUCCESS): | ||
727 | return REGOP_LINUX(STATUS_SUCCESS); | ||
728 | case REGOP(STATUS_INVALID_OP): | ||
729 | return REGOP_LINUX(STATUS_INVALID_OP); | ||
730 | case REGOP(STATUS_INVALID_TYPE): | ||
731 | return REGOP_LINUX(STATUS_INVALID_TYPE); | ||
732 | case REGOP(STATUS_INVALID_OFFSET): | ||
733 | return REGOP_LINUX(STATUS_INVALID_OFFSET); | ||
734 | case REGOP(STATUS_UNSUPPORTED_OP): | ||
735 | return REGOP_LINUX(STATUS_UNSUPPORTED_OP); | ||
736 | case REGOP(STATUS_INVALID_MASK ): | ||
737 | return REGOP_LINUX(STATUS_INVALID_MASK); | ||
738 | } | ||
739 | |||
740 | return regops_status; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_* | ||
745 | * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_* | ||
746 | */ | ||
747 | static u32 nvgpu_get_regops_status_values_common(u32 regops_status) | ||
748 | { | ||
749 | switch (regops_status) { | ||
750 | case REGOP_LINUX(STATUS_SUCCESS): | ||
751 | return REGOP(STATUS_SUCCESS); | ||
752 | case REGOP_LINUX(STATUS_INVALID_OP): | ||
753 | return REGOP(STATUS_INVALID_OP); | ||
754 | case REGOP_LINUX(STATUS_INVALID_TYPE): | ||
755 | return REGOP(STATUS_INVALID_TYPE); | ||
756 | case REGOP_LINUX(STATUS_INVALID_OFFSET): | ||
757 | return REGOP(STATUS_INVALID_OFFSET); | ||
758 | case REGOP_LINUX(STATUS_UNSUPPORTED_OP): | ||
759 | return REGOP(STATUS_UNSUPPORTED_OP); | ||
760 | case REGOP_LINUX(STATUS_INVALID_MASK ): | ||
761 | return REGOP(STATUS_INVALID_MASK); | ||
762 | } | ||
763 | |||
764 | return regops_status; | ||
765 | } | ||
766 | |||
767 | static int nvgpu_get_regops_data_common(struct nvgpu_dbg_gpu_reg_op *in, | ||
768 | struct nvgpu_dbg_reg_op *out, u32 num_ops) | ||
769 | { | ||
770 | u32 i; | ||
771 | |||
772 | if(in == NULL || out == NULL) | ||
773 | return -ENOMEM; | ||
774 | |||
775 | for (i = 0; i < num_ops; i++) { | ||
776 | out[i].op = nvgpu_get_regops_op_values_common(in[i].op); | ||
777 | out[i].type = nvgpu_get_regops_type_values_common(in[i].type); | ||
778 | out[i].status = nvgpu_get_regops_status_values_common(in[i].status); | ||
779 | out[i].quad = in[i].quad; | ||
780 | out[i].group_mask = in[i].group_mask; | ||
781 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
782 | out[i].offset = in[i].offset; | ||
783 | out[i].value_lo = in[i].value_lo; | ||
784 | out[i].value_hi = in[i].value_hi; | ||
785 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
786 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
787 | } | ||
788 | |||
789 | return 0; | ||
790 | } | ||
791 | |||
792 | static int nvgpu_get_regops_data_linux(struct nvgpu_dbg_reg_op *in, | ||
793 | struct nvgpu_dbg_gpu_reg_op *out, u32 num_ops) | ||
794 | { | ||
795 | u32 i; | ||
796 | |||
797 | if(in == NULL || out == NULL) | ||
798 | return -ENOMEM; | ||
799 | |||
800 | for (i = 0; i < num_ops; i++) { | ||
801 | out[i].op = nvgpu_get_regops_op_values_linux(in[i].op); | ||
802 | out[i].type = nvgpu_get_regops_type_values_linux(in[i].type); | ||
803 | out[i].status = nvgpu_get_regops_status_values_linux(in[i].status); | ||
804 | out[i].quad = in[i].quad; | ||
805 | out[i].group_mask = in[i].group_mask; | ||
806 | out[i].sub_group_mask = in[i].sub_group_mask; | ||
807 | out[i].offset = in[i].offset; | ||
808 | out[i].value_lo = in[i].value_lo; | ||
809 | out[i].value_hi = in[i].value_hi; | ||
810 | out[i].and_n_mask_lo = in[i].and_n_mask_lo; | ||
811 | out[i].and_n_mask_hi = in[i].and_n_mask_hi; | ||
812 | } | ||
813 | |||
814 | return 0; | ||
815 | } | ||
816 | |||
817 | static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | ||
818 | struct nvgpu_dbg_gpu_exec_reg_ops_args *args) | ||
819 | { | ||
820 | int err = 0, powergate_err = 0; | ||
821 | bool is_pg_disabled = false; | ||
822 | |||
823 | struct gk20a *g = dbg_s->g; | ||
824 | struct channel_gk20a *ch; | ||
825 | |||
826 | nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); | ||
827 | |||
828 | if (args->num_ops > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) { | ||
829 | nvgpu_err(g, "regops limit exceeded"); | ||
830 | return -EINVAL; | ||
831 | } | ||
832 | |||
833 | if (args->num_ops == 0) { | ||
834 | /* Nothing to do */ | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { | ||
839 | nvgpu_err(g, "reg ops work buffer not allocated"); | ||
840 | return -ENODEV; | ||
841 | } | ||
842 | |||
843 | if (!dbg_s->id) { | ||
844 | nvgpu_err(g, "can't call reg_ops on an unbound debugger session"); | ||
845 | return -EINVAL; | ||
846 | } | ||
847 | |||
848 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
849 | if (!dbg_s->is_profiler && !ch) { | ||
850 | nvgpu_err(g, "bind a channel before regops for a debugging session"); | ||
851 | return -EINVAL; | ||
852 | } | ||
853 | |||
854 | /* be sure that ctx info is in place */ | ||
855 | if (!g->is_virtual && | ||
856 | !gr_context_info_available(dbg_s, &g->gr)) { | ||
857 | nvgpu_err(g, "gr context data not available"); | ||
858 | return -ENODEV; | ||
859 | } | ||
860 | |||
861 | /* since exec_reg_ops sends methods to the ucode, it must take the | ||
862 | * global gpu lock to protect against mixing methods from debug sessions | ||
863 | * on other channels */ | ||
864 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
865 | |||
866 | if (!dbg_s->is_pg_disabled && !g->is_virtual) { | ||
867 | /* In the virtual case, the server will handle | ||
868 | * disabling/enabling powergating when processing reg ops | ||
869 | */ | ||
870 | powergate_err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, | ||
871 | true); | ||
872 | is_pg_disabled = true; | ||
873 | } | ||
874 | |||
875 | if (!powergate_err) { | ||
876 | u64 ops_offset = 0; /* index offset */ | ||
877 | |||
878 | struct nvgpu_dbg_gpu_reg_op *linux_fragment = NULL; | ||
879 | |||
880 | linux_fragment = nvgpu_kzalloc(g, g->dbg_regops_tmp_buf_ops * | ||
881 | sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
882 | |||
883 | if (!linux_fragment) | ||
884 | return -ENOMEM; | ||
885 | |||
886 | while (ops_offset < args->num_ops && !err) { | ||
887 | const u64 num_ops = | ||
888 | min(args->num_ops - ops_offset, | ||
889 | (u64)(g->dbg_regops_tmp_buf_ops)); | ||
890 | const u64 fragment_size = | ||
891 | num_ops * sizeof(struct nvgpu_dbg_gpu_reg_op); | ||
892 | |||
893 | void __user *const fragment = | ||
894 | (void __user *)(uintptr_t) | ||
895 | (args->ops + | ||
896 | ops_offset * sizeof(struct nvgpu_dbg_gpu_reg_op)); | ||
897 | |||
898 | nvgpu_log_fn(g, "Regops fragment: start_op=%llu ops=%llu", | ||
899 | ops_offset, num_ops); | ||
900 | |||
901 | nvgpu_log_fn(g, "Copying regops from userspace"); | ||
902 | |||
903 | if (copy_from_user(linux_fragment, | ||
904 | fragment, fragment_size)) { | ||
905 | nvgpu_err(g, "copy_from_user failed!"); | ||
906 | err = -EFAULT; | ||
907 | break; | ||
908 | } | ||
909 | |||
910 | err = nvgpu_get_regops_data_common(linux_fragment, | ||
911 | g->dbg_regops_tmp_buf, num_ops); | ||
912 | |||
913 | if (err) | ||
914 | break; | ||
915 | |||
916 | err = g->ops.dbg_session_ops.exec_reg_ops( | ||
917 | dbg_s, g->dbg_regops_tmp_buf, num_ops); | ||
918 | |||
919 | err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf, | ||
920 | linux_fragment, num_ops); | ||
921 | |||
922 | if (err) | ||
923 | break; | ||
924 | |||
925 | nvgpu_log_fn(g, "Copying result to userspace"); | ||
926 | |||
927 | if (copy_to_user(fragment, linux_fragment, | ||
928 | fragment_size)) { | ||
929 | nvgpu_err(g, "copy_to_user failed!"); | ||
930 | err = -EFAULT; | ||
931 | break; | ||
932 | } | ||
933 | |||
934 | ops_offset += num_ops; | ||
935 | } | ||
936 | |||
937 | nvgpu_kfree(g, linux_fragment); | ||
938 | |||
939 | /* enable powergate, if previously disabled */ | ||
940 | if (is_pg_disabled) { | ||
941 | powergate_err = | ||
942 | g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, | ||
943 | false); | ||
944 | } | ||
945 | } | ||
946 | |||
947 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
948 | |||
949 | if (!err && powergate_err) | ||
950 | err = powergate_err; | ||
951 | |||
952 | if (err) | ||
953 | nvgpu_err(g, "dbg regops failed"); | ||
954 | |||
955 | return err; | ||
956 | } | ||
957 | |||
958 | static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s, | ||
959 | struct nvgpu_dbg_gpu_powergate_args *args) | ||
960 | { | ||
961 | int err; | ||
962 | struct gk20a *g = dbg_s->g; | ||
963 | nvgpu_log_fn(g, "%s powergate mode = %d", | ||
964 | g->name, args->mode); | ||
965 | |||
966 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
967 | if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_DISABLE) { | ||
968 | err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, true); | ||
969 | } else if (args->mode == NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE) { | ||
970 | err = g->ops.dbg_session_ops.dbg_set_powergate(dbg_s, false); | ||
971 | } else { | ||
972 | nvgpu_err(g, "invalid powergate mode"); | ||
973 | err = -EINVAL; | ||
974 | } | ||
975 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
976 | return err; | ||
977 | } | ||
978 | |||
979 | static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
980 | struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args) | ||
981 | { | ||
982 | int err; | ||
983 | struct gk20a *g = dbg_s->g; | ||
984 | struct channel_gk20a *ch_gk20a; | ||
985 | |||
986 | nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", | ||
987 | g->name, args->mode); | ||
988 | |||
989 | err = gk20a_busy(g); | ||
990 | if (err) { | ||
991 | nvgpu_err(g, "failed to poweron"); | ||
992 | return err; | ||
993 | } | ||
994 | |||
995 | /* Take the global lock, since we'll be doing global regops */ | ||
996 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
997 | |||
998 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
999 | if (!ch_gk20a) { | ||
1000 | nvgpu_err(g, | ||
1001 | "no bound channel for smpc ctxsw mode update"); | ||
1002 | err = -EINVAL; | ||
1003 | goto clean_up; | ||
1004 | } | ||
1005 | |||
1006 | err = g->ops.gr.update_smpc_ctxsw_mode(g, ch_gk20a, | ||
1007 | args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW); | ||
1008 | if (err) { | ||
1009 | nvgpu_err(g, | ||
1010 | "error (%d) during smpc ctxsw mode update", err); | ||
1011 | goto clean_up; | ||
1012 | } | ||
1013 | |||
1014 | err = g->ops.regops.apply_smpc_war(dbg_s); | ||
1015 | clean_up: | ||
1016 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1017 | gk20a_idle(g); | ||
1018 | return err; | ||
1019 | } | ||
1020 | |||
1021 | static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s, | ||
1022 | struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args) | ||
1023 | { | ||
1024 | int err; | ||
1025 | struct gk20a *g = dbg_s->g; | ||
1026 | struct channel_gk20a *ch_gk20a; | ||
1027 | |||
1028 | nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode); | ||
1029 | |||
1030 | /* Must have a valid reservation to enable/disable hwpm cxtsw. | ||
1031 | * Just print an error message for now, but eventually this should | ||
1032 | * return an error, at the point where all client sw has been | ||
1033 | * cleaned up. | ||
1034 | */ | ||
1035 | if (!dbg_s->has_profiler_reservation) { | ||
1036 | nvgpu_err(g, | ||
1037 | "session doesn't have a valid reservation"); | ||
1038 | } | ||
1039 | |||
1040 | err = gk20a_busy(g); | ||
1041 | if (err) { | ||
1042 | nvgpu_err(g, "failed to poweron"); | ||
1043 | return err; | ||
1044 | } | ||
1045 | |||
1046 | /* Take the global lock, since we'll be doing global regops */ | ||
1047 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1048 | |||
1049 | ch_gk20a = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1050 | if (!ch_gk20a) { | ||
1051 | nvgpu_err(g, | ||
1052 | "no bound channel for pm ctxsw mode update"); | ||
1053 | err = -EINVAL; | ||
1054 | goto clean_up; | ||
1055 | } | ||
1056 | if (!dbg_s->is_pg_disabled) { | ||
1057 | nvgpu_err(g, "powergate is not disabled"); | ||
1058 | err = -ENOSYS; | ||
1059 | goto clean_up; | ||
1060 | } | ||
1061 | err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0, | ||
1062 | args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW); | ||
1063 | if (err) | ||
1064 | nvgpu_err(g, | ||
1065 | "error (%d) during pm ctxsw mode update", err); | ||
1066 | /* gk20a would require a WAR to set the core PM_ENABLE bit, not | ||
1067 | * added here with gk20a being deprecated | ||
1068 | */ | ||
1069 | clean_up: | ||
1070 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1071 | gk20a_idle(g); | ||
1072 | return err; | ||
1073 | } | ||
1074 | |||
1075 | static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( | ||
1076 | struct dbg_session_gk20a *dbg_s, | ||
1077 | struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args) | ||
1078 | { | ||
1079 | struct gk20a *g = dbg_s->g; | ||
1080 | struct channel_gk20a *ch; | ||
1081 | int err = 0, action = args->mode; | ||
1082 | |||
1083 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "action: %d", args->mode); | ||
1084 | |||
1085 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1086 | if (!ch) | ||
1087 | return -EINVAL; | ||
1088 | |||
1089 | err = gk20a_busy(g); | ||
1090 | if (err) { | ||
1091 | nvgpu_err(g, "failed to poweron"); | ||
1092 | return err; | ||
1093 | } | ||
1094 | |||
1095 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1096 | |||
1097 | /* Suspend GPU context switching */ | ||
1098 | err = gr_gk20a_disable_ctxsw(g); | ||
1099 | if (err) { | ||
1100 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
1101 | /* this should probably be ctx-fatal... */ | ||
1102 | goto clean_up; | ||
1103 | } | ||
1104 | |||
1105 | switch (action) { | ||
1106 | case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: | ||
1107 | gr_gk20a_suspend_context(ch); | ||
1108 | break; | ||
1109 | |||
1110 | case NVGPU_DBG_GPU_RESUME_ALL_SMS: | ||
1111 | gr_gk20a_resume_context(ch); | ||
1112 | break; | ||
1113 | } | ||
1114 | |||
1115 | err = gr_gk20a_enable_ctxsw(g); | ||
1116 | if (err) | ||
1117 | nvgpu_err(g, "unable to restart ctxsw!"); | ||
1118 | |||
1119 | clean_up: | ||
1120 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1121 | gk20a_idle(g); | ||
1122 | |||
1123 | return err; | ||
1124 | } | ||
1125 | |||
1126 | static int nvgpu_ioctl_allocate_profiler_object( | ||
1127 | struct dbg_session_gk20a_linux *dbg_session_linux, | ||
1128 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
1129 | { | ||
1130 | int err = 0; | ||
1131 | struct dbg_session_gk20a *dbg_s = &dbg_session_linux->dbg_s; | ||
1132 | struct gk20a *g = get_gk20a(dbg_session_linux->dev); | ||
1133 | struct dbg_profiler_object_data *prof_obj; | ||
1134 | |||
1135 | nvgpu_log_fn(g, "%s", g->name); | ||
1136 | |||
1137 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1138 | |||
1139 | err = alloc_profiler(g, &prof_obj); | ||
1140 | if (err) | ||
1141 | goto clean_up; | ||
1142 | |||
1143 | prof_obj->session_id = dbg_s->id; | ||
1144 | |||
1145 | if (dbg_s->is_profiler) | ||
1146 | prof_obj->ch = NULL; | ||
1147 | else { | ||
1148 | prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1149 | if (prof_obj->ch == NULL) { | ||
1150 | nvgpu_err(g, | ||
1151 | "bind a channel for dbg session"); | ||
1152 | nvgpu_kfree(g, prof_obj); | ||
1153 | err = -EINVAL; | ||
1154 | goto clean_up; | ||
1155 | } | ||
1156 | } | ||
1157 | |||
1158 | /* Return handle to client */ | ||
1159 | args->profiler_handle = prof_obj->prof_handle; | ||
1160 | |||
1161 | nvgpu_init_list_node(&prof_obj->prof_obj_entry); | ||
1162 | |||
1163 | nvgpu_list_add(&prof_obj->prof_obj_entry, &g->profiler_objects); | ||
1164 | clean_up: | ||
1165 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1166 | return err; | ||
1167 | } | ||
1168 | |||
1169 | static int nvgpu_ioctl_free_profiler_object( | ||
1170 | struct dbg_session_gk20a_linux *dbg_s_linux, | ||
1171 | struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args) | ||
1172 | { | ||
1173 | int err = 0; | ||
1174 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
1175 | struct gk20a *g = get_gk20a(dbg_s_linux->dev); | ||
1176 | struct dbg_profiler_object_data *prof_obj, *tmp_obj; | ||
1177 | bool obj_found = false; | ||
1178 | |||
1179 | nvgpu_log_fn(g, "%s session_id = %d profiler_handle = %x", | ||
1180 | g->name, dbg_s->id, args->profiler_handle); | ||
1181 | |||
1182 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1183 | |||
1184 | /* Remove profiler object from the list, if a match is found */ | ||
1185 | nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects, | ||
1186 | dbg_profiler_object_data, prof_obj_entry) { | ||
1187 | if (prof_obj->prof_handle == args->profiler_handle) { | ||
1188 | if (prof_obj->session_id != dbg_s->id) { | ||
1189 | nvgpu_err(g, | ||
1190 | "invalid handle %x", | ||
1191 | args->profiler_handle); | ||
1192 | err = -EINVAL; | ||
1193 | break; | ||
1194 | } | ||
1195 | if (prof_obj->has_reservation) | ||
1196 | g->ops.dbg_session_ops. | ||
1197 | release_profiler_reservation(dbg_s, prof_obj); | ||
1198 | nvgpu_list_del(&prof_obj->prof_obj_entry); | ||
1199 | nvgpu_kfree(g, prof_obj); | ||
1200 | obj_found = true; | ||
1201 | break; | ||
1202 | } | ||
1203 | } | ||
1204 | if (!obj_found) { | ||
1205 | nvgpu_err(g, "profiler %x not found", | ||
1206 | args->profiler_handle); | ||
1207 | err = -EINVAL; | ||
1208 | } | ||
1209 | |||
1210 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1211 | return err; | ||
1212 | } | ||
1213 | |||
1214 | static struct dbg_profiler_object_data *find_matching_prof_obj( | ||
1215 | struct dbg_session_gk20a *dbg_s, | ||
1216 | u32 profiler_handle) | ||
1217 | { | ||
1218 | struct gk20a *g = dbg_s->g; | ||
1219 | struct dbg_profiler_object_data *prof_obj; | ||
1220 | |||
1221 | nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects, | ||
1222 | dbg_profiler_object_data, prof_obj_entry) { | ||
1223 | if (prof_obj->prof_handle == profiler_handle) { | ||
1224 | if (prof_obj->session_id != dbg_s->id) { | ||
1225 | nvgpu_err(g, | ||
1226 | "invalid handle %x", | ||
1227 | profiler_handle); | ||
1228 | return NULL; | ||
1229 | } | ||
1230 | return prof_obj; | ||
1231 | } | ||
1232 | } | ||
1233 | return NULL; | ||
1234 | } | ||
1235 | |||
1236 | /* used in scenarios where the debugger session can take just the inter-session | ||
1237 | * lock for performance, but the profiler session must take the per-gpu lock | ||
1238 | * since it might not have an associated channel. */ | ||
1239 | static void gk20a_dbg_session_nvgpu_mutex_acquire(struct dbg_session_gk20a *dbg_s) | ||
1240 | { | ||
1241 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1242 | |||
1243 | if (dbg_s->is_profiler || !ch) | ||
1244 | nvgpu_mutex_acquire(&dbg_s->g->dbg_sessions_lock); | ||
1245 | else | ||
1246 | nvgpu_mutex_acquire(&ch->dbg_s_lock); | ||
1247 | } | ||
1248 | |||
1249 | static void gk20a_dbg_session_nvgpu_mutex_release(struct dbg_session_gk20a *dbg_s) | ||
1250 | { | ||
1251 | struct channel_gk20a *ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1252 | |||
1253 | if (dbg_s->is_profiler || !ch) | ||
1254 | nvgpu_mutex_release(&dbg_s->g->dbg_sessions_lock); | ||
1255 | else | ||
1256 | nvgpu_mutex_release(&ch->dbg_s_lock); | ||
1257 | } | ||
1258 | |||
1259 | static void gk20a_dbg_gpu_events_enable(struct dbg_session_gk20a *dbg_s) | ||
1260 | { | ||
1261 | struct gk20a *g = dbg_s->g; | ||
1262 | |||
1263 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1264 | |||
1265 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1266 | |||
1267 | dbg_s->dbg_events.events_enabled = true; | ||
1268 | dbg_s->dbg_events.num_pending_events = 0; | ||
1269 | |||
1270 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1271 | } | ||
1272 | |||
1273 | static void gk20a_dbg_gpu_events_disable(struct dbg_session_gk20a *dbg_s) | ||
1274 | { | ||
1275 | struct gk20a *g = dbg_s->g; | ||
1276 | |||
1277 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1278 | |||
1279 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1280 | |||
1281 | dbg_s->dbg_events.events_enabled = false; | ||
1282 | dbg_s->dbg_events.num_pending_events = 0; | ||
1283 | |||
1284 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1285 | } | ||
1286 | |||
1287 | static void gk20a_dbg_gpu_events_clear(struct dbg_session_gk20a *dbg_s) | ||
1288 | { | ||
1289 | struct gk20a *g = dbg_s->g; | ||
1290 | |||
1291 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1292 | |||
1293 | gk20a_dbg_session_nvgpu_mutex_acquire(dbg_s); | ||
1294 | |||
1295 | if (dbg_s->dbg_events.events_enabled && | ||
1296 | dbg_s->dbg_events.num_pending_events > 0) | ||
1297 | dbg_s->dbg_events.num_pending_events--; | ||
1298 | |||
1299 | gk20a_dbg_session_nvgpu_mutex_release(dbg_s); | ||
1300 | } | ||
1301 | |||
1302 | |||
1303 | static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, | ||
1304 | struct nvgpu_dbg_gpu_events_ctrl_args *args) | ||
1305 | { | ||
1306 | int ret = 0; | ||
1307 | struct channel_gk20a *ch; | ||
1308 | struct gk20a *g = dbg_s->g; | ||
1309 | |||
1310 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg events ctrl cmd %d", args->cmd); | ||
1311 | |||
1312 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1313 | if (!ch) { | ||
1314 | nvgpu_err(g, "no channel bound to dbg session"); | ||
1315 | return -EINVAL; | ||
1316 | } | ||
1317 | |||
1318 | switch (args->cmd) { | ||
1319 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: | ||
1320 | gk20a_dbg_gpu_events_enable(dbg_s); | ||
1321 | break; | ||
1322 | |||
1323 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_DISABLE: | ||
1324 | gk20a_dbg_gpu_events_disable(dbg_s); | ||
1325 | break; | ||
1326 | |||
1327 | case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_CLEAR: | ||
1328 | gk20a_dbg_gpu_events_clear(dbg_s); | ||
1329 | break; | ||
1330 | |||
1331 | default: | ||
1332 | nvgpu_err(g, "unrecognized dbg gpu events ctrl cmd: 0x%x", | ||
1333 | args->cmd); | ||
1334 | ret = -EINVAL; | ||
1335 | break; | ||
1336 | } | ||
1337 | |||
1338 | return ret; | ||
1339 | } | ||
1340 | |||
1341 | static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | ||
1342 | struct nvgpu_dbg_gpu_perfbuf_map_args *args) | ||
1343 | { | ||
1344 | struct gk20a *g = dbg_s->g; | ||
1345 | struct mm_gk20a *mm = &g->mm; | ||
1346 | int err; | ||
1347 | u32 virt_size; | ||
1348 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
1349 | |||
1350 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1351 | |||
1352 | if (g->perfbuf.owner) { | ||
1353 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1354 | return -EBUSY; | ||
1355 | } | ||
1356 | |||
1357 | mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size, | ||
1358 | big_page_size << 10, | ||
1359 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
1360 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
1361 | false, false, "perfbuf"); | ||
1362 | if (!mm->perfbuf.vm) { | ||
1363 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1364 | return -ENOMEM; | ||
1365 | } | ||
1366 | |||
1367 | err = nvgpu_vm_map_buffer(mm->perfbuf.vm, | ||
1368 | args->dmabuf_fd, | ||
1369 | &args->offset, | ||
1370 | 0, | ||
1371 | 0, | ||
1372 | 0, | ||
1373 | 0, | ||
1374 | args->mapping_size, | ||
1375 | NULL); | ||
1376 | if (err) | ||
1377 | goto err_remove_vm; | ||
1378 | |||
1379 | /* perf output buffer may not cross a 4GB boundary */ | ||
1380 | virt_size = u64_lo32(args->mapping_size); | ||
1381 | if (u64_hi32(args->offset) != u64_hi32(args->offset + virt_size)) { | ||
1382 | err = -EINVAL; | ||
1383 | goto err_unmap; | ||
1384 | } | ||
1385 | |||
1386 | err = g->ops.dbg_session_ops.perfbuffer_enable(g, | ||
1387 | args->offset, virt_size); | ||
1388 | if (err) | ||
1389 | goto err_unmap; | ||
1390 | |||
1391 | g->perfbuf.owner = dbg_s; | ||
1392 | g->perfbuf.offset = args->offset; | ||
1393 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1394 | |||
1395 | return 0; | ||
1396 | |||
1397 | err_unmap: | ||
1398 | nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL); | ||
1399 | err_remove_vm: | ||
1400 | nvgpu_vm_put(mm->perfbuf.vm); | ||
1401 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1402 | return err; | ||
1403 | } | ||
1404 | |||
1405 | static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s, | ||
1406 | struct nvgpu_dbg_gpu_perfbuf_unmap_args *args) | ||
1407 | { | ||
1408 | struct gk20a *g = dbg_s->g; | ||
1409 | int err; | ||
1410 | |||
1411 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1412 | if ((g->perfbuf.owner != dbg_s) || | ||
1413 | (g->perfbuf.offset != args->offset)) { | ||
1414 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1415 | return -EINVAL; | ||
1416 | } | ||
1417 | |||
1418 | err = gk20a_perfbuf_release_locked(g, args->offset); | ||
1419 | |||
1420 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1421 | |||
1422 | return err; | ||
1423 | } | ||
1424 | |||
1425 | static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s, | ||
1426 | struct nvgpu_dbg_gpu_pc_sampling_args *args) | ||
1427 | { | ||
1428 | struct channel_gk20a *ch; | ||
1429 | struct gk20a *g = dbg_s->g; | ||
1430 | |||
1431 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1432 | if (!ch) | ||
1433 | return -EINVAL; | ||
1434 | |||
1435 | nvgpu_log_fn(g, " "); | ||
1436 | |||
1437 | return g->ops.gr.update_pc_sampling ? | ||
1438 | g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL; | ||
1439 | } | ||
1440 | |||
1441 | static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( | ||
1442 | struct dbg_session_gk20a *dbg_s, | ||
1443 | struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) | ||
1444 | { | ||
1445 | struct gk20a *g = dbg_s->g; | ||
1446 | struct gr_gk20a *gr = &g->gr; | ||
1447 | u32 sm_id; | ||
1448 | struct channel_gk20a *ch; | ||
1449 | int err = 0; | ||
1450 | |||
1451 | ch = nvgpu_dbg_gpu_get_session_channel(dbg_s); | ||
1452 | if (!ch) | ||
1453 | return -EINVAL; | ||
1454 | |||
1455 | sm_id = args->sm_id; | ||
1456 | if (sm_id >= gr->no_of_sm) | ||
1457 | return -EINVAL; | ||
1458 | |||
1459 | nvgpu_speculation_barrier(); | ||
1460 | |||
1461 | err = gk20a_busy(g); | ||
1462 | if (err) | ||
1463 | return err; | ||
1464 | |||
1465 | err = gr_gk20a_elpg_protected_call(g, | ||
1466 | g->ops.gr.clear_sm_error_state(g, ch, sm_id)); | ||
1467 | |||
1468 | gk20a_idle(g); | ||
1469 | |||
1470 | return err; | ||
1471 | } | ||
1472 | |||
1473 | static int | ||
1474 | nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, | ||
1475 | struct nvgpu_dbg_gpu_suspend_resume_contexts_args *args) | ||
1476 | { | ||
1477 | struct gk20a *g = dbg_s->g; | ||
1478 | int err = 0; | ||
1479 | int ctx_resident_ch_fd = -1; | ||
1480 | |||
1481 | err = gk20a_busy(g); | ||
1482 | if (err) | ||
1483 | return err; | ||
1484 | |||
1485 | switch (args->action) { | ||
1486 | case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: | ||
1487 | err = g->ops.gr.suspend_contexts(g, dbg_s, | ||
1488 | &ctx_resident_ch_fd); | ||
1489 | break; | ||
1490 | |||
1491 | case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS: | ||
1492 | err = g->ops.gr.resume_contexts(g, dbg_s, | ||
1493 | &ctx_resident_ch_fd); | ||
1494 | break; | ||
1495 | } | ||
1496 | |||
1497 | if (ctx_resident_ch_fd < 0) { | ||
1498 | args->is_resident_context = 0; | ||
1499 | } else { | ||
1500 | args->is_resident_context = 1; | ||
1501 | args->resident_context_fd = ctx_resident_ch_fd; | ||
1502 | } | ||
1503 | |||
1504 | gk20a_idle(g); | ||
1505 | |||
1506 | return err; | ||
1507 | } | ||
1508 | |||
1509 | static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, | ||
1510 | struct nvgpu_dbg_gpu_access_fb_memory_args *args) | ||
1511 | { | ||
1512 | struct gk20a *g = dbg_s->g; | ||
1513 | struct dma_buf *dmabuf; | ||
1514 | void __user *user_buffer = (void __user *)(uintptr_t)args->buffer; | ||
1515 | void *buffer; | ||
1516 | u64 size, access_size, offset; | ||
1517 | u64 access_limit_size = SZ_4K; | ||
1518 | int err = 0; | ||
1519 | |||
1520 | if ((args->offset & 3) || (!args->size) || (args->size & 3)) | ||
1521 | return -EINVAL; | ||
1522 | |||
1523 | dmabuf = dma_buf_get(args->dmabuf_fd); | ||
1524 | if (IS_ERR(dmabuf)) | ||
1525 | return -EINVAL; | ||
1526 | |||
1527 | if ((args->offset > dmabuf->size) || | ||
1528 | (args->size > dmabuf->size) || | ||
1529 | (args->offset + args->size > dmabuf->size)) { | ||
1530 | err = -EINVAL; | ||
1531 | goto fail_dmabuf_put; | ||
1532 | } | ||
1533 | |||
1534 | buffer = nvgpu_big_zalloc(g, access_limit_size); | ||
1535 | if (!buffer) { | ||
1536 | err = -ENOMEM; | ||
1537 | goto fail_dmabuf_put; | ||
1538 | } | ||
1539 | |||
1540 | size = args->size; | ||
1541 | offset = 0; | ||
1542 | |||
1543 | err = gk20a_busy(g); | ||
1544 | if (err) | ||
1545 | goto fail_free_buffer; | ||
1546 | |||
1547 | while (size) { | ||
1548 | /* Max access size of access_limit_size in one loop */ | ||
1549 | access_size = min(access_limit_size, size); | ||
1550 | |||
1551 | if (args->cmd == | ||
1552 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE) { | ||
1553 | err = copy_from_user(buffer, user_buffer + offset, | ||
1554 | access_size); | ||
1555 | if (err) | ||
1556 | goto fail_idle; | ||
1557 | } | ||
1558 | |||
1559 | err = nvgpu_vidmem_buf_access_memory(g, dmabuf, buffer, | ||
1560 | args->offset + offset, access_size, | ||
1561 | args->cmd); | ||
1562 | if (err) | ||
1563 | goto fail_idle; | ||
1564 | |||
1565 | if (args->cmd == | ||
1566 | NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ) { | ||
1567 | err = copy_to_user(user_buffer + offset, | ||
1568 | buffer, access_size); | ||
1569 | if (err) | ||
1570 | goto fail_idle; | ||
1571 | } | ||
1572 | |||
1573 | size -= access_size; | ||
1574 | offset += access_size; | ||
1575 | } | ||
1576 | |||
1577 | fail_idle: | ||
1578 | gk20a_idle(g); | ||
1579 | fail_free_buffer: | ||
1580 | nvgpu_big_free(g, buffer); | ||
1581 | fail_dmabuf_put: | ||
1582 | dma_buf_put(dmabuf); | ||
1583 | |||
1584 | return err; | ||
1585 | } | ||
1586 | |||
1587 | static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s, | ||
1588 | struct nvgpu_dbg_gpu_profiler_reserve_args *args) | ||
1589 | { | ||
1590 | if (args->acquire) | ||
1591 | return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle); | ||
1592 | |||
1593 | return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle); | ||
1594 | } | ||
1595 | |||
1596 | static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s, | ||
1597 | struct nvgpu_dbg_gpu_timeout_args *args) | ||
1598 | { | ||
1599 | bool status; | ||
1600 | struct gk20a *g = dbg_s->g; | ||
1601 | |||
1602 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1603 | status = nvgpu_is_timeouts_enabled(g); | ||
1604 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1605 | |||
1606 | if (status) | ||
1607 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE; | ||
1608 | else | ||
1609 | args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE; | ||
1610 | } | ||
1611 | |||
1612 | /* In order to perform a context relative op the context has | ||
1613 | * to be created already... which would imply that the | ||
1614 | * context switch mechanism has already been put in place. | ||
1615 | * So by the time we perform such an opertation it should always | ||
1616 | * be possible to query for the appropriate context offsets, etc. | ||
1617 | * | ||
1618 | * But note: while the dbg_gpu bind requires the a channel fd, | ||
1619 | * it doesn't require an allocated gr/compute obj at that point... | ||
1620 | */ | ||
1621 | static bool gr_context_info_available(struct dbg_session_gk20a *dbg_s, | ||
1622 | struct gr_gk20a *gr) | ||
1623 | { | ||
1624 | int err; | ||
1625 | |||
1626 | nvgpu_mutex_acquire(&gr->ctx_mutex); | ||
1627 | err = !gr->ctx_vars.golden_image_initialized; | ||
1628 | nvgpu_mutex_release(&gr->ctx_mutex); | ||
1629 | if (err) | ||
1630 | return false; | ||
1631 | return true; | ||
1632 | |||
1633 | } | ||
1634 | |||
1635 | static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | ||
1636 | { | ||
1637 | struct mm_gk20a *mm = &g->mm; | ||
1638 | struct vm_gk20a *vm = mm->perfbuf.vm; | ||
1639 | int err; | ||
1640 | |||
1641 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); | ||
1642 | |||
1643 | nvgpu_vm_unmap(vm, offset, NULL); | ||
1644 | nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); | ||
1645 | nvgpu_vm_put(vm); | ||
1646 | |||
1647 | g->perfbuf.owner = NULL; | ||
1648 | g->perfbuf.offset = 0; | ||
1649 | return err; | ||
1650 | } | ||
1651 | |||
1652 | static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s, | ||
1653 | u32 profiler_handle) | ||
1654 | { | ||
1655 | struct gk20a *g = dbg_s->g; | ||
1656 | struct dbg_profiler_object_data *prof_obj; | ||
1657 | int err = 0; | ||
1658 | |||
1659 | nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle); | ||
1660 | |||
1661 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1662 | |||
1663 | /* Find matching object. */ | ||
1664 | prof_obj = find_matching_prof_obj(dbg_s, profiler_handle); | ||
1665 | |||
1666 | if (!prof_obj) { | ||
1667 | nvgpu_err(g, "object not found"); | ||
1668 | err = -EINVAL; | ||
1669 | goto exit; | ||
1670 | } | ||
1671 | |||
1672 | if (prof_obj->has_reservation) | ||
1673 | g->ops.dbg_session_ops.release_profiler_reservation(dbg_s, prof_obj); | ||
1674 | else { | ||
1675 | nvgpu_err(g, "No reservation found"); | ||
1676 | err = -EINVAL; | ||
1677 | goto exit; | ||
1678 | } | ||
1679 | exit: | ||
1680 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1681 | return err; | ||
1682 | } | ||
1683 | |||
/*
 * Acquire a profiler reservation for the object identified by
 * profiler_handle. Three exclusivity regimes apply, keyed on whether the
 * object is channel-less (global), TSG-bound, or channel-bound:
 *  - global: allowed only if no other global or per-context reservation
 *    is held anywhere;
 *  - per-TSG: no other channel in the same TSG may already hold one, and
 *    no global reservation may be in effect;
 *  - per-channel: no other object on the same channel may already hold
 *    one, and no global reservation may be in effect.
 * Returns 0 on success (including when the object already holds its
 * reservation), -EINVAL on lookup failure, -EBUSY on conflicts.
 */
static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
								u32 profiler_handle)
{
	struct gk20a *g = dbg_s->g;
	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
	int err = 0;

	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);

	/* sanity-check the global bookkeeping before touching it */
	if (g->profiler_reservation_count < 0) {
		nvgpu_err(g, "Negative reservation count!");
		return -EINVAL;
	}

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	/* Find matching object. */
	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);

	if (!my_prof_obj) {
		nvgpu_err(g, "object not found");
		err = -EINVAL;
		goto exit;
	}

	/* If we already have the reservation, we're done */
	if (my_prof_obj->has_reservation) {
		err = 0;
		goto exit;
	}

	if (my_prof_obj->ch == NULL) {
		/* Global reservations are only allowed if there are no other
		 * global or per-context reservations currently held
		 */
		if (!g->ops.dbg_session_ops.check_and_set_global_reservation(
							dbg_s, my_prof_obj)) {
			nvgpu_err(g,
				"global reserve: have existing reservation");
			err = -EBUSY;
		}
	} else if (g->global_profiler_reservation_held) {
		/* If there's a global reservation,
		 * we can't take a per-context one.
		 */
		nvgpu_err(g,
			"per-ctxt reserve: global reservation in effect");
		err = -EBUSY;
	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
		/* TSG: check that another channel in the TSG
		 * doesn't already have the reservation
		 */
		int my_tsgid = my_prof_obj->ch->tsgid;

		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
				dbg_profiler_object_data, prof_obj_entry) {
			if (prof_obj->has_reservation &&
					(prof_obj->ch->tsgid == my_tsgid)) {
				nvgpu_err(g,
				    "per-ctxt reserve (tsg): already reserved");
				err = -EBUSY;
				goto exit;
			}
		}

		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
							dbg_s, my_prof_obj)) {
			/* Another guest OS has the global reservation */
			nvgpu_err(g,
				"per-ctxt reserve: global reservation in effect");
			err = -EBUSY;
		}
	} else {
		/* channel: check that some other profiler object doesn't
		 * already have the reservation.
		 */
		struct channel_gk20a *my_ch = my_prof_obj->ch;

		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
				dbg_profiler_object_data, prof_obj_entry) {
			if (prof_obj->has_reservation &&
					(prof_obj->ch == my_ch)) {
				nvgpu_err(g,
				    "per-ctxt reserve (ch): already reserved");
				err = -EBUSY;
				goto exit;
			}
		}

		if (!g->ops.dbg_session_ops.check_and_set_context_reservation(
							dbg_s, my_prof_obj)) {
			/* Another guest OS has the global reservation */
			nvgpu_err(g,
				"per-ctxt reserve: global reservation in effect");
			err = -EBUSY;
		}
	}
exit:
	nvgpu_mutex_release(&g->dbg_sessions_lock);
	return err;
}
1785 | |||
1786 | static int dbg_unbind_channel_gk20a(struct dbg_session_gk20a *dbg_s, | ||
1787 | struct nvgpu_dbg_gpu_unbind_channel_args *args) | ||
1788 | { | ||
1789 | struct dbg_session_channel_data *ch_data; | ||
1790 | struct gk20a *g = dbg_s->g; | ||
1791 | bool channel_found = false; | ||
1792 | struct channel_gk20a *ch; | ||
1793 | int err; | ||
1794 | |||
1795 | nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s fd=%d", | ||
1796 | g->name, args->channel_fd); | ||
1797 | |||
1798 | ch = gk20a_get_channel_from_file(args->channel_fd); | ||
1799 | if (!ch) { | ||
1800 | nvgpu_log_fn(g, "no channel found for fd"); | ||
1801 | return -EINVAL; | ||
1802 | } | ||
1803 | |||
1804 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
1805 | nvgpu_list_for_each_entry(ch_data, &dbg_s->ch_list, | ||
1806 | dbg_session_channel_data, ch_entry) { | ||
1807 | if (ch->chid == ch_data->chid) { | ||
1808 | channel_found = true; | ||
1809 | break; | ||
1810 | } | ||
1811 | } | ||
1812 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
1813 | |||
1814 | if (!channel_found) { | ||
1815 | nvgpu_log_fn(g, "channel not bounded, fd=%d\n", args->channel_fd); | ||
1816 | err = -EINVAL; | ||
1817 | goto out; | ||
1818 | } | ||
1819 | |||
1820 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
1821 | nvgpu_mutex_acquire(&dbg_s->ch_list_lock); | ||
1822 | err = dbg_unbind_single_channel_gk20a(dbg_s, ch_data); | ||
1823 | nvgpu_mutex_release(&dbg_s->ch_list_lock); | ||
1824 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
1825 | |||
1826 | out: | ||
1827 | gk20a_channel_put(ch); | ||
1828 | return err; | ||
1829 | } | ||
1830 | |||
1831 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp) | ||
1832 | { | ||
1833 | struct nvgpu_os_linux *l = container_of(inode->i_cdev, | ||
1834 | struct nvgpu_os_linux, dbg.cdev); | ||
1835 | struct gk20a *g = &l->g; | ||
1836 | |||
1837 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1838 | return gk20a_dbg_gpu_do_dev_open(inode, filp, false /* not profiler */); | ||
1839 | } | ||
1840 | |||
1841 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, | ||
1842 | unsigned long arg) | ||
1843 | { | ||
1844 | struct dbg_session_gk20a_linux *dbg_s_linux = filp->private_data; | ||
1845 | struct dbg_session_gk20a *dbg_s = &dbg_s_linux->dbg_s; | ||
1846 | struct gk20a *g = dbg_s->g; | ||
1847 | u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE]; | ||
1848 | int err = 0; | ||
1849 | |||
1850 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); | ||
1851 | |||
1852 | if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) || | ||
1853 | (_IOC_NR(cmd) == 0) || | ||
1854 | (_IOC_NR(cmd) > NVGPU_DBG_GPU_IOCTL_LAST) || | ||
1855 | (_IOC_SIZE(cmd) > NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE)) | ||
1856 | return -EINVAL; | ||
1857 | |||
1858 | memset(buf, 0, sizeof(buf)); | ||
1859 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1860 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
1861 | return -EFAULT; | ||
1862 | } | ||
1863 | |||
1864 | if (!g->sw_ready) { | ||
1865 | err = gk20a_busy(g); | ||
1866 | if (err) | ||
1867 | return err; | ||
1868 | |||
1869 | gk20a_idle(g); | ||
1870 | } | ||
1871 | |||
1872 | /* protect from threaded user space calls */ | ||
1873 | nvgpu_mutex_acquire(&dbg_s->ioctl_lock); | ||
1874 | |||
1875 | switch (cmd) { | ||
1876 | case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: | ||
1877 | err = dbg_bind_channel_gk20a(dbg_s, | ||
1878 | (struct nvgpu_dbg_gpu_bind_channel_args *)buf); | ||
1879 | break; | ||
1880 | |||
1881 | case NVGPU_DBG_GPU_IOCTL_REG_OPS: | ||
1882 | err = nvgpu_ioctl_channel_reg_ops(dbg_s, | ||
1883 | (struct nvgpu_dbg_gpu_exec_reg_ops_args *)buf); | ||
1884 | break; | ||
1885 | |||
1886 | case NVGPU_DBG_GPU_IOCTL_POWERGATE: | ||
1887 | err = nvgpu_ioctl_powergate_gk20a(dbg_s, | ||
1888 | (struct nvgpu_dbg_gpu_powergate_args *)buf); | ||
1889 | break; | ||
1890 | |||
1891 | case NVGPU_DBG_GPU_IOCTL_EVENTS_CTRL: | ||
1892 | err = gk20a_dbg_gpu_events_ctrl(dbg_s, | ||
1893 | (struct nvgpu_dbg_gpu_events_ctrl_args *)buf); | ||
1894 | break; | ||
1895 | |||
1896 | case NVGPU_DBG_GPU_IOCTL_SMPC_CTXSW_MODE: | ||
1897 | err = nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s, | ||
1898 | (struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *)buf); | ||
1899 | break; | ||
1900 | |||
1901 | case NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE: | ||
1902 | err = nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(dbg_s, | ||
1903 | (struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *)buf); | ||
1904 | break; | ||
1905 | |||
1906 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS: | ||
1907 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s, | ||
1908 | (struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf); | ||
1909 | break; | ||
1910 | |||
1911 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP: | ||
1912 | err = gk20a_perfbuf_map(dbg_s, | ||
1913 | (struct nvgpu_dbg_gpu_perfbuf_map_args *)buf); | ||
1914 | break; | ||
1915 | |||
1916 | case NVGPU_DBG_GPU_IOCTL_PERFBUF_UNMAP: | ||
1917 | err = gk20a_perfbuf_unmap(dbg_s, | ||
1918 | (struct nvgpu_dbg_gpu_perfbuf_unmap_args *)buf); | ||
1919 | break; | ||
1920 | |||
1921 | case NVGPU_DBG_GPU_IOCTL_PC_SAMPLING: | ||
1922 | err = gk20a_dbg_pc_sampling(dbg_s, | ||
1923 | (struct nvgpu_dbg_gpu_pc_sampling_args *)buf); | ||
1924 | break; | ||
1925 | |||
1926 | case NVGPU_DBG_GPU_IOCTL_SET_NEXT_STOP_TRIGGER_TYPE: | ||
1927 | err = nvgpu_dbg_gpu_ioctl_set_next_stop_trigger_type(dbg_s, | ||
1928 | (struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args *)buf); | ||
1929 | break; | ||
1930 | |||
1931 | case NVGPU_DBG_GPU_IOCTL_TIMEOUT: | ||
1932 | err = nvgpu_dbg_gpu_ioctl_timeout(dbg_s, | ||
1933 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
1934 | break; | ||
1935 | |||
1936 | case NVGPU_DBG_GPU_IOCTL_GET_TIMEOUT: | ||
1937 | nvgpu_dbg_gpu_ioctl_get_timeout(dbg_s, | ||
1938 | (struct nvgpu_dbg_gpu_timeout_args *)buf); | ||
1939 | break; | ||
1940 | |||
1941 | case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE: | ||
1942 | err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s, | ||
1943 | (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf); | ||
1944 | break; | ||
1945 | |||
1946 | case NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE: | ||
1947 | err = nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(dbg_s, | ||
1948 | (struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *)buf); | ||
1949 | break; | ||
1950 | |||
1951 | case NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE: | ||
1952 | err = nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(dbg_s, | ||
1953 | (struct nvgpu_dbg_gpu_write_single_sm_error_state_args *)buf); | ||
1954 | break; | ||
1955 | |||
1956 | case NVGPU_DBG_GPU_IOCTL_UNBIND_CHANNEL: | ||
1957 | err = dbg_unbind_channel_gk20a(dbg_s, | ||
1958 | (struct nvgpu_dbg_gpu_unbind_channel_args *)buf); | ||
1959 | break; | ||
1960 | |||
1961 | case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_CONTEXTS: | ||
1962 | err = nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(dbg_s, | ||
1963 | (struct nvgpu_dbg_gpu_suspend_resume_contexts_args *)buf); | ||
1964 | break; | ||
1965 | |||
1966 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY: | ||
1967 | err = nvgpu_dbg_gpu_ioctl_access_fb_memory(dbg_s, | ||
1968 | (struct nvgpu_dbg_gpu_access_fb_memory_args *)buf); | ||
1969 | break; | ||
1970 | |||
1971 | case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE: | ||
1972 | err = nvgpu_ioctl_allocate_profiler_object(dbg_s_linux, | ||
1973 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
1974 | break; | ||
1975 | |||
1976 | case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE: | ||
1977 | err = nvgpu_ioctl_free_profiler_object(dbg_s_linux, | ||
1978 | (struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf); | ||
1979 | break; | ||
1980 | |||
1981 | case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE: | ||
1982 | err = nvgpu_ioctl_profiler_reserve(dbg_s, | ||
1983 | (struct nvgpu_dbg_gpu_profiler_reserve_args *)buf); | ||
1984 | break; | ||
1985 | |||
1986 | default: | ||
1987 | nvgpu_err(g, | ||
1988 | "unrecognized dbg gpu ioctl cmd: 0x%x", | ||
1989 | cmd); | ||
1990 | err = -ENOTTY; | ||
1991 | break; | ||
1992 | } | ||
1993 | |||
1994 | nvgpu_mutex_release(&dbg_s->ioctl_lock); | ||
1995 | |||
1996 | nvgpu_log(g, gpu_dbg_gpu_dbg, "ret=%d", err); | ||
1997 | |||
1998 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
1999 | err = copy_to_user((void __user *)arg, | ||
2000 | buf, _IOC_SIZE(cmd)); | ||
2001 | |||
2002 | return err; | ||
2003 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h new file mode 100644 index 00000000..bd76045b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * Tegra GK20A GPU Debugger Driver | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | #ifndef DBG_GPU_IOCTL_GK20A_H | ||
19 | #define DBG_GPU_IOCTL_GK20A_H | ||
20 | #include <linux/poll.h> | ||
21 | |||
22 | #include "gk20a/dbg_gpu_gk20a.h" | ||
23 | |||
24 | /* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number | ||
25 | * of regops */ | ||
26 | #define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024 | ||
27 | |||
/* Linux-specific wrapper around the OS-independent debug session state. */
struct dbg_session_gk20a_linux {
	/* device backing this session's character device node */
	struct device *dev;
	/* common (OS-independent) debug session data; embedded, not a pointer */
	struct dbg_session_gk20a dbg_s;
};
32 | |||
/* Linux-specific per-channel bookkeeping for a debug session binding. */
struct dbg_session_channel_data_linux {
	/*
	 * We have to keep a ref to the _file_, not the channel, because
	 * close(channel_fd) is synchronous and would deadlock if we had an
	 * open debug session fd holding a channel ref at that time. Holding a
	 * ref to the file makes close(channel_fd) just drop a kernel ref to
	 * the file; the channel will close when the last file ref is dropped.
	 */
	struct file *ch_f;
	/* common (OS-independent) per-channel session data; embedded */
	struct dbg_session_channel_data ch_data;
};
44 | |||
45 | /* module debug driver interface */ | ||
46 | int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp); | ||
47 | int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp); | ||
48 | long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); | ||
49 | unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait); | ||
50 | |||
51 | /* used by profiler driver interface */ | ||
52 | int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp); | ||
53 | |||
54 | #endif \ No newline at end of file | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c new file mode 100644 index 00000000..4ef99ded --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | |||
@@ -0,0 +1,677 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/cdev.h> | ||
20 | #include <linux/uaccess.h> | ||
21 | #include <linux/poll.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | #include <linux/anon_inodes.h> | ||
24 | |||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/log.h> | ||
27 | #include <nvgpu/os_sched.h> | ||
28 | |||
29 | #include "gk20a/gk20a.h" | ||
30 | #include "gk20a/tsg_gk20a.h" | ||
31 | #include "gv11b/fifo_gv11b.h" | ||
32 | #include "platform_gk20a.h" | ||
33 | #include "ioctl_tsg.h" | ||
34 | #include "ioctl_channel.h" | ||
35 | #include "os_linux.h" | ||
36 | |||
/* Per-fd private data for an open TSG device file. */
struct tsg_private {
	struct gk20a *g;	/* GPU this TSG belongs to */
	struct tsg_gk20a *tsg;	/* TSG owned by this file descriptor */
};
41 | |||
42 | static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
43 | { | ||
44 | struct channel_gk20a *ch; | ||
45 | int err; | ||
46 | |||
47 | ch = gk20a_get_channel_from_file(ch_fd); | ||
48 | if (!ch) | ||
49 | return -EINVAL; | ||
50 | |||
51 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
52 | |||
53 | gk20a_channel_put(ch); | ||
54 | return err; | ||
55 | } | ||
56 | |||
57 | static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, | ||
58 | struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg) | ||
59 | { | ||
60 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
61 | struct gk20a_sched_ctrl *sched = &l->sched_ctrl; | ||
62 | struct channel_gk20a *ch; | ||
63 | struct gr_gk20a *gr = &g->gr; | ||
64 | int err = 0; | ||
65 | |||
66 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
67 | |||
68 | nvgpu_mutex_acquire(&sched->control_lock); | ||
69 | if (sched->control_locked) { | ||
70 | err = -EPERM; | ||
71 | goto mutex_release; | ||
72 | } | ||
73 | err = gk20a_busy(g); | ||
74 | if (err) { | ||
75 | nvgpu_err(g, "failed to power on gpu"); | ||
76 | goto mutex_release; | ||
77 | } | ||
78 | |||
79 | ch = gk20a_get_channel_from_file(arg->channel_fd); | ||
80 | if (!ch) { | ||
81 | err = -EINVAL; | ||
82 | goto idle; | ||
83 | } | ||
84 | |||
85 | if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) { | ||
86 | if ((arg->num_active_tpcs > gr->max_tpc_count) || | ||
87 | !(arg->num_active_tpcs)) { | ||
88 | nvgpu_err(g, "Invalid num of active TPCs"); | ||
89 | err = -EINVAL; | ||
90 | goto ch_put; | ||
91 | } | ||
92 | tsg->tpc_num_initialized = true; | ||
93 | tsg->num_active_tpcs = arg->num_active_tpcs; | ||
94 | tsg->tpc_pg_enabled = true; | ||
95 | } else { | ||
96 | tsg->tpc_pg_enabled = false; nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled"); | ||
97 | } | ||
98 | |||
99 | if (arg->subcontext_id < g->fifo.max_subctx_count) { | ||
100 | ch->subctx_id = arg->subcontext_id; | ||
101 | } else { | ||
102 | err = -EINVAL; | ||
103 | goto ch_put; | ||
104 | } | ||
105 | |||
106 | nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d", | ||
107 | ch->chid, ch->subctx_id); | ||
108 | |||
109 | /* Use runqueue selector 1 for all ASYNC ids */ | ||
110 | if (ch->subctx_id > CHANNEL_INFO_VEID0) | ||
111 | ch->runqueue_sel = 1; | ||
112 | |||
113 | err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch); | ||
114 | ch_put: | ||
115 | gk20a_channel_put(ch); | ||
116 | idle: | ||
117 | gk20a_idle(g); | ||
118 | mutex_release: | ||
119 | nvgpu_mutex_release(&sched->control_lock); | ||
120 | return err; | ||
121 | } | ||
122 | |||
123 | static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) | ||
124 | { | ||
125 | struct channel_gk20a *ch; | ||
126 | int err = 0; | ||
127 | |||
128 | ch = gk20a_get_channel_from_file(ch_fd); | ||
129 | if (!ch) | ||
130 | return -EINVAL; | ||
131 | |||
132 | if (ch->tsgid != tsg->tsgid) { | ||
133 | err = -EINVAL; | ||
134 | goto out; | ||
135 | } | ||
136 | |||
137 | err = gk20a_tsg_unbind_channel(ch); | ||
138 | |||
139 | /* | ||
140 | * Mark the channel timedout since channel unbound from TSG | ||
141 | * has no context of its own so it can't serve any job | ||
142 | */ | ||
143 | ch->has_timedout = true; | ||
144 | |||
145 | out: | ||
146 | gk20a_channel_put(ch); | ||
147 | return err; | ||
148 | } | ||
149 | |||
150 | static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg, | ||
151 | unsigned int event_id, | ||
152 | struct gk20a_event_id_data **event_id_data) | ||
153 | { | ||
154 | struct gk20a_event_id_data *local_event_id_data; | ||
155 | bool event_found = false; | ||
156 | |||
157 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
158 | nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list, | ||
159 | gk20a_event_id_data, event_id_node) { | ||
160 | if (local_event_id_data->event_id == event_id) { | ||
161 | event_found = true; | ||
162 | break; | ||
163 | } | ||
164 | } | ||
165 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
166 | |||
167 | if (event_found) { | ||
168 | *event_id_data = local_event_id_data; | ||
169 | return 0; | ||
170 | } else { | ||
171 | return -1; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific | ||
177 | * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs | ||
178 | */ | ||
179 | static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id) | ||
180 | { | ||
181 | switch (event_id) { | ||
182 | case NVGPU_EVENT_ID_BPT_INT: | ||
183 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT; | ||
184 | case NVGPU_EVENT_ID_BPT_PAUSE: | ||
185 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE; | ||
186 | case NVGPU_EVENT_ID_BLOCKING_SYNC: | ||
187 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC; | ||
188 | case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED: | ||
189 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED; | ||
190 | case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE: | ||
191 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE; | ||
192 | case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN: | ||
193 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN; | ||
194 | } | ||
195 | |||
196 | return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX; | ||
197 | } | ||
198 | |||
199 | void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, | ||
200 | int __event_id) | ||
201 | { | ||
202 | struct gk20a_event_id_data *event_id_data; | ||
203 | u32 event_id; | ||
204 | int err = 0; | ||
205 | struct gk20a *g = tsg->g; | ||
206 | |||
207 | event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id); | ||
208 | if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
209 | return; | ||
210 | |||
211 | err = gk20a_tsg_get_event_data_from_id(tsg, event_id, | ||
212 | &event_id_data); | ||
213 | if (err) | ||
214 | return; | ||
215 | |||
216 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
217 | |||
218 | nvgpu_log_info(g, | ||
219 | "posting event for event_id=%d on tsg=%d\n", | ||
220 | event_id, tsg->tsgid); | ||
221 | event_id_data->event_posted = true; | ||
222 | |||
223 | nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq); | ||
224 | |||
225 | nvgpu_mutex_release(&event_id_data->lock); | ||
226 | } | ||
227 | |||
228 | static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait) | ||
229 | { | ||
230 | unsigned int mask = 0; | ||
231 | struct gk20a_event_id_data *event_id_data = filep->private_data; | ||
232 | struct gk20a *g = event_id_data->g; | ||
233 | u32 event_id = event_id_data->event_id; | ||
234 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
235 | |||
236 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " "); | ||
237 | |||
238 | poll_wait(filep, &event_id_data->event_id_wq.wq, wait); | ||
239 | |||
240 | nvgpu_mutex_acquire(&event_id_data->lock); | ||
241 | |||
242 | if (event_id_data->event_posted) { | ||
243 | nvgpu_log_info(g, | ||
244 | "found pending event_id=%d on TSG=%d\n", | ||
245 | event_id, tsg->tsgid); | ||
246 | mask = (POLLPRI | POLLIN); | ||
247 | event_id_data->event_posted = false; | ||
248 | } | ||
249 | |||
250 | nvgpu_mutex_release(&event_id_data->lock); | ||
251 | |||
252 | return mask; | ||
253 | } | ||
254 | |||
255 | static int gk20a_event_id_release(struct inode *inode, struct file *filp) | ||
256 | { | ||
257 | struct gk20a_event_id_data *event_id_data = filp->private_data; | ||
258 | struct gk20a *g = event_id_data->g; | ||
259 | struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id; | ||
260 | |||
261 | nvgpu_mutex_acquire(&tsg->event_id_list_lock); | ||
262 | nvgpu_list_del(&event_id_data->event_id_node); | ||
263 | nvgpu_mutex_release(&tsg->event_id_list_lock); | ||
264 | |||
265 | nvgpu_mutex_destroy(&event_id_data->lock); | ||
266 | gk20a_put(g); | ||
267 | nvgpu_kfree(g, event_id_data); | ||
268 | filp->private_data = NULL; | ||
269 | |||
270 | return 0; | ||
271 | } | ||
272 | |||
/* fops for the anonymous-inode event fds handed out by event_id_enable. */
const struct file_operations gk20a_event_id_ops = {
	.owner = THIS_MODULE,
	.poll = gk20a_event_id_poll,
	.release = gk20a_event_id_release,
};
278 | |||
/*
 * Create an anonymous-inode fd the caller can poll() on to wait for the
 * given TSG event. Fails with -EINVAL if an fd for this event already
 * exists, -ENODEV if the GPU is gone.
 *
 * Cleanup ordering is deliberate: fd_install() is only called after every
 * fallible step, because an installed fd cannot be taken back. On success
 * the event fd owns one GPU ref (dropped in gk20a_event_id_release).
 */
static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
				int event_id,
				int *fd)
{
	int err = 0;
	int local_fd;
	struct file *file;
	char name[64];
	struct gk20a_event_id_data *event_id_data;
	struct gk20a *g;

	/* Hold a GPU ref for the lifetime of the event fd. */
	g = gk20a_get(tsg->g);
	if (!g)
		return -ENODEV;

	err = gk20a_tsg_get_event_data_from_id(tsg,
				event_id, &event_id_data);
	if (err == 0) {
		/* We already have event enabled */
		err = -EINVAL;
		goto free_ref;
	}

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		goto free_ref;
	local_fd = err;

	snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
		 event_id, local_fd);

	file = anon_inode_getfile(name, &gk20a_event_id_ops,
				  NULL, O_RDWR);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
	if (!event_id_data) {
		err = -ENOMEM;
		goto clean_up_file;
	}
	event_id_data->g = g;
	/* id stores the tsgid; poll/release recover the TSG from it. */
	event_id_data->id = tsg->tsgid;
	event_id_data->event_id = event_id;

	nvgpu_cond_init(&event_id_data->event_id_wq);
	err = nvgpu_mutex_init(&event_id_data->lock);
	if (err)
		goto clean_up_free;

	nvgpu_init_list_node(&event_id_data->event_id_node);

	nvgpu_mutex_acquire(&tsg->event_id_list_lock);
	nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
	nvgpu_mutex_release(&tsg->event_id_list_lock);

	/* Point of no return: the fd becomes visible to user space here. */
	fd_install(local_fd, file);
	file->private_data = event_id_data;

	*fd = local_fd;

	return 0;

clean_up_free:
	nvgpu_kfree(g, event_id_data);
clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(local_fd);
free_ref:
	gk20a_put(g);
	return err;
}
354 | |||
355 | static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, | ||
356 | struct nvgpu_event_id_ctrl_args *args) | ||
357 | { | ||
358 | int err = 0; | ||
359 | int fd = -1; | ||
360 | |||
361 | if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) | ||
362 | return -EINVAL; | ||
363 | |||
364 | switch (args->cmd) { | ||
365 | case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: | ||
366 | err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); | ||
367 | if (!err) | ||
368 | args->event_fd = fd; | ||
369 | break; | ||
370 | |||
371 | default: | ||
372 | nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x", | ||
373 | args->cmd); | ||
374 | err = -EINVAL; | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | return err; | ||
379 | } | ||
380 | |||
381 | int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp) | ||
382 | { | ||
383 | struct tsg_private *priv; | ||
384 | struct tsg_gk20a *tsg; | ||
385 | struct device *dev; | ||
386 | int err; | ||
387 | |||
388 | g = gk20a_get(g); | ||
389 | if (!g) | ||
390 | return -ENODEV; | ||
391 | |||
392 | dev = dev_from_gk20a(g); | ||
393 | |||
394 | nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev)); | ||
395 | |||
396 | priv = nvgpu_kmalloc(g, sizeof(*priv)); | ||
397 | if (!priv) { | ||
398 | err = -ENOMEM; | ||
399 | goto free_ref; | ||
400 | } | ||
401 | |||
402 | tsg = gk20a_tsg_open(g, nvgpu_current_pid(g)); | ||
403 | if (!tsg) { | ||
404 | nvgpu_kfree(g, priv); | ||
405 | err = -ENOMEM; | ||
406 | goto free_ref; | ||
407 | } | ||
408 | |||
409 | priv->g = g; | ||
410 | priv->tsg = tsg; | ||
411 | filp->private_data = priv; | ||
412 | |||
413 | gk20a_sched_ctrl_tsg_added(g, tsg); | ||
414 | |||
415 | return 0; | ||
416 | |||
417 | free_ref: | ||
418 | gk20a_put(g); | ||
419 | return err; | ||
420 | } | ||
421 | |||
422 | int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp) | ||
423 | { | ||
424 | struct nvgpu_os_linux *l; | ||
425 | struct gk20a *g; | ||
426 | int ret; | ||
427 | |||
428 | l = container_of(inode->i_cdev, | ||
429 | struct nvgpu_os_linux, tsg.cdev); | ||
430 | g = &l->g; | ||
431 | |||
432 | nvgpu_log_fn(g, " "); | ||
433 | |||
434 | ret = gk20a_busy(g); | ||
435 | if (ret) { | ||
436 | nvgpu_err(g, "failed to power on, %d", ret); | ||
437 | return ret; | ||
438 | } | ||
439 | |||
440 | ret = nvgpu_ioctl_tsg_open(&l->g, filp); | ||
441 | |||
442 | gk20a_idle(g); | ||
443 | nvgpu_log_fn(g, "done"); | ||
444 | return ret; | ||
445 | } | ||
446 | |||
447 | void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref) | ||
448 | { | ||
449 | struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); | ||
450 | struct gk20a *g = tsg->g; | ||
451 | |||
452 | gk20a_sched_ctrl_tsg_removed(g, tsg); | ||
453 | |||
454 | gk20a_tsg_release(ref); | ||
455 | gk20a_put(g); | ||
456 | } | ||
457 | |||
458 | int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp) | ||
459 | { | ||
460 | struct tsg_private *priv = filp->private_data; | ||
461 | struct tsg_gk20a *tsg = priv->tsg; | ||
462 | |||
463 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
464 | nvgpu_kfree(tsg->g, priv); | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g, | ||
469 | struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg) | ||
470 | { | ||
471 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
472 | struct gk20a_sched_ctrl *sched = &l->sched_ctrl; | ||
473 | u32 level = arg->level; | ||
474 | int err; | ||
475 | |||
476 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
477 | |||
478 | nvgpu_mutex_acquire(&sched->control_lock); | ||
479 | if (sched->control_locked) { | ||
480 | err = -EPERM; | ||
481 | goto done; | ||
482 | } | ||
483 | err = gk20a_busy(g); | ||
484 | if (err) { | ||
485 | nvgpu_err(g, "failed to power on gpu"); | ||
486 | goto done; | ||
487 | } | ||
488 | |||
489 | level = nvgpu_get_common_runlist_level(level); | ||
490 | err = gk20a_tsg_set_runlist_interleave(tsg, level); | ||
491 | |||
492 | gk20a_idle(g); | ||
493 | done: | ||
494 | nvgpu_mutex_release(&sched->control_lock); | ||
495 | return err; | ||
496 | } | ||
497 | |||
498 | static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g, | ||
499 | struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg) | ||
500 | { | ||
501 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
502 | struct gk20a_sched_ctrl *sched = &l->sched_ctrl; | ||
503 | int err; | ||
504 | |||
505 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); | ||
506 | |||
507 | nvgpu_mutex_acquire(&sched->control_lock); | ||
508 | if (sched->control_locked) { | ||
509 | err = -EPERM; | ||
510 | goto done; | ||
511 | } | ||
512 | err = gk20a_busy(g); | ||
513 | if (err) { | ||
514 | nvgpu_err(g, "failed to power on gpu"); | ||
515 | goto done; | ||
516 | } | ||
517 | err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us); | ||
518 | gk20a_idle(g); | ||
519 | done: | ||
520 | nvgpu_mutex_release(&sched->control_lock); | ||
521 | return err; | ||
522 | } | ||
523 | |||
/* NVGPU_IOCTL_TSG_GET_TIMESLICE handler: report the current timeslice (us). */
static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
	struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
	arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
	return 0;
}
530 | |||
531 | long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, | ||
532 | unsigned long arg) | ||
533 | { | ||
534 | struct tsg_private *priv = filp->private_data; | ||
535 | struct tsg_gk20a *tsg = priv->tsg; | ||
536 | struct gk20a *g = tsg->g; | ||
537 | u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE]; | ||
538 | int err = 0; | ||
539 | |||
540 | nvgpu_log_fn(g, "start %d", _IOC_NR(cmd)); | ||
541 | |||
542 | if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) || | ||
543 | (_IOC_NR(cmd) == 0) || | ||
544 | (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) || | ||
545 | (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE)) | ||
546 | return -EINVAL; | ||
547 | |||
548 | memset(buf, 0, sizeof(buf)); | ||
549 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
550 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
551 | return -EFAULT; | ||
552 | } | ||
553 | |||
554 | if (!g->sw_ready) { | ||
555 | err = gk20a_busy(g); | ||
556 | if (err) | ||
557 | return err; | ||
558 | |||
559 | gk20a_idle(g); | ||
560 | } | ||
561 | |||
562 | switch (cmd) { | ||
563 | case NVGPU_TSG_IOCTL_BIND_CHANNEL: | ||
564 | { | ||
565 | int ch_fd = *(int *)buf; | ||
566 | if (ch_fd < 0) { | ||
567 | err = -EINVAL; | ||
568 | break; | ||
569 | } | ||
570 | err = gk20a_tsg_bind_channel_fd(tsg, ch_fd); | ||
571 | break; | ||
572 | } | ||
573 | |||
574 | case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX: | ||
575 | { | ||
576 | err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg, | ||
577 | (struct nvgpu_tsg_bind_channel_ex_args *)buf); | ||
578 | break; | ||
579 | } | ||
580 | |||
581 | case NVGPU_TSG_IOCTL_UNBIND_CHANNEL: | ||
582 | { | ||
583 | int ch_fd = *(int *)buf; | ||
584 | |||
585 | if (ch_fd < 0) { | ||
586 | err = -EINVAL; | ||
587 | break; | ||
588 | } | ||
589 | err = gk20a_busy(g); | ||
590 | if (err) { | ||
591 | nvgpu_err(g, | ||
592 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
593 | break; | ||
594 | } | ||
595 | err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd); | ||
596 | gk20a_idle(g); | ||
597 | break; | ||
598 | } | ||
599 | |||
600 | case NVGPU_IOCTL_TSG_ENABLE: | ||
601 | { | ||
602 | err = gk20a_busy(g); | ||
603 | if (err) { | ||
604 | nvgpu_err(g, | ||
605 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
606 | return err; | ||
607 | } | ||
608 | g->ops.fifo.enable_tsg(tsg); | ||
609 | gk20a_idle(g); | ||
610 | break; | ||
611 | } | ||
612 | |||
613 | case NVGPU_IOCTL_TSG_DISABLE: | ||
614 | { | ||
615 | err = gk20a_busy(g); | ||
616 | if (err) { | ||
617 | nvgpu_err(g, | ||
618 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
619 | return err; | ||
620 | } | ||
621 | g->ops.fifo.disable_tsg(tsg); | ||
622 | gk20a_idle(g); | ||
623 | break; | ||
624 | } | ||
625 | |||
626 | case NVGPU_IOCTL_TSG_PREEMPT: | ||
627 | { | ||
628 | err = gk20a_busy(g); | ||
629 | if (err) { | ||
630 | nvgpu_err(g, | ||
631 | "failed to host gk20a for ioctl cmd: 0x%x", cmd); | ||
632 | return err; | ||
633 | } | ||
634 | /* preempt TSG */ | ||
635 | err = g->ops.fifo.preempt_tsg(g, tsg->tsgid); | ||
636 | gk20a_idle(g); | ||
637 | break; | ||
638 | } | ||
639 | |||
640 | case NVGPU_IOCTL_TSG_EVENT_ID_CTRL: | ||
641 | { | ||
642 | err = gk20a_tsg_event_id_ctrl(g, tsg, | ||
643 | (struct nvgpu_event_id_ctrl_args *)buf); | ||
644 | break; | ||
645 | } | ||
646 | |||
647 | case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
648 | err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg, | ||
649 | (struct nvgpu_runlist_interleave_args *)buf); | ||
650 | break; | ||
651 | |||
652 | case NVGPU_IOCTL_TSG_SET_TIMESLICE: | ||
653 | { | ||
654 | err = gk20a_tsg_ioctl_set_timeslice(g, tsg, | ||
655 | (struct nvgpu_timeslice_args *)buf); | ||
656 | break; | ||
657 | } | ||
658 | case NVGPU_IOCTL_TSG_GET_TIMESLICE: | ||
659 | { | ||
660 | err = gk20a_tsg_ioctl_get_timeslice(g, tsg, | ||
661 | (struct nvgpu_timeslice_args *)buf); | ||
662 | break; | ||
663 | } | ||
664 | |||
665 | default: | ||
666 | nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", | ||
667 | cmd); | ||
668 | err = -ENOTTY; | ||
669 | break; | ||
670 | } | ||
671 | |||
672 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
673 | err = copy_to_user((void __user *)arg, | ||
674 | buf, _IOC_SIZE(cmd)); | ||
675 | |||
676 | return err; | ||
677 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h new file mode 100644 index 00000000..67399fd4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | #ifndef NVGPU_IOCTL_TSG_H | ||
14 | #define NVGPU_IOCTL_TSG_H | ||
15 | |||
/* Forward declarations: only pointers to these types are used below. */
struct inode;
struct file;
struct gk20a;
struct nvgpu_ref;

/* Linux character-device entry points for the TSG device node. */
int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp);
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg);
/*
 * Refcount release callback for a TSG; presumably invoked when the last
 * reference is dropped — confirm against the nvgpu_ref_put() call sites.
 */
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);
27 | |||
28 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/kmem.c b/drivers/gpu/nvgpu/os/linux/kmem.c new file mode 100644 index 00000000..10946a08 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem.c | |||
@@ -0,0 +1,654 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/mm.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/stacktrace.h> | ||
23 | |||
24 | #include <nvgpu/lock.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/atomic.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | |||
29 | #include "gk20a/gk20a.h" | ||
30 | |||
31 | #include "kmem_priv.h" | ||
32 | |||
33 | /* | ||
34 | * Statically declared because this needs to be shared across all nvgpu driver | ||
35 | * instances. This makes sure that all kmem caches are _definitely_ uniquely | ||
36 | * named. | ||
37 | */ | ||
38 | static atomic_t kmem_cache_id; | ||
39 | |||
40 | void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear) | ||
41 | { | ||
42 | void *p; | ||
43 | |||
44 | if (size > PAGE_SIZE) { | ||
45 | if (clear) | ||
46 | p = nvgpu_vzalloc(g, size); | ||
47 | else | ||
48 | p = nvgpu_vmalloc(g, size); | ||
49 | } else { | ||
50 | if (clear) | ||
51 | p = nvgpu_kzalloc(g, size); | ||
52 | else | ||
53 | p = nvgpu_kmalloc(g, size); | ||
54 | } | ||
55 | |||
56 | return p; | ||
57 | } | ||
58 | |||
/*
 * Free memory obtained from __nvgpu_big_alloc(). The backing allocator is
 * re-derived from the address itself via is_vmalloc_addr().
 *
 * This will have to be fixed eventually: allocs that use
 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc when
 * freeing.
 */
void nvgpu_big_free(struct gk20a *g, void *p)
{
	if (!is_vmalloc_addr(p)) {
		nvgpu_kfree(g, p);
		return;
	}

	nvgpu_vfree(g, p);
}
71 | |||
72 | void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
73 | { | ||
74 | void *alloc; | ||
75 | |||
76 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
77 | alloc = __nvgpu_track_kmalloc(g, size, ip); | ||
78 | #else | ||
79 | alloc = kmalloc(size, GFP_KERNEL); | ||
80 | #endif | ||
81 | |||
82 | kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
83 | size, alloc, GFP_KERNEL); | ||
84 | |||
85 | return alloc; | ||
86 | } | ||
87 | |||
88 | void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
89 | { | ||
90 | void *alloc; | ||
91 | |||
92 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
93 | alloc = __nvgpu_track_kzalloc(g, size, ip); | ||
94 | #else | ||
95 | alloc = kzalloc(size, GFP_KERNEL); | ||
96 | #endif | ||
97 | |||
98 | kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
99 | size, alloc, GFP_KERNEL); | ||
100 | |||
101 | return alloc; | ||
102 | } | ||
103 | |||
104 | void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip) | ||
105 | { | ||
106 | void *alloc; | ||
107 | |||
108 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
109 | alloc = __nvgpu_track_kcalloc(g, n, size, ip); | ||
110 | #else | ||
111 | alloc = kcalloc(n, size, GFP_KERNEL); | ||
112 | #endif | ||
113 | |||
114 | kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x", | ||
115 | n * size, alloc, GFP_KERNEL); | ||
116 | |||
117 | return alloc; | ||
118 | } | ||
119 | |||
/*
 * Backend for nvgpu_vmalloc(): virtually-contiguous allocation, with
 * optional memory-usage tracking.
 */
void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
	alloc = vmalloc(size);
#endif

	/* %lu: @size is unsigned long; %ld was a signedness mismatch. */
	kmem_dbg(g, "vmalloc: size=%-6lu addr=0x%p", size, alloc);

	return alloc;
}
134 | |||
/*
 * Backend for nvgpu_vzalloc(): zeroing variant of __nvgpu_vmalloc(), with
 * optional memory-usage tracking.
 */
void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
	alloc = vzalloc(size);
#endif

	/* %lu: @size is unsigned long; %ld was a signedness mismatch. */
	kmem_dbg(g, "vzalloc: size=%-6lu addr=0x%p", size, alloc);

	return alloc;
}
149 | |||
/*
 * Backend for nvgpu_kfree(): logs the free, then routes through the
 * tracking layer (which also scrubs and unaccounts the allocation) when
 * CONFIG_NVGPU_TRACK_MEM_USAGE is set.
 */
void __nvgpu_kfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_kfree(g, addr);
#else
	kfree(addr);
#endif
}
159 | |||
/*
 * Backend for nvgpu_vfree(): vmalloc counterpart of __nvgpu_kfree().
 */
void __nvgpu_vfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_vfree(g, addr);
#else
	vfree(addr);
#endif
}
169 | |||
170 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
171 | |||
/* Serialize access to a tracker's rbtree and counters. */
void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_acquire(&tracker->lock);
}
176 | |||
/* Release the lock taken by nvgpu_lock_tracker(). */
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_release(&tracker->lock);
}
181 | |||
182 | void kmem_print_mem_alloc(struct gk20a *g, | ||
183 | struct nvgpu_mem_alloc *alloc, | ||
184 | struct seq_file *s) | ||
185 | { | ||
186 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | ||
187 | int i; | ||
188 | |||
189 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n", | ||
190 | alloc->addr, alloc->size); | ||
191 | for (i = 0; i < alloc->stack_length; i++) | ||
192 | __pstat(s, " %3d [<%p>] %pS\n", i, | ||
193 | (void *)alloc->stack[i], | ||
194 | (void *)alloc->stack[i]); | ||
195 | __pstat(s, "\n"); | ||
196 | #else | ||
197 | __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n", | ||
198 | alloc->addr, alloc->size, alloc->ip); | ||
199 | #endif | ||
200 | } | ||
201 | |||
/*
 * Insert @alloc into @tracker's rbtree, keyed by the [addr, addr + size)
 * range. Caller must hold the tracker lock.
 *
 * NOTE(review): this always returns 0 — nvgpu_rbtree_insert() reports no
 * status — so the "Duplicate alloc???" WARN in __nvgpu_save_kmem_alloc()
 * can never fire.
 */
static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
			struct nvgpu_mem_alloc *alloc)
{
	alloc->allocs_entry.key_start = alloc->addr;
	alloc->allocs_entry.key_end = alloc->addr + alloc->size;

	nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
	return 0;
}
211 | |||
/*
 * Look up the allocation record covering @alloc_addr, unlink it from the
 * tracker's rbtree and return it; NULL when no record matches. Caller must
 * hold the tracker lock and owns the returned record afterwards.
 */
static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
	struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
	struct nvgpu_mem_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_mem_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &tracker->allocs);

	return alloc;
}
228 | |||
/*
 * Record one allocation in @tracker's rbtree and update its statistics.
 *
 * @size:      size the caller asked for.
 * @real_size: size actually consumed (callers pass roundup_pow_of_two(size)).
 * @addr:      allocation address as a u64.
 * @ip:        caller's instruction pointer, for attributing the allocation.
 *
 * Returns 0 on success, -ENOMEM when the bookkeeping struct cannot be
 * allocated; all callers deliberately ignore the result.
 *
 * NOTE(review): the record comes from kzalloc(), not from the tracker's
 * allocs_cache created in nvgpu_kmem_init() — that cache appears unused
 * here; confirm whether it is still needed.
 */
static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
	unsigned long size, unsigned long real_size,
	u64 addr, unsigned long ip)
{
	int ret;
	struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	struct stack_trace stack_trace;
#endif

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	alloc->owner = tracker;
	alloc->size = size;
	alloc->real_size = real_size;
	alloc->addr = addr;
	alloc->ip = (void *)(uintptr_t)ip;

#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	stack_trace.max_entries = MAX_STACK_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = alloc->stack;
	/*
	 * This 4 here skips the 2 function calls that happen for all traced
	 * allocs due to nvgpu:
	 *
	 *   __nvgpu_save_kmem_alloc+0x7c/0x128
	 *   __nvgpu_track_kzalloc+0xcc/0xf8
	 *
	 * And the function calls that get made by the stack trace code itself.
	 * If the trace savings code changes this will likely have to change
	 * as well.
	 */
	stack_trace.skip = 4;
	save_stack_trace(&stack_trace);
	alloc->stack_length = stack_trace.nr_entries;
#endif

	nvgpu_lock_tracker(tracker);
	tracker->bytes_alloced += size;
	tracker->bytes_alloced_real += real_size;
	tracker->nr_allocs++;

	/* Keep track of this for building a histogram later on. */
	if (tracker->max_alloc < size)
		tracker->max_alloc = size;
	if (tracker->min_alloc > size)
		tracker->min_alloc = size;

	ret = nvgpu_add_alloc(tracker, alloc);
	if (ret) {
		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
		kfree(alloc);
		nvgpu_unlock_tracker(tracker);
		return ret;
	}
	nvgpu_unlock_tracker(tracker);

	return 0;
}
291 | |||
292 | static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | ||
293 | u64 addr) | ||
294 | { | ||
295 | struct nvgpu_mem_alloc *alloc; | ||
296 | |||
297 | nvgpu_lock_tracker(tracker); | ||
298 | alloc = nvgpu_rem_alloc(tracker, addr); | ||
299 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { | ||
300 | nvgpu_unlock_tracker(tracker); | ||
301 | return -EINVAL; | ||
302 | } | ||
303 | |||
304 | memset((void *)alloc->addr, 0, alloc->size); | ||
305 | |||
306 | tracker->nr_frees++; | ||
307 | tracker->bytes_freed += alloc->size; | ||
308 | tracker->bytes_freed_real += alloc->real_size; | ||
309 | nvgpu_unlock_tracker(tracker); | ||
310 | |||
311 | return 0; | ||
312 | } | ||
313 | |||
/* Sanity check: vmalloc-tracked allocs are expected to be at least a page. */
static void __nvgpu_check_valloc_size(unsigned long size)
{
	WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}
318 | |||
/* Sanity check: kmalloc-tracked allocs are expected to fit in one page. */
static void __nvgpu_check_kalloc_size(size_t size)
{
	WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}
323 | |||
324 | void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size, | ||
325 | unsigned long ip) | ||
326 | { | ||
327 | void *alloc = vmalloc(size); | ||
328 | |||
329 | if (!alloc) | ||
330 | return NULL; | ||
331 | |||
332 | __nvgpu_check_valloc_size(size); | ||
333 | |||
334 | /* | ||
335 | * Ignore the return message. If this fails let's not cause any issues | ||
336 | * for the rest of the driver. | ||
337 | */ | ||
338 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
339 | (u64)(uintptr_t)alloc, ip); | ||
340 | |||
341 | return alloc; | ||
342 | } | ||
343 | |||
344 | void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size, | ||
345 | unsigned long ip) | ||
346 | { | ||
347 | void *alloc = vzalloc(size); | ||
348 | |||
349 | if (!alloc) | ||
350 | return NULL; | ||
351 | |||
352 | __nvgpu_check_valloc_size(size); | ||
353 | |||
354 | /* | ||
355 | * Ignore the return message. If this fails let's not cause any issues | ||
356 | * for the rest of the driver. | ||
357 | */ | ||
358 | __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size), | ||
359 | (u64)(uintptr_t)alloc, ip); | ||
360 | |||
361 | return alloc; | ||
362 | } | ||
363 | |||
364 | void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
365 | { | ||
366 | void *alloc = kmalloc(size, GFP_KERNEL); | ||
367 | |||
368 | if (!alloc) | ||
369 | return NULL; | ||
370 | |||
371 | __nvgpu_check_kalloc_size(size); | ||
372 | |||
373 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
374 | (u64)(uintptr_t)alloc, ip); | ||
375 | |||
376 | return alloc; | ||
377 | } | ||
378 | |||
379 | void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip) | ||
380 | { | ||
381 | void *alloc = kzalloc(size, GFP_KERNEL); | ||
382 | |||
383 | if (!alloc) | ||
384 | return NULL; | ||
385 | |||
386 | __nvgpu_check_kalloc_size(size); | ||
387 | |||
388 | __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size), | ||
389 | (u64)(uintptr_t)alloc, ip); | ||
390 | |||
391 | return alloc; | ||
392 | } | ||
393 | |||
394 | void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size, | ||
395 | unsigned long ip) | ||
396 | { | ||
397 | void *alloc = kcalloc(n, size, GFP_KERNEL); | ||
398 | |||
399 | if (!alloc) | ||
400 | return NULL; | ||
401 | |||
402 | __nvgpu_check_kalloc_size(n * size); | ||
403 | |||
404 | __nvgpu_save_kmem_alloc(g->kmallocs, n * size, | ||
405 | roundup_pow_of_two(n * size), | ||
406 | (u64)(uintptr_t)alloc, ip); | ||
407 | |||
408 | return alloc; | ||
409 | } | ||
410 | |||
411 | void __nvgpu_track_vfree(struct gk20a *g, void *addr) | ||
412 | { | ||
413 | /* | ||
414 | * Often it is accepted practice to pass NULL pointers into free | ||
415 | * functions to save code. | ||
416 | */ | ||
417 | if (!addr) | ||
418 | return; | ||
419 | |||
420 | __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr); | ||
421 | |||
422 | vfree(addr); | ||
423 | } | ||
424 | |||
425 | void __nvgpu_track_kfree(struct gk20a *g, void *addr) | ||
426 | { | ||
427 | if (!addr) | ||
428 | return; | ||
429 | |||
430 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); | ||
431 | |||
432 | kfree(addr); | ||
433 | } | ||
434 | |||
/*
 * Walk one tracker's rbtree, counting (and, unless @silent, printing) every
 * allocation still recorded.
 *
 * NOTE(review): the @type label ("kmalloc"/"vmalloc") is currently unused;
 * either print it or drop the parameter.
 */
static int __do_check_for_outstanding_allocs(
	struct gk20a *g,
	struct nvgpu_mem_alloc_tracker *tracker,
	const char *type, bool silent)
{
	struct nvgpu_rbtree_node *node;
	int count = 0;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		/* NULL seq_file makes kmem_print_mem_alloc() log via pr_info. */
		if (!silent)
			kmem_print_mem_alloc(g, alloc, NULL);

		count++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	return count;
}
457 | |||
458 | /** | ||
459 | * check_for_outstanding_allocs - Count and display outstanding allocs | ||
460 | * | ||
461 | * @g - The GPU. | ||
462 | * @silent - If set don't print anything about the allocs. | ||
463 | * | ||
464 | * Dump (or just count) the number of allocations left outstanding. | ||
465 | */ | ||
466 | static int check_for_outstanding_allocs(struct gk20a *g, bool silent) | ||
467 | { | ||
468 | int count = 0; | ||
469 | |||
470 | count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc", | ||
471 | silent); | ||
472 | count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc", | ||
473 | silent); | ||
474 | |||
475 | return count; | ||
476 | } | ||
477 | |||
478 | static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker, | ||
479 | void (*force_free_func)(const void *)) | ||
480 | { | ||
481 | struct nvgpu_rbtree_node *node; | ||
482 | |||
483 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
484 | while (node) { | ||
485 | struct nvgpu_mem_alloc *alloc = | ||
486 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
487 | |||
488 | if (force_free_func) | ||
489 | force_free_func((void *)alloc->addr); | ||
490 | |||
491 | nvgpu_rbtree_unlink(node, &tracker->allocs); | ||
492 | kfree(alloc); | ||
493 | |||
494 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
495 | } | ||
496 | } | ||
497 | |||
498 | /** | ||
499 | * nvgpu_kmem_cleanup - Cleanup the kmem tracking | ||
500 | * | ||
501 | * @g - The GPU. | ||
502 | * @force_free - If set will also free leaked objects if possible. | ||
503 | * | ||
504 | * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free | ||
505 | * is non-zero then the allocation made by nvgpu is also freed. This is risky, | ||
506 | * though, as it is possible that the memory is still in use by other parts of | ||
507 | * the GPU driver not aware that this has happened. | ||
508 | * | ||
509 | * In theory it should be fine if the GPU driver has been deinitialized and | ||
510 | * there are no bugs in that code. However, if there are any bugs in that code | ||
511 | * then they could likely manifest as odd crashes indeterminate amounts of time | ||
512 | * in the future. So use @force_free at your own risk. | ||
513 | */ | ||
514 | static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free) | ||
515 | { | ||
516 | do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL); | ||
517 | do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL); | ||
518 | } | ||
519 | |||
520 | void nvgpu_kmem_fini(struct gk20a *g, int flags) | ||
521 | { | ||
522 | int count; | ||
523 | bool silent, force_free; | ||
524 | |||
525 | if (!flags) | ||
526 | return; | ||
527 | |||
528 | silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS); | ||
529 | force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP); | ||
530 | |||
531 | count = check_for_outstanding_allocs(g, silent); | ||
532 | nvgpu_kmem_cleanup(g, force_free); | ||
533 | |||
534 | /* | ||
535 | * If we leak objects we can either BUG() out or just WARN(). In general | ||
536 | * it doesn't make sense to BUG() on here since leaking a few objects | ||
537 | * won't crash the kernel but it can be helpful for development. | ||
538 | * | ||
539 | * If neither flag is set then we just silently do nothing. | ||
540 | */ | ||
541 | if (count > 0) { | ||
542 | if (flags & NVGPU_KMEM_FINI_WARN) { | ||
543 | WARN(1, "Letting %d allocs leak!!\n", count); | ||
544 | } else if (flags & NVGPU_KMEM_FINI_BUG) { | ||
545 | nvgpu_err(g, "Letting %d allocs leak!!", count); | ||
546 | BUG(); | ||
547 | } | ||
548 | } | ||
549 | } | ||
550 | |||
551 | int nvgpu_kmem_init(struct gk20a *g) | ||
552 | { | ||
553 | int err; | ||
554 | |||
555 | g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL); | ||
556 | g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL); | ||
557 | |||
558 | if (!g->vmallocs || !g->kmallocs) { | ||
559 | err = -ENOMEM; | ||
560 | goto fail; | ||
561 | } | ||
562 | |||
563 | g->vmallocs->name = "vmalloc"; | ||
564 | g->kmallocs->name = "kmalloc"; | ||
565 | |||
566 | g->vmallocs->allocs = NULL; | ||
567 | g->kmallocs->allocs = NULL; | ||
568 | |||
569 | nvgpu_mutex_init(&g->vmallocs->lock); | ||
570 | nvgpu_mutex_init(&g->kmallocs->lock); | ||
571 | |||
572 | g->vmallocs->min_alloc = PAGE_SIZE; | ||
573 | g->kmallocs->min_alloc = KMALLOC_MIN_SIZE; | ||
574 | |||
575 | /* | ||
576 | * This needs to go after all the other initialization since they use | ||
577 | * the nvgpu_kzalloc() API. | ||
578 | */ | ||
579 | g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
580 | sizeof(struct nvgpu_mem_alloc)); | ||
581 | g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g, | ||
582 | sizeof(struct nvgpu_mem_alloc)); | ||
583 | |||
584 | if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) { | ||
585 | err = -ENOMEM; | ||
586 | if (g->vmallocs->allocs_cache) | ||
587 | nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache); | ||
588 | if (g->kmallocs->allocs_cache) | ||
589 | nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache); | ||
590 | goto fail; | ||
591 | } | ||
592 | |||
593 | return 0; | ||
594 | |||
595 | fail: | ||
596 | if (g->vmallocs) | ||
597 | kfree(g->vmallocs); | ||
598 | if (g->kmallocs) | ||
599 | kfree(g->kmallocs); | ||
600 | return err; | ||
601 | } | ||
602 | |||
603 | #else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
604 | |||
/* Memory-usage tracking disabled: nothing to set up. */
int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}
609 | |||
/* Memory-usage tracking disabled: nothing to tear down. */
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
613 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
614 | |||
615 | struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size) | ||
616 | { | ||
617 | struct nvgpu_kmem_cache *cache = | ||
618 | nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache)); | ||
619 | |||
620 | if (!cache) | ||
621 | return NULL; | ||
622 | |||
623 | cache->g = g; | ||
624 | |||
625 | snprintf(cache->name, sizeof(cache->name), | ||
626 | "nvgpu-cache-0x%p-%d-%d", g, (int)size, | ||
627 | atomic_inc_return(&kmem_cache_id)); | ||
628 | cache->cache = kmem_cache_create(cache->name, | ||
629 | size, size, 0, NULL); | ||
630 | if (!cache->cache) { | ||
631 | nvgpu_kfree(g, cache); | ||
632 | return NULL; | ||
633 | } | ||
634 | |||
635 | return cache; | ||
636 | } | ||
637 | |||
638 | void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache) | ||
639 | { | ||
640 | struct gk20a *g = cache->g; | ||
641 | |||
642 | kmem_cache_destroy(cache->cache); | ||
643 | nvgpu_kfree(g, cache); | ||
644 | } | ||
645 | |||
/* Allocate one object from @cache; NULL on failure. */
void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
	return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}
650 | |||
/* Return an object obtained from nvgpu_kmem_cache_alloc() to @cache. */
void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
	kmem_cache_free(cache->cache, ptr);
}
diff --git a/drivers/gpu/nvgpu/os/linux/kmem_priv.h b/drivers/gpu/nvgpu/os/linux/kmem_priv.h new file mode 100644 index 00000000..a41762af --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/kmem_priv.h | |||
@@ -0,0 +1,105 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __KMEM_PRIV_H__ | ||
18 | #define __KMEM_PRIV_H__ | ||
19 | |||
20 | #include <nvgpu/rbtree.h> | ||
21 | #include <nvgpu/lock.h> | ||
22 | |||
23 | struct seq_file; | ||
24 | |||
/*
 * Print either into a seq_file (when @s is non-NULL, e.g. from debugfs) or,
 * with a NULL @s, straight to the kernel log.
 */
#define __pstat(s, fmt, msg...)			\
	do {					\
		if (s)				\
			seq_printf(s, fmt, ##msg); \
		else				\
			pr_info(fmt, ##msg);	\
	} while (0)
32 | |||
33 | #define MAX_STACK_TRACE 20 | ||
34 | |||
35 | /* | ||
36 | * Linux specific version of the nvgpu_kmem_cache struct. This type is | ||
37 | * completely opaque to the rest of the driver. | ||
38 | */ | ||
39 | struct nvgpu_kmem_cache { | ||
40 | struct gk20a *g; | ||
41 | struct kmem_cache *cache; | ||
42 | |||
43 | /* | ||
44 | * Memory to hold the kmem_cache unique name. Only necessary on our | ||
45 | * k3.10 kernel when not using the SLUB allocator but it's easier to | ||
46 | * just carry this on to newer kernels. | ||
47 | */ | ||
48 | char name[128]; | ||
49 | }; | ||
50 | |||
51 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | ||
52 | |||
/*
 * One tracked allocation. Lives in the owning tracker's rbtree, keyed by
 * the [addr, addr + size) range.
 */
struct nvgpu_mem_alloc {
	struct nvgpu_mem_alloc_tracker *owner;

	void *ip;	/* Allocation-site instruction pointer (see %pF dump). */
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	unsigned long stack[MAX_STACK_TRACE];
	int stack_length;
#endif

	u64 addr;	/* CPU virtual address of the allocation, as u64. */

	unsigned long size;		/* Size the caller requested. */
	unsigned long real_size;	/* Rounded-up size actually charged. */

	struct nvgpu_rbtree_node allocs_entry;
};
69 | |||
70 | static inline struct nvgpu_mem_alloc * | ||
71 | nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node) | ||
72 | { | ||
73 | return (struct nvgpu_mem_alloc *) | ||
74 | ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry)); | ||
75 | }; | ||
76 | |||
77 | /* | ||
78 | * Linux specific tracking of vmalloc, kmalloc, etc. | ||
79 | */ | ||
80 | struct nvgpu_mem_alloc_tracker { | ||
81 | const char *name; | ||
82 | struct nvgpu_kmem_cache *allocs_cache; | ||
83 | struct nvgpu_rbtree_node *allocs; | ||
84 | struct nvgpu_mutex lock; | ||
85 | |||
86 | u64 bytes_alloced; | ||
87 | u64 bytes_freed; | ||
88 | u64 bytes_alloced_real; | ||
89 | u64 bytes_freed_real; | ||
90 | u64 nr_allocs; | ||
91 | u64 nr_frees; | ||
92 | |||
93 | unsigned long min_alloc; | ||
94 | unsigned long max_alloc; | ||
95 | }; | ||
96 | |||
97 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
98 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker); | ||
99 | |||
100 | void kmem_print_mem_alloc(struct gk20a *g, | ||
101 | struct nvgpu_mem_alloc *alloc, | ||
102 | struct seq_file *s); | ||
103 | #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ | ||
104 | |||
105 | #endif /* __KMEM_PRIV_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/os/linux/log.c b/drivers/gpu/nvgpu/os/linux/log.c new file mode 100644 index 00000000..ca29e0f3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/log.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/device.h> | ||
19 | |||
20 | #include <nvgpu/log.h> | ||
21 | |||
22 | #include "gk20a/gk20a.h" | ||
23 | #include "platform_gk20a.h" | ||
24 | #include "os_linux.h" | ||
25 | |||
26 | /* | ||
27 | * Define a length for log buffers. This is the buffer that the 'fmt, ...' part | ||
28 | * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it | ||
29 | * needs to not be overly sized since we have limited kernel stack space. But at | ||
30 | * the same time we don't want it to be restrictive either. | ||
31 | */ | ||
32 | #define LOG_BUFFER_LENGTH 160 | ||
33 | |||
34 | /* | ||
35 | * Annoying quirk of Linux: this has to be a string literal since the printk() | ||
36 | * function and friends use the preprocessor to concatenate stuff to the start | ||
37 | * of this string when printing. | ||
38 | */ | ||
39 | #define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n" | ||
40 | |||
/*
 * Severity tags, indexed by enum nvgpu_log_type in
 * __nvgpu_really_print_log(); the order here must match that enum
 * (presumably ERROR, WARNING, DEBUG, INFO — confirm against <nvgpu/log.h>).
 */
static const char *log_types[] = {
	"ERR",
	"WRN",
	"DBG",
	"INFO",
};
47 | |||
48 | int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask) | ||
49 | { | ||
50 | return !!(g->log_mask & log_mask); | ||
51 | } | ||
52 | |||
/* Device name used as the GPU tag in log lines. */
static inline const char *nvgpu_log_name(struct gk20a *g)
{
	return dev_name(dev_from_gk20a(g));
}
57 | |||
58 | #ifdef CONFIG_GK20A_TRACE_PRINTK | ||
/*
 * Emit a log line into the ftrace buffer instead of the printk log.
 * The @trace argument is currently unused here; the caller has already
 * decided to take the trace path before calling.
 */
static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
				const char *func_name, int line,
				const char *log_type, const char *log)
{
	trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
}
65 | #endif | ||
66 | |||
/*
 * Dispatch a fully formatted log line to the right kernel log level, or to
 * ftrace when @trace is set and CONFIG_GK20A_TRACE_PRINTK is enabled
 * (without that config, @trace is ignored).
 *
 * No default case in the switch: all enum nvgpu_log_type values are handled
 * explicitly, so the compiler can flag any newly added severity.
 */
static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
				const char *func_name, int line,
				enum nvgpu_log_type type, const char *log)
{
	const char *name = gpu_name ? gpu_name : "";
	const char *log_type = log_types[type];

#ifdef CONFIG_GK20A_TRACE_PRINTK
	if (trace)
		return __nvgpu_trace_printk_log(trace, name, func_name,
				line, log_type, log);
#endif
	switch (type) {
	case NVGPU_DEBUG:
		/*
		 * We could use pr_debug() here but we control debug enablement
		 * separately from the Linux kernel. Perhaps this is a bug in
		 * nvgpu.
		 */
		pr_info(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_INFO:
		pr_info(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_WARNING:
		pr_warn(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_ERROR:
		pr_err(LOG_FMT, name, func_name, line, log_type, log);
		break;
	}
}
99 | |||
100 | __attribute__((format (printf, 5, 6))) | ||
101 | void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line, | ||
102 | enum nvgpu_log_type type, const char *fmt, ...) | ||
103 | { | ||
104 | char log[LOG_BUFFER_LENGTH]; | ||
105 | va_list args; | ||
106 | |||
107 | va_start(args, fmt); | ||
108 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
109 | va_end(args); | ||
110 | |||
111 | __nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "", | ||
112 | func_name, line, type, log); | ||
113 | } | ||
114 | |||
115 | __attribute__((format (printf, 5, 6))) | ||
116 | void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask, | ||
117 | const char *func_name, int line, | ||
118 | const char *fmt, ...) | ||
119 | { | ||
120 | char log[LOG_BUFFER_LENGTH]; | ||
121 | va_list args; | ||
122 | |||
123 | if ((log_mask & g->log_mask) == 0) | ||
124 | return; | ||
125 | |||
126 | va_start(args, fmt); | ||
127 | vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args); | ||
128 | va_end(args); | ||
129 | |||
130 | __nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g), | ||
131 | func_name, line, NVGPU_DEBUG, log); | ||
132 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c new file mode 100644 index 00000000..af71cc81 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.c | |||
@@ -0,0 +1,1365 @@ | |||
1 | /* | ||
2 | * GK20A Graphics | ||
3 | * | ||
4 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/module.h> | ||
20 | #include <linux/of.h> | ||
21 | #include <linux/of_device.h> | ||
22 | #include <linux/of_platform.h> | ||
23 | #include <linux/of_address.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/pm_runtime.h> | ||
26 | #include <linux/reset.h> | ||
27 | #include <linux/platform/tegra/common.h> | ||
28 | #include <linux/pci.h> | ||
29 | |||
30 | #include <uapi/linux/nvgpu.h> | ||
31 | #include <dt-bindings/soc/gm20b-fuse.h> | ||
32 | #include <dt-bindings/soc/gp10b-fuse.h> | ||
33 | |||
34 | #include <soc/tegra/fuse.h> | ||
35 | |||
36 | #include <nvgpu/dma.h> | ||
37 | #include <nvgpu/kmem.h> | ||
38 | #include <nvgpu/nvgpu_common.h> | ||
39 | #include <nvgpu/soc.h> | ||
40 | #include <nvgpu/enabled.h> | ||
41 | #include <nvgpu/debug.h> | ||
42 | #include <nvgpu/ctxsw_trace.h> | ||
43 | #include <nvgpu/vidmem.h> | ||
44 | #include <nvgpu/sim.h> | ||
45 | |||
46 | #include "platform_gk20a.h" | ||
47 | #include "sysfs.h" | ||
48 | #include "vgpu/vgpu_linux.h" | ||
49 | #include "scale.h" | ||
50 | #include "pci.h" | ||
51 | #include "module.h" | ||
52 | #include "module_usermode.h" | ||
53 | #include "intr.h" | ||
54 | #include "ioctl.h" | ||
55 | |||
56 | #include "os_linux.h" | ||
57 | #include "ctxsw_trace.h" | ||
58 | #include "driver_common.h" | ||
59 | #include "channel.h" | ||
60 | |||
61 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
62 | #include "cde.h" | ||
63 | #endif | ||
64 | |||
65 | #define CLASS_NAME "nvidia-gpu" | ||
66 | /* TODO: Change to e.g. "nvidia-gpu%s" once we have symlinks in place. */ | ||
67 | |||
68 | #define GK20A_WAIT_FOR_IDLE_MS 2000 | ||
69 | |||
70 | #define CREATE_TRACE_POINTS | ||
71 | #include <trace/events/gk20a.h> | ||
72 | |||
73 | |||
74 | struct device_node *nvgpu_get_node(struct gk20a *g) | ||
75 | { | ||
76 | struct device *dev = dev_from_gk20a(g); | ||
77 | |||
78 | if (dev_is_pci(dev)) { | ||
79 | struct pci_bus *bus = to_pci_dev(dev)->bus; | ||
80 | |||
81 | while (!pci_is_root_bus(bus)) | ||
82 | bus = bus->parent; | ||
83 | |||
84 | return bus->bridge->parent->of_node; | ||
85 | } | ||
86 | |||
87 | return dev->of_node; | ||
88 | } | ||
89 | |||
/* Take a runtime PM reference on the GPU device without resuming it. */
void gk20a_busy_noresume(struct gk20a *g)
{
	pm_runtime_get_noresume(dev_from_gk20a(g));
}
94 | |||
95 | int gk20a_busy(struct gk20a *g) | ||
96 | { | ||
97 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
98 | int ret = 0; | ||
99 | struct device *dev; | ||
100 | |||
101 | if (!g) | ||
102 | return -ENODEV; | ||
103 | |||
104 | atomic_inc(&g->usage_count.atomic_var); | ||
105 | |||
106 | down_read(&l->busy_lock); | ||
107 | |||
108 | if (!gk20a_can_busy(g)) { | ||
109 | ret = -ENODEV; | ||
110 | atomic_dec(&g->usage_count.atomic_var); | ||
111 | goto fail; | ||
112 | } | ||
113 | |||
114 | dev = dev_from_gk20a(g); | ||
115 | |||
116 | if (pm_runtime_enabled(dev)) { | ||
117 | /* Increment usage count and attempt to resume device */ | ||
118 | ret = pm_runtime_get_sync(dev); | ||
119 | if (ret < 0) { | ||
120 | /* Mark suspended so runtime pm will retry later */ | ||
121 | pm_runtime_set_suspended(dev); | ||
122 | pm_runtime_put_noidle(dev); | ||
123 | atomic_dec(&g->usage_count.atomic_var); | ||
124 | goto fail; | ||
125 | } | ||
126 | } else { | ||
127 | nvgpu_mutex_acquire(&g->poweron_lock); | ||
128 | if (!g->power_on) { | ||
129 | ret = gk20a_gpu_is_virtual(dev) ? | ||
130 | vgpu_pm_finalize_poweron(dev) | ||
131 | : gk20a_pm_finalize_poweron(dev); | ||
132 | if (ret) { | ||
133 | atomic_dec(&g->usage_count.atomic_var); | ||
134 | nvgpu_mutex_release(&g->poweron_lock); | ||
135 | goto fail; | ||
136 | } | ||
137 | } | ||
138 | nvgpu_mutex_release(&g->poweron_lock); | ||
139 | } | ||
140 | |||
141 | fail: | ||
142 | up_read(&l->busy_lock); | ||
143 | |||
144 | return ret < 0 ? ret : 0; | ||
145 | } | ||
146 | |||
/* Drop a runtime PM reference without triggering idle/suspend handling. */
void gk20a_idle_nosuspend(struct gk20a *g)
{
	pm_runtime_put_noidle(dev_from_gk20a(g));
}
151 | |||
152 | void gk20a_idle(struct gk20a *g) | ||
153 | { | ||
154 | struct device *dev; | ||
155 | |||
156 | atomic_dec(&g->usage_count.atomic_var); | ||
157 | |||
158 | dev = dev_from_gk20a(g); | ||
159 | |||
160 | if (!(dev && gk20a_can_busy(g))) | ||
161 | return; | ||
162 | |||
163 | if (pm_runtime_enabled(dev)) { | ||
164 | pm_runtime_mark_last_busy(dev); | ||
165 | pm_runtime_put_sync_autosuspend(dev); | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Undoes gk20a_lockout_registers(). | ||
171 | */ | ||
172 | static int gk20a_restore_registers(struct gk20a *g) | ||
173 | { | ||
174 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
175 | |||
176 | l->regs = l->regs_saved; | ||
177 | l->bar1 = l->bar1_saved; | ||
178 | |||
179 | nvgpu_restore_usermode_registers(g); | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
/*
 * Initialize Linux-only operation tables. CDE is currently the only
 * such op set; without it there is nothing to do.
 */
static int nvgpu_init_os_linux_ops(struct nvgpu_os_linux *l)
{
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	return nvgpu_cde_init_ops(l);
#else
	return 0;
#endif
}
194 | |||
195 | int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) | ||
196 | { | ||
197 | struct gk20a *g = &l->g; | ||
198 | int err; | ||
199 | |||
200 | if (l->init_done) | ||
201 | return 0; | ||
202 | |||
203 | err = nvgpu_init_channel_support_linux(l); | ||
204 | if (err) { | ||
205 | nvgpu_err(g, "failed to init linux channel support"); | ||
206 | return err; | ||
207 | } | ||
208 | |||
209 | l->init_done = true; | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | int gk20a_pm_finalize_poweron(struct device *dev) | ||
215 | { | ||
216 | struct gk20a *g = get_gk20a(dev); | ||
217 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
218 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
219 | int err; | ||
220 | |||
221 | nvgpu_log_fn(g, " "); | ||
222 | |||
223 | if (g->power_on) | ||
224 | return 0; | ||
225 | |||
226 | trace_gk20a_finalize_poweron(dev_name(dev)); | ||
227 | |||
228 | /* Increment platform power refcount */ | ||
229 | if (platform->busy) { | ||
230 | err = platform->busy(dev); | ||
231 | if (err < 0) { | ||
232 | nvgpu_err(g, "failed to poweron platform dependency"); | ||
233 | return err; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | err = gk20a_restore_registers(g); | ||
238 | if (err) | ||
239 | return err; | ||
240 | |||
241 | /* Enable interrupt workqueue */ | ||
242 | if (!l->nonstall_work_queue) { | ||
243 | l->nonstall_work_queue = alloc_workqueue("%s", | ||
244 | WQ_HIGHPRI, 1, "mc_nonstall"); | ||
245 | INIT_WORK(&l->nonstall_fn_work, nvgpu_intr_nonstall_cb); | ||
246 | } | ||
247 | |||
248 | err = gk20a_detect_chip(g); | ||
249 | if (err) | ||
250 | return err; | ||
251 | |||
252 | if (g->sim) { | ||
253 | if (g->sim->sim_init_late) | ||
254 | g->sim->sim_init_late(g); | ||
255 | } | ||
256 | |||
257 | err = gk20a_finalize_poweron(g); | ||
258 | if (err) | ||
259 | goto done; | ||
260 | |||
261 | err = nvgpu_finalize_poweron_linux(l); | ||
262 | if (err) | ||
263 | goto done; | ||
264 | |||
265 | nvgpu_init_mm_ce_context(g); | ||
266 | |||
267 | nvgpu_vidmem_thread_unpause(&g->mm); | ||
268 | |||
269 | /* Initialise scaling: it will initialize scaling drive only once */ | ||
270 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ) && | ||
271 | nvgpu_platform_is_silicon(g)) { | ||
272 | gk20a_scale_init(dev); | ||
273 | if (platform->initscale) | ||
274 | platform->initscale(dev); | ||
275 | } | ||
276 | |||
277 | trace_gk20a_finalize_poweron_done(dev_name(dev)); | ||
278 | |||
279 | err = nvgpu_init_os_linux_ops(l); | ||
280 | if (err) | ||
281 | goto done; | ||
282 | |||
283 | enable_irq(g->irq_stall); | ||
284 | if (g->irq_stall != g->irq_nonstall) | ||
285 | enable_irq(g->irq_nonstall); | ||
286 | g->irqs_enabled = 1; | ||
287 | |||
288 | gk20a_scale_resume(dev_from_gk20a(g)); | ||
289 | |||
290 | #ifdef CONFIG_NVGPU_SUPPORT_CDE | ||
291 | if (platform->has_cde) | ||
292 | gk20a_init_cde_support(l); | ||
293 | #endif | ||
294 | |||
295 | err = gk20a_sched_ctrl_init(g); | ||
296 | if (err) { | ||
297 | nvgpu_err(g, "failed to init sched control"); | ||
298 | return err; | ||
299 | } | ||
300 | |||
301 | g->sw_ready = true; | ||
302 | |||
303 | done: | ||
304 | if (err) | ||
305 | g->power_on = false; | ||
306 | |||
307 | return err; | ||
308 | } | ||
309 | |||
/*
 * Locks out the driver from accessing GPU registers. This prevents access to
 * these registers after the GPU has been clock or power gated. This should
 * help find annoying bugs where register reads and writes are silently dropped
 * after the GPU has been turned off. On older chips these reads and writes can
 * also lock the entire CPU up.
 */
static int gk20a_lockout_registers(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/* NULL the mappings so any stale access faults loudly; the saved
	 * copies are restored later by gk20a_restore_registers(). */
	l->regs = NULL;
	l->bar1 = NULL;

	nvgpu_lockout_usermode_registers(g);

	return 0;
}
328 | |||
/*
 * Power the GPU down: disable IRQs, suspend scaling and CDE, save HW state,
 * and lock out CPU register access. Serialized by g->poweroff_lock.
 *
 * On failure of the common prepare_poweroff step, IRQs and scaling are
 * restored so the GPU remains usable. Returns 0 on success.
 */
static int gk20a_pm_prepare_poweroff(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
#endif
	int ret = 0;
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	bool irqs_enabled;

	nvgpu_log_fn(g, " ");

	nvgpu_mutex_acquire(&g->poweroff_lock);

	/* Nothing to do if the GPU is already off. */
	if (!g->power_on)
		goto done;

	/* disable IRQs and wait for completion */
	irqs_enabled = g->irqs_enabled;
	if (irqs_enabled) {
		disable_irq(g->irq_stall);
		if (g->irq_stall != g->irq_nonstall)
			disable_irq(g->irq_nonstall);
		g->irqs_enabled = 0;
	}

	gk20a_scale_suspend(dev);

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	gk20a_cde_suspend(l);
#endif

	ret = gk20a_prepare_poweroff(g);
	if (ret)
		goto error;

	/* Decrement platform power refcount */
	if (platform->idle)
		platform->idle(dev);

	/* Stop CPU from accessing the GPU registers. */
	gk20a_lockout_registers(g);

	nvgpu_mutex_release(&g->poweroff_lock);
	return 0;

error:
	/* re-enabled IRQs if previously enabled */
	if (irqs_enabled) {
		enable_irq(g->irq_stall);
		if (g->irq_stall != g->irq_nonstall)
			enable_irq(g->irq_nonstall);
		g->irqs_enabled = 1;
	}

	gk20a_scale_resume(dev);
done:
	nvgpu_mutex_release(&g->poweroff_lock);

	return ret;
}
390 | |||
391 | static struct of_device_id tegra_gk20a_of_match[] = { | ||
392 | #ifdef CONFIG_TEGRA_GK20A | ||
393 | { .compatible = "nvidia,tegra210-gm20b", | ||
394 | .data = &gm20b_tegra_platform }, | ||
395 | { .compatible = "nvidia,tegra186-gp10b", | ||
396 | .data = &gp10b_tegra_platform }, | ||
397 | { .compatible = "nvidia,gv11b", | ||
398 | .data = &gv11b_tegra_platform }, | ||
399 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
400 | { .compatible = "nvidia,gv11b-vgpu", | ||
401 | .data = &gv11b_vgpu_tegra_platform}, | ||
402 | #endif | ||
403 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
404 | { .compatible = "nvidia,tegra124-gk20a-vgpu", | ||
405 | .data = &vgpu_tegra_platform }, | ||
406 | #endif | ||
407 | #endif | ||
408 | |||
409 | { }, | ||
410 | }; | ||
411 | |||
412 | #ifdef CONFIG_PM | ||
/**
 * __gk20a_do_idle() - force the GPU to idle and railgate
 *
 * In success, this call MUST be balanced by caller with __gk20a_do_unidle()
 *
 * Acquires two locks : &l->busy_lock and &platform->railgate_lock
 * In success, we hold these locks and return
 * In failure, we release these locks and return
 */
int __gk20a_do_idle(struct gk20a *g, bool force_reset)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct nvgpu_timeout timeout;
	int ref_cnt;
	int target_ref_cnt = 0;
	bool is_railgated;
	int err = 0;

	/*
	 * Hold back deterministic submits and changes to deterministic
	 * channels - this must be outside the power busy locks.
	 */
	gk20a_channel_deterministic_idle(g);

	/* acquire busy lock to block other busy() calls */
	down_write(&l->busy_lock);

	/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
	nvgpu_mutex_acquire(&platform->railgate_lock);

	/* check if it is already railgated ?
	 * NOTE(review): this is the success path — both locks are
	 * intentionally held on return, per the contract above. */
	if (platform->is_railgated(dev))
		return 0;

	/*
	 * release railgate_lock, prevent suspend by incrementing usage counter,
	 * re-acquire railgate_lock
	 */
	nvgpu_mutex_release(&platform->railgate_lock);
	pm_runtime_get_sync(dev);

	/*
	 * One refcount taken in this API
	 * If User disables rail gating, we take one more
	 * extra refcount
	 */
	if (g->can_railgate)
		target_ref_cnt = 1;
	else
		target_ref_cnt = 2;
	nvgpu_mutex_acquire(&platform->railgate_lock);

	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
			   NVGPU_TIMER_CPU_TIMER);

	/* check and wait until GPU is idle (with a timeout) */
	do {
		nvgpu_usleep_range(1000, 1100);
		ref_cnt = atomic_read(&dev->power.usage_count);
	} while (ref_cnt != target_ref_cnt && !nvgpu_timeout_expired(&timeout));

	if (ref_cnt != target_ref_cnt) {
		nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt",
			ref_cnt);
		goto fail_drop_usage_count;
	}

	/* check if global force_reset flag is set */
	force_reset |= platform->force_reset_in_do_idle;

	nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
			   NVGPU_TIMER_CPU_TIMER);

	if (g->can_railgate && !force_reset) {
		/*
		 * Case 1 : GPU railgate is supported
		 *
		 * if GPU is now idle, we will have only one ref count,
		 * drop this ref which will rail gate the GPU
		 */
		pm_runtime_put_sync(dev);

		/* add sufficient delay to allow GPU to rail gate */
		nvgpu_msleep(g->railgate_delay);

		/* check in loop if GPU is railgated or not */
		do {
			nvgpu_usleep_range(1000, 1100);
			is_railgated = platform->is_railgated(dev);
		} while (!is_railgated && !nvgpu_timeout_expired(&timeout));

		if (is_railgated) {
			/* success: locks stay held for __gk20a_do_unidle() */
			return 0;
		} else {
			nvgpu_err(g, "failed to idle in timeout");
			goto fail_timeout;
		}
	} else {
		/*
		 * Case 2 : GPU railgate is not supported or we explicitly
		 * do not want to depend on runtime PM
		 *
		 * if GPU is now idle, call prepare_poweroff() to save the
		 * state and then do explicit railgate
		 *
		 * __gk20a_do_unidle() needs to unrailgate, call
		 * finalize_poweron(), and then call pm_runtime_put_sync()
		 * to balance the GPU usage counter
		 */

		/* Save the GPU state */
		err = gk20a_pm_prepare_poweroff(dev);
		if (err)
			goto fail_drop_usage_count;

		/* railgate GPU */
		platform->railgate(dev);

		nvgpu_udelay(10);

		/* tells __gk20a_do_unidle() it must unrailgate/poweron */
		g->forced_reset = true;
		return 0;
	}

fail_drop_usage_count:
	pm_runtime_put_noidle(dev);
fail_timeout:
	nvgpu_mutex_release(&platform->railgate_lock);
	up_write(&l->busy_lock);
	gk20a_channel_deterministic_unidle(g);
	return -EBUSY;
}
547 | |||
548 | /** | ||
549 | * gk20a_do_idle() - wrap up for __gk20a_do_idle() to be called | ||
550 | * from outside of GPU driver | ||
551 | * | ||
552 | * In success, this call MUST be balanced by caller with gk20a_do_unidle() | ||
553 | */ | ||
554 | static int gk20a_do_idle(void *_g) | ||
555 | { | ||
556 | struct gk20a *g = (struct gk20a *)_g; | ||
557 | |||
558 | return __gk20a_do_idle(g, true); | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * __gk20a_do_unidle() - unblock all the tasks blocked by __gk20a_do_idle() | ||
563 | */ | ||
564 | int __gk20a_do_unidle(struct gk20a *g) | ||
565 | { | ||
566 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
567 | struct device *dev = dev_from_gk20a(g); | ||
568 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
569 | int err; | ||
570 | |||
571 | if (g->forced_reset) { | ||
572 | /* | ||
573 | * If we did a forced-reset/railgate | ||
574 | * then unrailgate the GPU here first | ||
575 | */ | ||
576 | platform->unrailgate(dev); | ||
577 | |||
578 | /* restore the GPU state */ | ||
579 | err = gk20a_pm_finalize_poweron(dev); | ||
580 | if (err) | ||
581 | return err; | ||
582 | |||
583 | /* balance GPU usage counter */ | ||
584 | pm_runtime_put_sync(dev); | ||
585 | |||
586 | g->forced_reset = false; | ||
587 | } | ||
588 | |||
589 | /* release the lock and open up all other busy() calls */ | ||
590 | nvgpu_mutex_release(&platform->railgate_lock); | ||
591 | up_write(&l->busy_lock); | ||
592 | |||
593 | gk20a_channel_deterministic_unidle(g); | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
/**
 * gk20a_do_unidle() - wrap up for __gk20a_do_unidle()
 */
static int gk20a_do_unidle(void *_g)
{
	return __gk20a_do_unidle((struct gk20a *)_g);
}
607 | #endif | ||
608 | |||
609 | void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i, | ||
610 | struct resource **out) | ||
611 | { | ||
612 | struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, i); | ||
613 | |||
614 | if (!r) | ||
615 | return NULL; | ||
616 | if (out) | ||
617 | *out = r; | ||
618 | return devm_ioremap_resource(&dev->dev, r); | ||
619 | } | ||
620 | |||
621 | static irqreturn_t gk20a_intr_isr_stall(int irq, void *dev_id) | ||
622 | { | ||
623 | struct gk20a *g = dev_id; | ||
624 | |||
625 | return nvgpu_intr_stall(g); | ||
626 | } | ||
627 | |||
628 | static irqreturn_t gk20a_intr_isr_nonstall(int irq, void *dev_id) | ||
629 | { | ||
630 | struct gk20a *g = dev_id; | ||
631 | |||
632 | return nvgpu_intr_nonstall(g); | ||
633 | } | ||
634 | |||
635 | static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id) | ||
636 | { | ||
637 | struct gk20a *g = dev_id; | ||
638 | |||
639 | return nvgpu_intr_thread_stall(g); | ||
640 | } | ||
641 | |||
/*
 * Tear down all driver support structures in roughly reverse order of
 * their initialization: sub-unit remove_support hooks, simulator
 * support, register mappings, and finally the enabled-flags storage.
 */
void gk20a_remove_support(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct sim_nvgpu_linux *sim_linux;

	/* Stop external callers from driving idle/unidle on this GPU. */
	tegra_unregister_idle_unidle(gk20a_do_idle);

	nvgpu_kfree(g, g->dbg_regops_tmp_buf);

	nvgpu_remove_channel_support_linux(l);

	/* Each sub-unit only set remove_support if it initialized. */
	if (g->pmu.remove_support)
		g->pmu.remove_support(&g->pmu);

	if (g->gr.remove_support)
		g->gr.remove_support(&g->gr);

	if (g->mm.remove_ce_support)
		g->mm.remove_ce_support(&g->mm);

	if (g->fifo.remove_support)
		g->fifo.remove_support(&g->fifo);

	if (g->mm.remove_support)
		g->mm.remove_support(&g->mm);

	if (g->sim) {
		sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
		if (g->sim->remove_support)
			g->sim->remove_support(g);
		if (sim_linux->remove_support_linux)
			sim_linux->remove_support_linux(g);
	}

	/* free mappings to registers, etc */
	if (l->regs) {
		iounmap(l->regs);
		l->regs = NULL;
	}
	if (l->bar1) {
		iounmap(l->bar1);
		l->bar1 = NULL;
	}

	nvgpu_remove_usermode_support(g);

	nvgpu_free_enabled_flags(g);
}
690 | |||
/*
 * Map BAR0/BAR1 registers, set up simulator support, and register the
 * idle/unidle callbacks. On any failure the mappings made so far are
 * undone. Returns 0 on success or a negative error code.
 */
static int gk20a_init_support(struct platform_device *dev)
{
	int err = -ENOMEM;
	struct gk20a *g = get_gk20a(&dev->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	tegra_register_idle_unidle(gk20a_do_idle, gk20a_do_unidle, g);

	/* BAR0: main register aperture. */
	l->regs = nvgpu_ioremap_resource(dev, GK20A_BAR0_IORESOURCE_MEM,
					 &l->reg_mem);
	if (IS_ERR(l->regs)) {
		nvgpu_err(g, "failed to remap gk20a registers");
		err = PTR_ERR(l->regs);
		goto fail;
	}

	/* BAR1: GPU virtual-memory aperture. */
	l->bar1 = nvgpu_ioremap_resource(dev, GK20A_BAR1_IORESOURCE_MEM,
					 &l->bar1_mem);
	if (IS_ERR(l->bar1)) {
		nvgpu_err(g, "failed to remap gk20a bar1");
		err = PTR_ERR(l->bar1);
		goto fail;
	}

	err = nvgpu_init_sim_support_linux(g, dev);
	if (err)
		goto fail;
	err = nvgpu_init_sim_support(g);
	if (err)
		goto fail_sim;

	nvgpu_init_usermode_support(g);
	return 0;

fail_sim:
	nvgpu_remove_sim_support_linux(g);
fail:
	/* Unmap whatever was mapped before the failure. */
	if (l->regs) {
		iounmap(l->regs);
		l->regs = NULL;
	}
	if (l->bar1) {
		iounmap(l->bar1);
		l->bar1 = NULL;
	}

	return err;
}
739 | |||
740 | static int gk20a_pm_railgate(struct device *dev) | ||
741 | { | ||
742 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
743 | int ret = 0; | ||
744 | struct gk20a *g = get_gk20a(dev); | ||
745 | |||
746 | /* if platform is already railgated, then just return */ | ||
747 | if (platform->is_railgated && platform->is_railgated(dev)) | ||
748 | return ret; | ||
749 | |||
750 | #ifdef CONFIG_DEBUG_FS | ||
751 | g->pstats.last_rail_gate_start = jiffies; | ||
752 | |||
753 | if (g->pstats.railgating_cycle_count >= 1) | ||
754 | g->pstats.total_rail_ungate_time_ms = | ||
755 | g->pstats.total_rail_ungate_time_ms + | ||
756 | jiffies_to_msecs(g->pstats.last_rail_gate_start - | ||
757 | g->pstats.last_rail_ungate_complete); | ||
758 | #endif | ||
759 | |||
760 | if (platform->railgate) | ||
761 | ret = platform->railgate(dev); | ||
762 | if (ret) { | ||
763 | nvgpu_err(g, "failed to railgate platform, err=%d", ret); | ||
764 | return ret; | ||
765 | } | ||
766 | |||
767 | #ifdef CONFIG_DEBUG_FS | ||
768 | g->pstats.last_rail_gate_complete = jiffies; | ||
769 | #endif | ||
770 | ret = tegra_fuse_clock_disable(); | ||
771 | if (ret) | ||
772 | nvgpu_err(g, "failed to disable tegra fuse clock, err=%d", ret); | ||
773 | |||
774 | return ret; | ||
775 | } | ||
776 | |||
777 | static int gk20a_pm_unrailgate(struct device *dev) | ||
778 | { | ||
779 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
780 | int ret = 0; | ||
781 | struct gk20a *g = get_gk20a(dev); | ||
782 | |||
783 | ret = tegra_fuse_clock_enable(); | ||
784 | if (ret) { | ||
785 | nvgpu_err(g, "failed to enable tegra fuse clock, err=%d", ret); | ||
786 | return ret; | ||
787 | } | ||
788 | #ifdef CONFIG_DEBUG_FS | ||
789 | g->pstats.last_rail_ungate_start = jiffies; | ||
790 | if (g->pstats.railgating_cycle_count >= 1) | ||
791 | g->pstats.total_rail_gate_time_ms = | ||
792 | g->pstats.total_rail_gate_time_ms + | ||
793 | jiffies_to_msecs(g->pstats.last_rail_ungate_start - | ||
794 | g->pstats.last_rail_gate_complete); | ||
795 | |||
796 | g->pstats.railgating_cycle_count++; | ||
797 | #endif | ||
798 | |||
799 | trace_gk20a_pm_unrailgate(dev_name(dev)); | ||
800 | |||
801 | if (platform->unrailgate) { | ||
802 | nvgpu_mutex_acquire(&platform->railgate_lock); | ||
803 | ret = platform->unrailgate(dev); | ||
804 | nvgpu_mutex_release(&platform->railgate_lock); | ||
805 | } | ||
806 | |||
807 | #ifdef CONFIG_DEBUG_FS | ||
808 | g->pstats.last_rail_ungate_complete = jiffies; | ||
809 | #endif | ||
810 | |||
811 | return ret; | ||
812 | } | ||
813 | |||
814 | /* | ||
815 | * Remove association of the driver with OS interrupt handler | ||
816 | */ | ||
817 | void nvgpu_free_irq(struct gk20a *g) | ||
818 | { | ||
819 | struct device *dev = dev_from_gk20a(g); | ||
820 | |||
821 | devm_free_irq(dev, g->irq_stall, g); | ||
822 | if (g->irq_stall != g->irq_nonstall) | ||
823 | devm_free_irq(dev, g->irq_nonstall, g); | ||
824 | } | ||
825 | |||
826 | /* | ||
827 | * Idle the GPU in preparation of shutdown/remove. | ||
828 | * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW | ||
829 | * state to prevent further activity on the driver SW side. | ||
830 | * On driver removal quiesce() should be called after start_unload() | ||
831 | */ | ||
832 | int nvgpu_quiesce(struct gk20a *g) | ||
833 | { | ||
834 | int err; | ||
835 | struct device *dev = dev_from_gk20a(g); | ||
836 | |||
837 | if (g->power_on) { | ||
838 | err = gk20a_wait_for_idle(g); | ||
839 | if (err) { | ||
840 | nvgpu_err(g, "failed to idle GPU, err=%d", err); | ||
841 | return err; | ||
842 | } | ||
843 | |||
844 | err = gk20a_fifo_disable_all_engine_activity(g, true); | ||
845 | if (err) { | ||
846 | nvgpu_err(g, | ||
847 | "failed to disable engine activity, err=%d", | ||
848 | err); | ||
849 | return err; | ||
850 | } | ||
851 | |||
852 | err = gk20a_fifo_wait_engine_idle(g); | ||
853 | if (err) { | ||
854 | nvgpu_err(g, "failed to idle engines, err=%d", | ||
855 | err); | ||
856 | return err; | ||
857 | } | ||
858 | } | ||
859 | |||
860 | if (gk20a_gpu_is_virtual(dev)) | ||
861 | err = vgpu_pm_prepare_poweroff(dev); | ||
862 | else | ||
863 | err = gk20a_pm_prepare_poweroff(dev); | ||
864 | |||
865 | if (err) | ||
866 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", | ||
867 | err); | ||
868 | |||
869 | return err; | ||
870 | } | ||
871 | |||
/*
 * System shutdown hook: stop driver activity, disable runtime PM, quiesce
 * the HW, and railgate. A virtual GPU, or one that is already off or
 * railgated, needs little or none of this.
 */
static void gk20a_pm_shutdown(struct platform_device *pdev)
{
	struct gk20a_platform *platform = platform_get_drvdata(pdev);
	struct gk20a *g = platform->g;
	int err;

	nvgpu_info(g, "shutting down");

	/* vgpu has nothing to clean up currently */
	if (gk20a_gpu_is_virtual(&pdev->dev))
		return;

	if (!g->power_on)
		goto finish;

	/* Block new SW activity before touching HW state. */
	gk20a_driver_start_unload(g);

	/* If GPU is already railgated,
	 * just prevent more requests, and return */
	if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
		__pm_runtime_disable(&pdev->dev, false);
		nvgpu_info(g, "already railgated, shut down complete");
		return;
	}

	/* Prevent more requests by disabling Runtime PM */
	__pm_runtime_disable(&pdev->dev, false);

	err = nvgpu_quiesce(g);
	if (err)
		goto finish;

	err = gk20a_pm_railgate(&pdev->dev);
	if (err)
		nvgpu_err(g, "failed to railgate, err=%d", err);

finish:
	nvgpu_info(g, "shut down complete");
}
911 | |||
912 | #ifdef CONFIG_PM | ||
/*
 * Runtime PM resume: ungate the rail and then finish power-on (virtual
 * or native). On a power-on failure the rail is gated again.
 */
static int gk20a_pm_runtime_resume(struct device *dev)
{
	int err;

	err = gk20a_pm_unrailgate(dev);
	if (err)
		return err;

	err = gk20a_gpu_is_virtual(dev) ?
		vgpu_pm_finalize_poweron(dev) :
		gk20a_pm_finalize_poweron(dev);
	if (err) {
		/* Roll back the unrailgate so state stays consistent. */
		gk20a_pm_railgate(dev);
		return err;
	}

	return 0;
}
935 | |||
/*
 * Runtime PM suspend: save HW state (virtual or native) and railgate.
 * On failure the GPU is powered back on so runtime PM can retry later.
 */
static int gk20a_pm_runtime_suspend(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	int err;

	err = gk20a_gpu_is_virtual(dev) ?
		vgpu_pm_prepare_poweroff(dev) :
		gk20a_pm_prepare_poweroff(dev);
	if (err) {
		nvgpu_err(g, "failed to power off, err=%d", err);
		goto fail;
	}

	err = gk20a_pm_railgate(dev);
	if (err)
		goto fail;

	return 0;

fail:
	/* Restore a powered-on state and refresh the autosuspend timer. */
	gk20a_pm_finalize_poweron(dev);
	pm_runtime_mark_last_busy(dev);
	return err;
}
961 | |||
962 | static int gk20a_pm_suspend(struct device *dev) | ||
963 | { | ||
964 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
965 | struct gk20a *g = get_gk20a(dev); | ||
966 | int ret = 0; | ||
967 | int idle_usage_count = 0; | ||
968 | |||
969 | if (!g->power_on) { | ||
970 | if (!pm_runtime_enabled(dev)) | ||
971 | gk20a_pm_railgate(dev); | ||
972 | return 0; | ||
973 | } | ||
974 | |||
975 | if (nvgpu_atomic_read(&g->usage_count) > idle_usage_count) | ||
976 | return -EBUSY; | ||
977 | |||
978 | ret = gk20a_pm_runtime_suspend(dev); | ||
979 | if (ret) | ||
980 | return ret; | ||
981 | |||
982 | if (platform->suspend) | ||
983 | platform->suspend(dev); | ||
984 | |||
985 | g->suspended = true; | ||
986 | |||
987 | return 0; | ||
988 | } | ||
989 | |||
990 | static int gk20a_pm_resume(struct device *dev) | ||
991 | { | ||
992 | struct gk20a *g = get_gk20a(dev); | ||
993 | int ret = 0; | ||
994 | |||
995 | if (!g->suspended) { | ||
996 | if (!pm_runtime_enabled(dev)) | ||
997 | gk20a_pm_unrailgate(dev); | ||
998 | return 0; | ||
999 | } | ||
1000 | |||
1001 | ret = gk20a_pm_runtime_resume(dev); | ||
1002 | |||
1003 | g->suspended = false; | ||
1004 | |||
1005 | return ret; | ||
1006 | } | ||
1007 | |||
/* Runtime PM and system sleep callbacks for the GPU platform device. */
static const struct dev_pm_ops gk20a_pm_ops = {
	.runtime_resume = gk20a_pm_runtime_resume,
	.runtime_suspend = gk20a_pm_runtime_suspend,
	.resume = gk20a_pm_resume,
	.suspend = gk20a_pm_suspend,
};
1014 | #endif | ||
1015 | |||
1016 | static int gk20a_pm_init(struct device *dev) | ||
1017 | { | ||
1018 | struct gk20a *g = get_gk20a(dev); | ||
1019 | int err = 0; | ||
1020 | |||
1021 | nvgpu_log_fn(g, " "); | ||
1022 | |||
1023 | /* | ||
1024 | * Initialise pm runtime. For railgate disable | ||
1025 | * case, set autosuspend delay to negative which | ||
1026 | * will suspend runtime pm | ||
1027 | */ | ||
1028 | if (g->railgate_delay && g->can_railgate) | ||
1029 | pm_runtime_set_autosuspend_delay(dev, | ||
1030 | g->railgate_delay); | ||
1031 | else | ||
1032 | pm_runtime_set_autosuspend_delay(dev, -1); | ||
1033 | |||
1034 | pm_runtime_use_autosuspend(dev); | ||
1035 | pm_runtime_enable(dev); | ||
1036 | |||
1037 | return err; | ||
1038 | } | ||
1039 | |||
/*
 * Start the process for unloading the driver. Set NVGPU_DRIVER_IS_DYING.
 *
 * After this returns no new work is accepted: the dying flag is set,
 * the GPU has gone idle, deferred interrupts have drained, and the
 * non-stall bottom-half workqueue (if any) is destroyed.
 */
void gk20a_driver_start_unload(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n");

	down_write(&l->busy_lock);
	__nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
	/* GR SW ready needs to be invalidated at this time with the busy lock
	 * held to prevent a racing condition on the gr/mm code */
	g->gr.sw_ready = false;
	g->sw_ready = false;
	up_write(&l->busy_lock);

	/* Virtual GPUs have no local hardware/interrupts to quiesce. */
	if (g->is_virtual)
		return;

	/* Wait for all current users to release the GPU... */
	gk20a_wait_for_idle(g);

	/* ...and for deferred interrupt handling to finish. */
	nvgpu_wait_for_deferred_interrupts(g);

	/* Tear down the non-stall interrupt bottom half, if created. */
	if (l->nonstall_work_queue) {
		cancel_work_sync(&l->nonstall_fn_work);
		destroy_workqueue(l->nonstall_work_queue);
		l->nonstall_work_queue = NULL;
	}
}
1070 | |||
/* Attach (or with NULL, detach) the gk20a instance to the platform
 * device's gk20a_platform data. */
static inline void set_gk20a(struct platform_device *pdev, struct gk20a *gk20a)
{
	gk20a_get_platform(&pdev->dev)->g = gk20a;
}
1075 | |||
1076 | static int nvgpu_read_fuse_overrides(struct gk20a *g) | ||
1077 | { | ||
1078 | struct device_node *np = nvgpu_get_node(g); | ||
1079 | u32 *fuses; | ||
1080 | int count, i; | ||
1081 | |||
1082 | if (!np) /* may be pcie device */ | ||
1083 | return 0; | ||
1084 | |||
1085 | count = of_property_count_elems_of_size(np, "fuse-overrides", 8); | ||
1086 | if (count <= 0) | ||
1087 | return count; | ||
1088 | |||
1089 | fuses = nvgpu_kmalloc(g, sizeof(u32) * count * 2); | ||
1090 | if (!fuses) | ||
1091 | return -ENOMEM; | ||
1092 | of_property_read_u32_array(np, "fuse-overrides", fuses, count * 2); | ||
1093 | for (i = 0; i < count; i++) { | ||
1094 | u32 fuse, value; | ||
1095 | |||
1096 | fuse = fuses[2 * i]; | ||
1097 | value = fuses[2 * i + 1]; | ||
1098 | switch (fuse) { | ||
1099 | case GM20B_FUSE_OPT_TPC_DISABLE: | ||
1100 | g->tpc_fs_mask_user = ~value; | ||
1101 | break; | ||
1102 | case GP10B_FUSE_OPT_ECC_EN: | ||
1103 | g->gr.fecs_feature_override_ecc_val = value; | ||
1104 | break; | ||
1105 | default: | ||
1106 | nvgpu_err(g, "ignore unknown fuse override %08x", fuse); | ||
1107 | break; | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | nvgpu_kfree(g, fuses); | ||
1112 | |||
1113 | return 0; | ||
1114 | } | ||
1115 | |||
/*
 * Platform-bus probe: bind platform data, allocate the per-GPU
 * nvgpu_os_linux container, request interrupts, and register the
 * device with the nvgpu core and runtime PM.
 */
static int gk20a_probe(struct platform_device *dev)
{
	struct nvgpu_os_linux *l = NULL;
	struct gk20a *gk20a;
	int err;
	struct gk20a_platform *platform = NULL;
	struct device_node *np;

	/* Platform data: from the OF match table on DT systems, else
	 * from legacy board-file platform_data. */
	if (dev->dev.of_node) {
		const struct of_device_id *match;

		match = of_match_device(tegra_gk20a_of_match, &dev->dev);
		if (match)
			platform = (struct gk20a_platform *)match->data;
	} else
		platform = (struct gk20a_platform *)dev->dev.platform_data;

	if (!platform) {
		dev_err(&dev->dev, "no platform data\n");
		return -ENODATA;
	}

	platform_set_drvdata(dev, platform);

	/* Virtualized GPUs take an entirely different probe path. */
	if (gk20a_gpu_is_virtual(&dev->dev))
		return vgpu_probe(dev);

	l = kzalloc(sizeof(*l), GFP_KERNEL);
	if (!l) {
		dev_err(&dev->dev, "couldn't allocate gk20a support");
		return -ENOMEM;
	}

	hash_init(l->ecc_sysfs_stats_htable);

	gk20a = &l->g;

	nvgpu_log_fn(gk20a, " ");

	nvgpu_init_gk20a(gk20a);
	set_gk20a(dev, gk20a);
	l->dev = &dev->dev;
	gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK;

	nvgpu_kmem_init(gk20a);

	err = nvgpu_init_enabled_flags(gk20a);
	if (err)
		goto return_err;

	/* I/O coherence is advertised via the DT node. */
	np = nvgpu_get_node(gk20a);
	if (of_dma_is_coherent(np)) {
		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
	}

	if (nvgpu_platform_is_simulation(gk20a))
		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);

	/* IRQ index 0 is the stall line, index 1 the non-stall line. */
	gk20a->irq_stall = platform_get_irq(dev, 0);
	gk20a->irq_nonstall = platform_get_irq(dev, 1);
	if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) {
		err = -ENXIO;
		goto return_err;
	}

	err = devm_request_threaded_irq(&dev->dev,
			gk20a->irq_stall,
			gk20a_intr_isr_stall,
			gk20a_intr_thread_stall,
			0, "gk20a_stall", gk20a);
	if (err) {
		dev_err(&dev->dev,
			"failed to request stall intr irq @ %d\n",
				gk20a->irq_stall);
		goto return_err;
	}
	err = devm_request_irq(&dev->dev,
			gk20a->irq_nonstall,
			gk20a_intr_isr_nonstall,
			0, "gk20a_nonstall", gk20a);
	if (err) {
		dev_err(&dev->dev,
			"failed to request non-stall intr irq @ %d\n",
				gk20a->irq_nonstall);
		goto return_err;
	}
	/* Keep interrupts off until power-on finalization; the two
	 * lines may share one IRQ, so only disable it once then. */
	disable_irq(gk20a->irq_stall);
	if (gk20a->irq_stall != gk20a->irq_nonstall)
		disable_irq(gk20a->irq_nonstall);

	err = gk20a_init_support(dev);
	if (err)
		goto return_err;

	/* NOTE(review): the return value of nvgpu_read_fuse_overrides()
	 * is assigned but never checked, so a DT read failure is
	 * silently ignored here -- confirm whether that is intended. */
	err = nvgpu_read_fuse_overrides(gk20a);

#ifdef CONFIG_RESET_CONTROLLER
	/* A missing reset controller is not fatal. */
	platform->reset_control = devm_reset_control_get(&dev->dev, NULL);
	if (IS_ERR(platform->reset_control))
		platform->reset_control = NULL;
#endif

	err = nvgpu_probe(gk20a, "gpu.0", INTERFACE_NAME, &nvgpu_class);
	if (err)
		goto return_err;

	err = gk20a_pm_init(&dev->dev);
	if (err) {
		dev_err(&dev->dev, "pm init failed");
		goto return_err;
	}

	gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);

	return 0;

return_err:
	nvgpu_free_enabled_flags(gk20a);

	/*
	 * Last since the above allocs may use data structures in here.
	 */
	nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP);

	kfree(l);

	return err;
}
1245 | |||
/*
 * Common driver teardown shared by platform and PCI removal paths.
 * Quiesces the GPU first, then unwinds the subsystems brought up at
 * probe time. Returns the result of the quiesce attempt; removal
 * proceeds even if the GPU failed to idle.
 */
int nvgpu_remove(struct device *dev, struct class *class)
{
	struct gk20a *g = get_gk20a(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
#endif
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	int err;

	nvgpu_log_fn(g, " ");

	err = nvgpu_quiesce(g);
	WARN(err, "gpu failed to idle during driver removal");

	if (nvgpu_mem_is_valid(&g->syncpt_mem))
		nvgpu_dma_free(g, &g->syncpt_mem);

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	if (platform->has_cde)
		gk20a_cde_destroy(l);
#endif

#ifdef CONFIG_GK20A_CTXSW_TRACE
	gk20a_ctxsw_trace_cleanup(g);
#endif

	gk20a_sched_ctrl_cleanup(g);

	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
		gk20a_scale_exit(dev);

	nvgpu_clk_arb_cleanup_arbiter(g);

	gk20a_user_deinit(dev, class);

	gk20a_debug_deinit(g);

	nvgpu_remove_sysfs(dev);

	if (platform->secure_buffer.destroy)
		platform->secure_buffer.destroy(g,
			&platform->secure_buffer);

	if (pm_runtime_enabled(dev))
		pm_runtime_disable(dev);

	/* Platform-specific removal hook runs last. */
	if (platform->remove)
		platform->remove(dev);

	nvgpu_log_fn(g, "removed");

	/* 'err' is the quiesce result recorded above. */
	return err;
}
1299 | |||
1300 | static int __exit gk20a_remove(struct platform_device *pdev) | ||
1301 | { | ||
1302 | int err; | ||
1303 | struct device *dev = &pdev->dev; | ||
1304 | struct gk20a *g = get_gk20a(dev); | ||
1305 | |||
1306 | if (gk20a_gpu_is_virtual(dev)) | ||
1307 | return vgpu_remove(pdev); | ||
1308 | |||
1309 | err = nvgpu_remove(dev, &nvgpu_class); | ||
1310 | |||
1311 | set_gk20a(pdev, NULL); | ||
1312 | gk20a_put(g); | ||
1313 | |||
1314 | return err; | ||
1315 | } | ||
1316 | |||
/* Platform driver glue for SoC-attached GPUs (PCI GPUs register via
 * nvgpu_pci_init() instead). */
static struct platform_driver gk20a_driver = {
	.probe = gk20a_probe,
	.remove = __exit_p(gk20a_remove),
	.shutdown = gk20a_pm_shutdown,
	.driver = {
		.owner = THIS_MODULE,
		.name = "gk20a",
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
#ifdef CONFIG_OF
		.of_match_table = tegra_gk20a_of_match,
#endif
#ifdef CONFIG_PM
		.pm = &gk20a_pm_ops,
#endif
		/* Unbinding via sysfs is not supported. */
		.suppress_bind_attrs = true,
	}
};
1334 | |||
/* Device class handed to nvgpu_probe()/nvgpu_remove(); registered in
 * gk20a_init() and unregistered in gk20a_exit(). */
struct class nvgpu_class = {
	.owner = THIS_MODULE,
	.name = CLASS_NAME,
};
1339 | |||
1340 | static int __init gk20a_init(void) | ||
1341 | { | ||
1342 | |||
1343 | int ret; | ||
1344 | |||
1345 | ret = class_register(&nvgpu_class); | ||
1346 | if (ret) | ||
1347 | return ret; | ||
1348 | |||
1349 | ret = nvgpu_pci_init(); | ||
1350 | if (ret) | ||
1351 | return ret; | ||
1352 | |||
1353 | return platform_driver_register(&gk20a_driver); | ||
1354 | } | ||
1355 | |||
/* Module exit: unregister the PCI and platform drivers, then drop the
 * device class registered by gk20a_init(). */
static void __exit gk20a_exit(void)
{
	nvgpu_pci_exit();
	platform_driver_unregister(&gk20a_driver);
	class_unregister(&nvgpu_class);
}
1362 | |||
1363 | MODULE_LICENSE("GPL v2"); | ||
1364 | module_init(gk20a_init); | ||
1365 | module_exit(gk20a_exit); | ||
diff --git a/drivers/gpu/nvgpu/os/linux/module.h b/drivers/gpu/nvgpu/os/linux/module.h new file mode 100644 index 00000000..ab4bca03 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
#define __NVGPU_COMMON_LINUX_MODULE_H__

struct gk20a;
struct device;
struct nvgpu_os_linux;

/* NOTE(review): this header also references struct class, struct
 * platform_device and struct resource without forward declarations;
 * it relies on includers pulling in the relevant Linux headers first
 * -- confirm or add forward declarations. */
int gk20a_pm_finalize_poweron(struct device *dev);
int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
void gk20a_remove_support(struct gk20a *g);
/* Flag the driver as dying and quiesce interrupt work (module.c). */
void gk20a_driver_start_unload(struct gk20a *g);
int nvgpu_quiesce(struct gk20a *g);
/* Shared platform/PCI teardown; returns the quiesce result. */
int nvgpu_remove(struct device *dev, struct class *class);
void nvgpu_free_irq(struct gk20a *g);
struct device_node *nvgpu_get_node(struct gk20a *g);
void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
				     struct resource **out);
/* Device class registered by gk20a_init() in module.c. */
extern struct class nvgpu_class;

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.c b/drivers/gpu/nvgpu/os/linux/module_usermode.c new file mode 100644 index 00000000..ea01c1b2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module_usermode.c | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/types.h> | ||
18 | |||
19 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
20 | |||
21 | #include "os_linux.h" | ||
22 | |||
23 | /* | ||
24 | * Locks out the driver from accessing GPU registers. This prevents access to | ||
25 | * thse registers after the GPU has been clock or power gated. This should help | ||
26 | * find annoying bugs where register reads and writes are silently dropped | ||
27 | * after the GPU has been turned off. On older chips these reads and writes can | ||
28 | * also lock the entire CPU up. | ||
29 | */ | ||
30 | void nvgpu_lockout_usermode_registers(struct gk20a *g) | ||
31 | { | ||
32 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
33 | |||
34 | l->usermode_regs = NULL; | ||
35 | } | ||
36 | |||
/*
 * Undoes nvgpu_lockout_usermode_registers(): restores the usermode
 * register pointer saved at init time.
 */
void nvgpu_restore_usermode_registers(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->usermode_regs = l->usermode_regs_saved;
}
46 | |||
47 | void nvgpu_remove_usermode_support(struct gk20a *g) | ||
48 | { | ||
49 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
50 | |||
51 | if (l->usermode_regs) { | ||
52 | l->usermode_regs = NULL; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | void nvgpu_init_usermode_support(struct gk20a *g) | ||
57 | { | ||
58 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
59 | |||
60 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
61 | l->usermode_regs_saved = l->usermode_regs; | ||
62 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/module_usermode.h b/drivers/gpu/nvgpu/os/linux/module_usermode.h new file mode 100644 index 00000000..b17053ca --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/module_usermode.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
/* NOTE(review): the guard name predates the file's rename to
 * module_usermode.h -- consider __NVGPU_MODULE_USERMODE_H__. */
#ifndef __NVGPU_MODULE_T19X_H__
#define __NVGPU_MODULE_T19X_H__

struct gk20a;

/* Map/unmap and lock out/restore the usermode register window
 * (implemented in module_usermode.c). */
void nvgpu_init_usermode_support(struct gk20a *g);
void nvgpu_remove_usermode_support(struct gk20a *g);
void nvgpu_lockout_usermode_registers(struct gk20a *g);
void nvgpu_restore_usermode_registers(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c new file mode 100644 index 00000000..93925803 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c | |||
@@ -0,0 +1,613 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/dma.h> | ||
18 | #include <nvgpu/gmmu.h> | ||
19 | #include <nvgpu/nvgpu_mem.h> | ||
20 | #include <nvgpu/page_allocator.h> | ||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/bug.h> | ||
23 | #include <nvgpu/enabled.h> | ||
24 | #include <nvgpu/kmem.h> | ||
25 | #include <nvgpu/vidmem.h> | ||
26 | |||
27 | #include <nvgpu/linux/dma.h> | ||
28 | #include <nvgpu/linux/vidmem.h> | ||
29 | |||
30 | #include <linux/vmalloc.h> | ||
31 | |||
32 | #include "os_linux.h" | ||
33 | |||
34 | #include "gk20a/gk20a.h" | ||
35 | #include "gk20a/mm_gk20a.h" | ||
36 | #include "platform_gk20a.h" | ||
37 | |||
38 | static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) | ||
39 | { | ||
40 | struct device *dev = dev_from_gk20a(g); | ||
41 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
42 | u64 ipa = sg_phys((struct scatterlist *)sgl); | ||
43 | |||
44 | if (platform->phys_addr) | ||
45 | return platform->phys_addr(g, ipa); | ||
46 | |||
47 | return ipa; | ||
48 | } | ||
49 | |||
50 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | ||
51 | { | ||
52 | void *cpu_va; | ||
53 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? | ||
54 | PAGE_KERNEL : | ||
55 | pgprot_writecombine(PAGE_KERNEL); | ||
56 | |||
57 | if (mem->aperture != APERTURE_SYSMEM) | ||
58 | return 0; | ||
59 | |||
60 | /* | ||
61 | * WAR for bug 2040115: we already will always have a coherent vmap() | ||
62 | * for all sysmem buffers. The prot settings are left alone since | ||
63 | * eventually this should be deleted. | ||
64 | */ | ||
65 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
66 | return 0; | ||
67 | |||
68 | /* | ||
69 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that | ||
70 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make | ||
71 | * another CPU mapping. | ||
72 | */ | ||
73 | if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | ||
74 | return 0; | ||
75 | |||
76 | if (WARN_ON(mem->cpu_va)) { | ||
77 | nvgpu_warn(g, "nested"); | ||
78 | return -EBUSY; | ||
79 | } | ||
80 | |||
81 | cpu_va = vmap(mem->priv.pages, | ||
82 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, | ||
83 | 0, prot); | ||
84 | |||
85 | if (WARN_ON(!cpu_va)) | ||
86 | return -ENOMEM; | ||
87 | |||
88 | mem->cpu_va = cpu_va; | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | ||
93 | { | ||
94 | if (mem->aperture != APERTURE_SYSMEM) | ||
95 | return; | ||
96 | |||
97 | /* | ||
98 | * WAR for bug 2040115: skip this since the map will be taken care of | ||
99 | * during the free in the DMA API. | ||
100 | */ | ||
101 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
102 | return; | ||
103 | |||
104 | /* | ||
105 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping | ||
106 | * already made by the DMA API. | ||
107 | */ | ||
108 | if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | ||
109 | return; | ||
110 | |||
111 | vunmap(mem->cpu_va); | ||
112 | mem->cpu_va = NULL; | ||
113 | } | ||
114 | |||
115 | static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
116 | { | ||
117 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
118 | u32 r = start, *dest_u32 = *arg; | ||
119 | |||
120 | if (!l->regs) { | ||
121 | __gk20a_warn_on_no_regs(); | ||
122 | return; | ||
123 | } | ||
124 | |||
125 | while (words--) { | ||
126 | *dest_u32++ = gk20a_readl(g, r); | ||
127 | r += sizeof(u32); | ||
128 | } | ||
129 | |||
130 | *arg = dest_u32; | ||
131 | } | ||
132 | |||
133 | u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) | ||
134 | { | ||
135 | u32 data = 0; | ||
136 | |||
137 | if (mem->aperture == APERTURE_SYSMEM) { | ||
138 | u32 *ptr = mem->cpu_va; | ||
139 | |||
140 | WARN_ON(!ptr); | ||
141 | data = ptr[w]; | ||
142 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
143 | nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
144 | #endif | ||
145 | } else if (mem->aperture == APERTURE_VIDMEM) { | ||
146 | u32 value; | ||
147 | u32 *p = &value; | ||
148 | |||
149 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), | ||
150 | sizeof(u32), pramin_access_batch_rd_n, &p); | ||
151 | |||
152 | data = value; | ||
153 | |||
154 | } else { | ||
155 | WARN_ON("Accessing unallocated nvgpu_mem"); | ||
156 | } | ||
157 | |||
158 | return data; | ||
159 | } | ||
160 | |||
161 | u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) | ||
162 | { | ||
163 | WARN_ON(offset & 3); | ||
164 | return nvgpu_mem_rd32(g, mem, offset / sizeof(u32)); | ||
165 | } | ||
166 | |||
167 | void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, | ||
168 | u32 offset, void *dest, u32 size) | ||
169 | { | ||
170 | WARN_ON(offset & 3); | ||
171 | WARN_ON(size & 3); | ||
172 | |||
173 | if (mem->aperture == APERTURE_SYSMEM) { | ||
174 | u8 *src = (u8 *)mem->cpu_va + offset; | ||
175 | |||
176 | WARN_ON(!mem->cpu_va); | ||
177 | memcpy(dest, src, size); | ||
178 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
179 | if (size) | ||
180 | nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
181 | src, *dest, size); | ||
182 | #endif | ||
183 | } else if (mem->aperture == APERTURE_VIDMEM) { | ||
184 | u32 *dest_u32 = dest; | ||
185 | |||
186 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
187 | pramin_access_batch_rd_n, &dest_u32); | ||
188 | } else { | ||
189 | WARN_ON("Accessing unallocated nvgpu_mem"); | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
194 | { | ||
195 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
196 | u32 r = start, *src_u32 = *arg; | ||
197 | |||
198 | if (!l->regs) { | ||
199 | __gk20a_warn_on_no_regs(); | ||
200 | return; | ||
201 | } | ||
202 | |||
203 | while (words--) { | ||
204 | writel_relaxed(*src_u32++, l->regs + r); | ||
205 | r += sizeof(u32); | ||
206 | } | ||
207 | |||
208 | *arg = src_u32; | ||
209 | } | ||
210 | |||
211 | void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) | ||
212 | { | ||
213 | if (mem->aperture == APERTURE_SYSMEM) { | ||
214 | u32 *ptr = mem->cpu_va; | ||
215 | |||
216 | WARN_ON(!ptr); | ||
217 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
218 | nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
219 | #endif | ||
220 | ptr[w] = data; | ||
221 | } else if (mem->aperture == APERTURE_VIDMEM) { | ||
222 | u32 value = data; | ||
223 | u32 *p = &value; | ||
224 | |||
225 | nvgpu_pramin_access_batched(g, mem, w * sizeof(u32), | ||
226 | sizeof(u32), pramin_access_batch_wr_n, &p); | ||
227 | if (!mem->skip_wmb) | ||
228 | wmb(); | ||
229 | } else { | ||
230 | WARN_ON("Accessing unallocated nvgpu_mem"); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) | ||
235 | { | ||
236 | WARN_ON(offset & 3); | ||
237 | nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data); | ||
238 | } | ||
239 | |||
240 | void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, | ||
241 | void *src, u32 size) | ||
242 | { | ||
243 | WARN_ON(offset & 3); | ||
244 | WARN_ON(size & 3); | ||
245 | |||
246 | if (mem->aperture == APERTURE_SYSMEM) { | ||
247 | u8 *dest = (u8 *)mem->cpu_va + offset; | ||
248 | |||
249 | WARN_ON(!mem->cpu_va); | ||
250 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
251 | if (size) | ||
252 | nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
253 | dest, *src, size); | ||
254 | #endif | ||
255 | memcpy(dest, src, size); | ||
256 | } else if (mem->aperture == APERTURE_VIDMEM) { | ||
257 | u32 *src_u32 = src; | ||
258 | |||
259 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
260 | pramin_access_batch_wr_n, &src_u32); | ||
261 | if (!mem->skip_wmb) | ||
262 | wmb(); | ||
263 | } else { | ||
264 | WARN_ON("Accessing unallocated nvgpu_mem"); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg) | ||
269 | { | ||
270 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
271 | u32 r = start, repeat = **arg; | ||
272 | |||
273 | if (!l->regs) { | ||
274 | __gk20a_warn_on_no_regs(); | ||
275 | return; | ||
276 | } | ||
277 | |||
278 | while (words--) { | ||
279 | writel_relaxed(repeat, l->regs + r); | ||
280 | r += sizeof(u32); | ||
281 | } | ||
282 | } | ||
283 | |||
284 | void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, | ||
285 | u32 c, u32 size) | ||
286 | { | ||
287 | WARN_ON(offset & 3); | ||
288 | WARN_ON(size & 3); | ||
289 | WARN_ON(c & ~0xff); | ||
290 | |||
291 | c &= 0xff; | ||
292 | |||
293 | if (mem->aperture == APERTURE_SYSMEM) { | ||
294 | u8 *dest = (u8 *)mem->cpu_va + offset; | ||
295 | |||
296 | WARN_ON(!mem->cpu_va); | ||
297 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
298 | if (size) | ||
299 | nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]", | ||
300 | dest, c, size); | ||
301 | #endif | ||
302 | memset(dest, c, size); | ||
303 | } else if (mem->aperture == APERTURE_VIDMEM) { | ||
304 | u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); | ||
305 | u32 *p = &repeat_value; | ||
306 | |||
307 | nvgpu_pramin_access_batched(g, mem, offset, size, | ||
308 | pramin_access_batch_set, &p); | ||
309 | if (!mem->skip_wmb) | ||
310 | wmb(); | ||
311 | } else { | ||
312 | WARN_ON("Accessing unallocated nvgpu_mem"); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * Obtain a SYSMEM address from a Linux SGL. This should eventually go away | ||
318 | * and/or become private to this file once all bad usages of Linux SGLs are | ||
319 | * cleaned up in the driver. | ||
320 | */ | ||
321 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | ||
322 | { | ||
323 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || | ||
324 | !nvgpu_iommuable(g)) | ||
325 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
326 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
327 | |||
328 | if (sg_dma_address(sgl) == 0) | ||
329 | return g->ops.mm.gpu_phys_addr(g, NULL, | ||
330 | __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); | ||
331 | |||
332 | if (sg_dma_address(sgl) == DMA_ERROR_CODE) | ||
333 | return 0; | ||
334 | |||
335 | return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl)); | ||
336 | } | ||
337 | |||
/*
 * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
 * allocation. Delegates to nvgpu_mem_get_addr_sgl() on the first SGL
 * entry of the backing sg_table.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}
346 | |||
347 | /* | ||
348 | * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM | ||
349 | * allocation. | ||
350 | * | ||
351 | * Note: this API does not make sense to use for _VIDMEM_ buffers with greater | ||
352 | * than one scatterlist chunk. If there's more than one scatterlist chunk then | ||
353 | * the buffer will not be contiguous. As such the base address probably isn't | ||
354 | * very useful. This is true for SYSMEM as well, if there's no IOMMU. | ||
355 | * | ||
356 | * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's | ||
357 | * an IOMMU present and enabled for the GPU. | ||
358 | * | ||
359 | * %attrs can be NULL. If it is not NULL then it may be inspected to determine | ||
360 | * if the address needs to be modified before writing into a PTE. | ||
361 | */ | ||
362 | u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
363 | { | ||
364 | struct nvgpu_page_alloc *alloc; | ||
365 | |||
366 | if (mem->aperture == APERTURE_SYSMEM) | ||
367 | return nvgpu_mem_get_addr_sysmem(g, mem); | ||
368 | |||
369 | /* | ||
370 | * Otherwise get the vidmem address. | ||
371 | */ | ||
372 | alloc = mem->vidmem_alloc; | ||
373 | |||
374 | /* This API should not be used with > 1 chunks */ | ||
375 | WARN_ON(alloc->nr_chunks != 1); | ||
376 | |||
377 | return alloc->base; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * This should only be used on contiguous buffers regardless of whether | ||
382 | * there's an IOMMU present/enabled. This applies to both SYSMEM and | ||
383 | * VIDMEM. | ||
384 | */ | ||
385 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) | ||
386 | { | ||
387 | /* | ||
388 | * For a VIDMEM buf, this is identical to simply get_addr() so just fall | ||
389 | * back to that. | ||
390 | */ | ||
391 | if (mem->aperture == APERTURE_VIDMEM) | ||
392 | return nvgpu_mem_get_addr(g, mem); | ||
393 | |||
394 | return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl); | ||
395 | } | ||
396 | |||
/*
 * Build in %dest a shadow nvgpu_mem describing the page window
 * [start_page, start_page + nr_pages) of the SYSMEM buffer %src.
 *
 * Be careful how you use this! You are responsible for correctly freeing
 * this memory.
 *
 * Returns 0 on success, -EINVAL for a non-SYSMEM source or an
 * out-of-range window, or the error from sg-table construction.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *dest, struct nvgpu_mem *src,
		int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	/* Mark as a shadow so the free path knows pages are shared. */
	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 *
	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
	 * re-use.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	/* A zero IOVA in the source means "not IOMMU-mapped"; keep 0. */
	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}
454 | |||
/*
 * Build a SYSMEM nvgpu_mem (@dest) from an existing array of physical pages.
 * A private copy of the page array is made; the resulting buffer is marked
 * __NVGPU_MEM_FLAG_NO_DMA since no DMA-API allocation backs it.
 */
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
				  struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	/* Own copy of the caller's page list; freed by the dma free paths. */
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
					 nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}
492 | |||
#ifdef CONFIG_TEGRA_GK20A_NVHOST
/*
 * Build a SYSMEM nvgpu_mem from a contiguous physical address range by
 * translating each page to a struct page and delegating to
 * __nvgpu_mem_create_from_pages(). The temporary page array is freed here;
 * __nvgpu_mem_create_from_pages() keeps its own copy.
 */
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
				 u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}
#endif
513 | |||
/*
 * nvgpu_sgt_ops implementation backed by Linux scatterlists. Each callback
 * simply casts the opaque nvgpu_sgl back to a struct scatterlist.
 */

/* Advance to the next scatterlist entry (NULL at the end of the chain). */
static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}

/* Physical address of this SGL entry. */
static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	return (u64)__nvgpu_sgl_phys(g, sgl);
}

/* DMA (IOVA) address of this SGL entry; 0 means "no SMMU mapping". */
static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

/* Byte length of this SGL entry. */
static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

/*
 * GPU-visible address for this SGL entry: physical when there is no DMA
 * address (SMMU bypass), 0 on a DMA mapping error, otherwise the
 * IOMMU-translated DMA address.
 */
static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
					struct nvgpu_sgl *sgl,
					struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
					       __nvgpu_sgl_phys(g, sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
			sg_dma_address((struct scatterlist *)sgl));
}

/* IOMMU use is allowed unless the platform forces physical scatter-gather. */
static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
					  struct nvgpu_sgt *sgt)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
		return false;
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next = nvgpu_mem_linux_sgl_next,
	.sgl_phys = nvgpu_mem_linux_sgl_phys,
	.sgl_dma = nvgpu_mem_linux_sgl_dma,
	.sgl_length = nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
	.sgt_free = nvgpu_mem_linux_sgl_free,
};
575 | |||
/*
 * For a VIDMEM buffer the nvgpu_sgt is embedded in the page allocator's
 * bookkeeping; recover it from the vidmem page alloc stashed behind the
 * Linux SGL. Returns NULL if the SGL does not describe a vidmem alloc.
 */
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}
588 | |||
/*
 * Wrap a Linux sg_table in an nvgpu_sgt. VIDMEM allocations already carry an
 * embedded nvgpu_sgt, which is returned directly; SYSMEM allocations get a
 * freshly allocated wrapper (freed later via the sgt_free op). Returns NULL
 * on allocation failure.
 */
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}
608 | |||
/* Convenience wrapper: create an nvgpu_sgt from a buffer's private sg_table. */
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					    struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}
diff --git a/drivers/gpu/nvgpu/os/linux/nvhost.c b/drivers/gpu/nvgpu/os/linux/nvhost.c new file mode 100644 index 00000000..6ab60248 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost.c | |||
@@ -0,0 +1,294 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/nvhost.h> | ||
18 | #include <linux/nvhost_t194.h> | ||
19 | #include <linux/nvhost_ioctl.h> | ||
20 | #include <linux/of_platform.h> | ||
21 | |||
22 | #include <nvgpu/nvhost.h> | ||
23 | |||
24 | #include "nvhost_priv.h" | ||
25 | |||
26 | #include "gk20a/gk20a.h" | ||
27 | #include "os_linux.h" | ||
28 | #include "module.h" | ||
29 | |||
/*
 * Look up the host1x device referenced by the GPU's "nvidia,host1x" DT
 * phandle and cache it in g->nvhost_dev.
 *
 * Returns 0 on success or when no host1x reference exists (in which case
 * syncpoint support is disabled), -EPROBE_DEFER if host1x has not probed
 * yet, -ENOMEM on allocation failure.
 */
int nvgpu_get_nvhost_dev(struct gk20a *g)
{
	struct device_node *np = nvgpu_get_node(g);
	struct platform_device *host1x_pdev = NULL;
	const __be32 *host1x_ptr;

	host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
	if (host1x_ptr) {
		struct device_node *host1x_node =
			of_find_node_by_phandle(be32_to_cpup(host1x_ptr));

		/*
		 * NOTE(review): of_find_node_by_phandle() takes a reference on
		 * host1x_node that is never dropped with of_node_put() here —
		 * looks like a node refcount leak; confirm against driver-core
		 * expectations.
		 */
		host1x_pdev = of_find_device_by_node(host1x_node);
		if (!host1x_pdev) {
			nvgpu_warn(g, "host1x device not available");
			return -EPROBE_DEFER;
		}

	} else {
		/* No DT reference: degrade gracefully to no-syncpoint mode. */
		if (g->has_syncpoints) {
			nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
			g->has_syncpoints = false;
		}
		return 0;
	}

	g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
	if (!g->nvhost_dev)
		return -ENOMEM;

	g->nvhost_dev->host1x_pdev = host1x_pdev;

	return 0;
}
63 | |||
64 | void nvgpu_free_nvhost_dev(struct gk20a *g) | ||
65 | { | ||
66 | nvgpu_kfree(g, g->nvhost_dev); | ||
67 | } | ||
68 | |||
/*
 * Thin OS-abstraction wrappers: each forwards to the corresponding Linux
 * nvhost_* API on the cached host1x platform device, hiding the Linux
 * dependency from common (OS-independent) code.
 */

/* Take a runtime-PM busy reference on host1x. */
int nvgpu_nvhost_module_busy_ext(
	struct nvgpu_nvhost_dev *nvhost_dev)
{
	return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
}

/* Drop a runtime-PM busy reference on host1x. */
void nvgpu_nvhost_module_idle_ext(
	struct nvgpu_nvhost_dev *nvhost_dev)
{
	nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
}

/* Dump host1x debug state (used on channel timeouts/errors). */
void nvgpu_nvhost_debug_dump_device(
	struct nvgpu_nvhost_dev *nvhost_dev)
{
	nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
}

/* Human-readable name of syncpoint @id. */
const char *nvgpu_nvhost_syncpt_get_name(
	struct nvgpu_nvhost_dev *nvhost_dev, int id)
{
	return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
}

/* True if @id names a valid syncpoint on this host1x instance. */
bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
}

/* True if syncpoint @id has reached threshold @thresh. */
int nvgpu_nvhost_syncpt_is_expired_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
	return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
					    id, thresh);
}

/* Advance the max (expected) value of syncpoint @id by @incrs; returns it. */
u32 nvgpu_nvhost_syncpt_incr_max_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs)
{
	return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs);
}

/* Call @callback(private_data, ...) once syncpoint @id crosses @thresh. */
int nvgpu_nvhost_intr_register_notifier(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
	void (*callback)(void *, int), void *private_data)
{
	return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
					     id, thresh,
					     callback, private_data);
}

/* Fast-forward syncpoint @id's min value to its max (releases waiters). */
void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
}

/* Return syncpoint @id to the host1x free pool. */
void nvgpu_nvhost_syncpt_put_ref_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
}

/* Allocate a host-managed syncpoint named @syncpt_name. */
u32 nvgpu_nvhost_get_syncpt_host_managed(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u32 param, const char *syncpt_name)
{
	return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev,
					      param, syncpt_name);
}

/* Allocate a client-managed syncpoint named @syncpt_name. */
u32 nvgpu_nvhost_get_syncpt_client_managed(
	struct nvgpu_nvhost_dev *nvhost_dev,
	const char *syncpt_name)
{
	return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
						syncpt_name);
}

/* Block until syncpoint @id reaches @thresh or @timeout expires. */
int nvgpu_nvhost_syncpt_wait_timeout_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
	u32 thresh, u32 timeout, u32 *value, struct timespec *ts)
{
	return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
					      id, thresh, timeout, value, ts);
}

/* Read the current value of syncpoint @id into *val, validating @id. */
int nvgpu_nvhost_syncpt_read_ext_check(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
{
	return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
}

/* Read the max (expected) value of syncpoint @id. */
u32 nvgpu_nvhost_syncpt_read_maxval(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
}

/* Force syncpoint @id into a state that releases all current waiters. */
void nvgpu_nvhost_syncpt_set_safe_state(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	u32 val;

	/*
	 * Add large number of increments to current value
	 * so that all waiters on this syncpoint are released
	 *
	 * We don't expect any case where more than 0x10000 increments
	 * are pending
	 */
	val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id);
	val += 0x10000;

	nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
	nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
}
187 | |||
/*
 * Create a sysfs symlink from the host1x device directory to this GPU
 * device, unless the GPU is already a direct child of host1x (in which
 * case the hierarchy already expresses the relationship).
 */
int nvgpu_nvhost_create_symlink(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);
	int err = 0;

	if (g->nvhost_dev &&
	    (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
		err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
					&dev->kobj,
					dev_name(dev));
	}

	return err;
}
202 | |||
/* Undo nvgpu_nvhost_create_symlink(): drop the sysfs link if one was made. */
void nvgpu_nvhost_remove_symlink(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	if (g->nvhost_dev &&
	    (dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
		sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
				  dev_name(dev));
	}
}
213 | |||
#ifdef CONFIG_SYNC
/*
 * Android sync-framework wrappers (only built when CONFIG_SYNC is set):
 * forward to the staging nvhost sync helpers.
 */

/* Syncpoint id backing a sync_pt. */
u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt)
{
	return nvhost_sync_pt_id(pt);
}

/* Syncpoint threshold a sync_pt signals at. */
u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt)
{
	return nvhost_sync_pt_thresh(pt);
}

/* Look up a sync_fence from a user-space fd; NULL if the fd is not one. */
struct sync_fence *nvgpu_nvhost_sync_fdget(int fd)
{
	return nvhost_sync_fdget(fd);
}

/* Number of distinct sync points in a fence. */
int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence)
{
	return nvhost_sync_num_pts(fence);
}

/* Create a sync_fence that signals when syncpoint @id reaches @thresh. */
struct sync_fence *nvgpu_nvhost_sync_create_fence(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u32 id, u32 thresh, const char *name)
{
	struct nvhost_ctrl_sync_fence_info pt = {
		.id = id,
		.thresh = thresh,
	};

	return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name);
}
#endif /* CONFIG_SYNC */
247 | |||
#ifdef CONFIG_TEGRA_T19X_GRHOST
/*
 * T194+ syncpoint unit interface: syncpoints are memory-mapped so the GPU
 * can increment/read them directly through an aperture.
 */

/* Physical base and size of the syncpoint aperture. */
int nvgpu_nvhost_syncpt_unit_interface_get_aperture(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u64 *base, size_t *size)
{
	return nvhost_syncpt_unit_interface_get_aperture(
		nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
}

/* Byte offset of syncpoint @syncpt_id within the aperture. */
u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id)
{
	return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}

/*
 * Probe the syncpoint aperture and cache its base/size/stride in @g.
 * Returns -ENOSYS (and clears g->has_syncpoints) on any failure.
 */
int nvgpu_nvhost_syncpt_init(struct gk20a *g)
{
	int err = 0;

	if (!g->has_syncpoints)
		return -ENOSYS;

	err = nvgpu_get_nvhost_dev(g);
	if (err) {
		nvgpu_err(g, "host1x device not available");
		g->has_syncpoints = false;
		return -ENOSYS;
	}

	err = nvgpu_nvhost_syncpt_unit_interface_get_aperture(
		g->nvhost_dev,
		&g->syncpt_unit_base,
		&g->syncpt_unit_size);
	if (err) {
		/*
		 * NOTE(review): g->nvhost_dev allocated above is not freed on
		 * this path — possible leak; confirm whether teardown handles
		 * it elsewhere.
		 */
		nvgpu_err(g, "Failed to get syncpt interface");
		g->has_syncpoints = false;
		return -ENOSYS;
	}

	/* Per-syncpoint stride = offset of syncpoint 1 from syncpoint 0. */
	g->syncpt_size =
		nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
	nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
		   g->syncpt_unit_base, g->syncpt_unit_size,
		   g->syncpt_size);

	return 0;
}
#endif
diff --git a/drivers/gpu/nvgpu/os/linux/nvhost_priv.h b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h new file mode 100644 index 00000000..c03390a7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvhost_priv.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_NVHOST_PRIV_H__ | ||
18 | #define __NVGPU_NVHOST_PRIV_H__ | ||
19 | |||
/* Linux-private nvhost state: just the host1x platform device handle. */
struct nvgpu_nvhost_dev {
	struct platform_device *host1x_pdev;	/* looked up via DT phandle */
};
23 | |||
24 | #endif /* __NVGPU_NVHOST_PRIV_H__ */ | ||
diff --git a/drivers/gpu/nvgpu/os/linux/nvlink.c b/drivers/gpu/nvgpu/os/linux/nvlink.c new file mode 100644 index 00000000..c93514c0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/nvlink.c | |||
@@ -0,0 +1,106 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <gk20a/gk20a.h> | ||
18 | #include <nvgpu/nvlink.h> | ||
19 | #include <nvgpu/enabled.h> | ||
20 | #include "module.h" | ||
21 | |||
22 | #ifdef CONFIG_TEGRA_NVLINK | ||
23 | int nvgpu_nvlink_read_dt_props(struct gk20a *g) | ||
24 | { | ||
25 | struct device_node *np; | ||
26 | struct nvlink_device *ndev = g->nvlink.priv; | ||
27 | u32 local_dev_id; | ||
28 | u32 local_link_id; | ||
29 | u32 remote_dev_id; | ||
30 | u32 remote_link_id; | ||
31 | bool is_master; | ||
32 | |||
33 | /* Parse DT */ | ||
34 | np = nvgpu_get_node(g); | ||
35 | if (!np) | ||
36 | goto fail; | ||
37 | |||
38 | np = of_get_child_by_name(np, "nvidia,nvlink"); | ||
39 | if (!np) | ||
40 | goto fail; | ||
41 | |||
42 | np = of_get_child_by_name(np, "endpoint"); | ||
43 | if (!np) | ||
44 | goto fail; | ||
45 | |||
46 | /* Parse DT structure to detect endpoint topology */ | ||
47 | of_property_read_u32(np, "local_dev_id", &local_dev_id); | ||
48 | of_property_read_u32(np, "local_link_id", &local_link_id); | ||
49 | of_property_read_u32(np, "remote_dev_id", &remote_dev_id); | ||
50 | of_property_read_u32(np, "remote_link_id", &remote_link_id); | ||
51 | is_master = of_property_read_bool(np, "is_master"); | ||
52 | |||
53 | /* Check that we are in dGPU mode */ | ||
54 | if (local_dev_id != NVLINK_ENDPT_GV100) { | ||
55 | nvgpu_err(g, "Local nvlink device is not dGPU"); | ||
56 | return -EINVAL; | ||
57 | } | ||
58 | |||
59 | ndev->is_master = is_master; | ||
60 | ndev->device_id = local_dev_id; | ||
61 | ndev->link.link_id = local_link_id; | ||
62 | ndev->link.remote_dev_info.device_id = remote_dev_id; | ||
63 | ndev->link.remote_dev_info.link_id = remote_link_id; | ||
64 | |||
65 | return 0; | ||
66 | |||
67 | fail: | ||
68 | nvgpu_info(g, "nvlink endpoint not found or invaling in DT"); | ||
69 | return -ENODEV; | ||
70 | } | ||
71 | #endif /* CONFIG_TEGRA_NVLINK */ | ||
72 | |||
73 | void nvgpu_mss_nvlink_init_credits(struct gk20a *g) | ||
74 | { | ||
75 | /* MSS_NVLINK_1_BASE */ | ||
76 | void __iomem *soc1 = ioremap(0x01f20010, 4096); | ||
77 | /* MSS_NVLINK_2_BASE */ | ||
78 | void __iomem *soc2 = ioremap(0x01f40010, 4096); | ||
79 | /* MSS_NVLINK_3_BASE */ | ||
80 | void __iomem *soc3 = ioremap(0x01f60010, 4096); | ||
81 | /* MSS_NVLINK_4_BASE */ | ||
82 | void __iomem *soc4 = ioremap(0x01f80010, 4096); | ||
83 | u32 val; | ||
84 | |||
85 | nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits"); | ||
86 | |||
87 | val = readl_relaxed(soc1); | ||
88 | writel_relaxed(val, soc1); | ||
89 | val = readl_relaxed(soc1 + 4); | ||
90 | writel_relaxed(val, soc1 + 4); | ||
91 | |||
92 | val = readl_relaxed(soc2); | ||
93 | writel_relaxed(val, soc2); | ||
94 | val = readl_relaxed(soc2 + 4); | ||
95 | writel_relaxed(val, soc2 + 4); | ||
96 | |||
97 | val = readl_relaxed(soc3); | ||
98 | writel_relaxed(val, soc3); | ||
99 | val = readl_relaxed(soc3 + 4); | ||
100 | writel_relaxed(val, soc3 + 4); | ||
101 | |||
102 | val = readl_relaxed(soc4); | ||
103 | writel_relaxed(val, soc4); | ||
104 | val = readl_relaxed(soc4 + 4); | ||
105 | writel_relaxed(val, soc4 + 4); | ||
106 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android.c b/drivers/gpu/nvgpu/os/linux/os_fence_android.c new file mode 100644 index 00000000..9be8c6c0 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #include <nvgpu/types.h> | ||
17 | #include <nvgpu/os_fence.h> | ||
18 | #include <nvgpu/linux/os_fence_android.h> | ||
19 | |||
20 | #include "gk20a/gk20a.h" | ||
21 | |||
22 | #include "../drivers/staging/android/sync.h" | ||
23 | |||
24 | inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s) | ||
25 | { | ||
26 | struct sync_fence *fence = (struct sync_fence *)s->priv; | ||
27 | return fence; | ||
28 | } | ||
29 | |||
30 | static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out) | ||
31 | { | ||
32 | fence_out->priv = NULL; | ||
33 | fence_out->g = NULL; | ||
34 | fence_out->ops = NULL; | ||
35 | } | ||
36 | |||
/*
 * Bind an OS fence to its GPU, its ops table, and the backing Android
 * sync_fence. Does not take a reference on @fence; the caller transfers
 * its reference.
 */
void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
			 struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
			 struct sync_fence *fence)
{
	fence_out->g = g;
	fence_out->ops = fops;
	fence_out->priv = (void *)fence;
}
45 | |||
/* Drop the held sync_fence reference and reset @s to the empty state. */
void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
{
	struct sync_fence *fence = nvgpu_get_sync_fence(s);

	sync_fence_put(fence);

	nvgpu_os_fence_clear(s);
}
54 | |||
/*
 * Install the backing sync_fence into file descriptor @fd for user space.
 * sync_fence_install() consumes a reference, so take one first — @s keeps
 * its own reference afterwards.
 */
void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
{
	struct sync_fence *fence = nvgpu_get_sync_fence(s);

	sync_fence_get(fence);
	sync_fence_install(fence, fd);
}
62 | |||
/*
 * Construct an nvgpu_os_fence from a user-supplied sync fd, trying the
 * syncpoint backend first (when nvhost support is built in) and falling
 * back to the semaphore backend. Returns 0 on success, negative errno if
 * neither backend recognizes the fd.
 */
int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
			 struct channel_gk20a *c, int fd)
{
	int err = -ENOSYS;

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
#endif

	if (err)
		err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);

	if (err)
		nvgpu_err(c->g, "error obtaining fence from fd %d", fd);

	return err;
}
diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c new file mode 100644 index 00000000..25832417 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/errno.h> | ||
18 | |||
19 | #include <nvgpu/types.h> | ||
20 | #include <nvgpu/os_fence.h> | ||
21 | #include <nvgpu/linux/os_fence_android.h> | ||
22 | #include <nvgpu/semaphore.h> | ||
23 | |||
24 | #include "gk20a/channel_sync_gk20a.h" | ||
25 | #include "gk20a/mm_gk20a.h" | ||
26 | |||
27 | #include "sync_sema_android.h" | ||
28 | |||
29 | #include "../drivers/staging/android/sync.h" | ||
30 | |||
/*
 * Emit one GPU semaphore-wait command per sync point contained in @s into a
 * freshly allocated private command buffer entry (@wait_cmd).
 *
 * Returns 0 when the fence is empty or all waits were generated, -EINVAL if
 * the fence holds more points than @max_wait_cmds allows (when nonzero),
 * or a negative errno on command-buffer allocation failure.
 */
int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
				     struct priv_cmd_entry *wait_cmd,
				     struct channel_gk20a *c,
				     int max_wait_cmds)
{
	int err;
	int wait_cmd_size;
	int num_wait_cmds;
	int i;
	struct nvgpu_semaphore *sema;
	struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);

	wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();

	num_wait_cmds = sync_fence->num_fences;
	if (num_wait_cmds == 0)
		return 0;

	if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
		return -EINVAL;

	/* One contiguous cmdbuf entry sized for all waits at once. */
	err = gk20a_channel_alloc_priv_cmdbuf(c,
					      wait_cmd_size * num_wait_cmds,
					      wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		return err;
	}

	/* Each sync point maps to one semaphore; write wait i at slot i. */
	for (i = 0; i < num_wait_cmds; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);

		sema = gk20a_sync_pt_sema(pt);
		gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd,
						wait_cmd_size, i);
	}

	return 0;
}
71 | |||
/* Semaphore-backed implementation of the OS fence operations. */
static const struct nvgpu_os_fence_ops sema_ops = {
	.program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
	.drop_ref = nvgpu_os_fence_android_drop_ref,
	.install_fence = nvgpu_os_fence_android_install_fd,
};
77 | |||
/*
 * Create a semaphore-backed OS fence for channel @c. The new sync_fence
 * wraps @sema and is named after the semaphore's read-only GPU VA.
 * Returns 0 on success, -ENOMEM if fence creation fails.
 */
int nvgpu_os_fence_sema_create(
	struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema)
{
	struct sync_fence *fence;

	fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
					nvgpu_semaphore_gpu_ro_va(sema));

	if (!fence) {
		nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
			  (u32)nvgpu_semaphore_gpu_ro_va(sema));

		return -ENOMEM;
	}

	nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);

	return 0;
}
99 | |||
/*
 * Build a semaphore-backed OS fence from user fd @fd. Returns -EINVAL if
 * the fd does not reference a gk20a semaphore sync_fence; on success the
 * fence's reference is transferred into @fence_out.
 */
int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
			      struct channel_gk20a *c, int fd)
{
	struct sync_fence *fence = gk20a_sync_fence_fdget(fd);

	if (!fence)
		return -EINVAL;

	nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);

	return 0;
}
diff --git a/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c new file mode 100644 index 00000000..d7a72fcd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/errno.h> | ||
18 | |||
19 | #include <nvgpu/types.h> | ||
20 | #include <nvgpu/os_fence.h> | ||
21 | #include <nvgpu/linux/os_fence_android.h> | ||
22 | #include <nvgpu/nvhost.h> | ||
23 | #include <nvgpu/atomic.h> | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gk20a/channel_gk20a.h" | ||
27 | #include "gk20a/channel_sync_gk20a.h" | ||
28 | #include "gk20a/mm_gk20a.h" | ||
29 | |||
30 | #include "../drivers/staging/android/sync.h" | ||
31 | |||
/*
 * Emit GPU syncpoint-wait commands for every sync point in @s into a freshly
 * allocated private command buffer entry (@wait_cmd). All syncpoint ids are
 * validated up front so no commands are emitted for an invalid fence.
 *
 * Returns 0 on success or empty fence, -EINVAL for too many points (when
 * @max_wait_cmds is nonzero) or an invalid syncpoint id, negative errno on
 * command-buffer allocation failure.
 */
int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
				       struct priv_cmd_entry *wait_cmd,
				       struct channel_gk20a *c,
				       int max_wait_cmds)
{
	int err;
	int wait_cmd_size;
	int num_wait_cmds;
	int i;
	u32 wait_id;
	struct sync_pt *pt;

	struct sync_fence *sync_fence = (struct sync_fence *)s->priv;

	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
		wait_id = nvgpu_nvhost_sync_pt_id(pt);
		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
				c->g->nvhost_dev, wait_id)) {
			return -EINVAL;
		}
	}

	/*
	 * NOTE(review): allocation is sized by nvgpu_nvhost_sync_num_pts()
	 * while the emit loop below iterates sync_fence->num_fences; the
	 * WARN_ON at the end suggests these are expected to match — confirm.
	 */
	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0)
		return 0;

	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
	err = gk20a_channel_alloc_priv_cmdbuf(c,
					      wait_cmd_size * num_wait_cmds, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
			  "not enough priv cmd buffer space");
		return err;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);

		/*
		 * NOTE(review): err from each iteration is overwritten and
		 * never checked; only the last call's status survives, and
		 * even that is not propagated — verify this is intentional.
		 */
		err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
							wait_cmd, wait_cmd_size, i, true);
	}

	WARN_ON(i != num_wait_cmds);

	return 0;
}
86 | |||
/* Syncpoint-backed implementation of the OS fence operations. */
static const struct nvgpu_os_fence_ops syncpt_ops = {
	.program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
	.drop_ref = nvgpu_os_fence_android_drop_ref,
	.install_fence = nvgpu_os_fence_android_install_fd,
};
92 | |||
/*
 * Create a syncpoint-backed OS fence that signals when syncpoint @id
 * reaches @thresh. Returns 0 on success, -ENOMEM if the underlying
 * sync_fence cannot be created.
 */
int nvgpu_os_fence_syncpt_create(
	struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
	struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
		nvhost_dev, id, thresh, "fence");

	if (!fence) {
		nvgpu_err(c->g, "error constructing fence %s", "fence");
		return -ENOMEM;
	}

	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);

	return 0;
}
109 | |||
110 | int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out, | ||
111 | struct channel_gk20a *c, int fd) | ||
112 | { | ||
113 | struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd); | ||
114 | |||
115 | if (!fence) | ||
116 | return -ENOMEM; | ||
117 | |||
118 | nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence); | ||
119 | |||
120 | return 0; | ||
121 | } \ No newline at end of file | ||
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h new file mode 100644 index 00000000..4dcce322 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef NVGPU_OS_LINUX_H | ||
18 | #define NVGPU_OS_LINUX_H | ||
19 | |||
20 | #include <linux/cdev.h> | ||
21 | #include <linux/iommu.h> | ||
22 | #include <linux/hashtable.h> | ||
23 | |||
24 | #include "gk20a/gk20a.h" | ||
25 | #include "cde.h" | ||
26 | #include "sched.h" | ||
27 | |||
/*
 * Linux-specific per-chip operations.  Currently only the CDE (color
 * decompression engine) hooks live here; they are filled in by the
 * chip-specific Linux code.
 */
struct nvgpu_os_linux_ops {
	struct {
		/* Pick the horizontal/vertical CDE shader programs. */
		void (*get_program_numbers)(struct gk20a *g,
					    u32 block_height_log2,
					    u32 shader_parameter,
					    int *hprog, int *vprog);
		/* Whether this chip's CDE path needs a scatter buffer. */
		bool (*need_scatter_buffer)(struct gk20a *g);
		/* Fill the scatter buffer describing the surface's sgt. */
		int (*populate_scatter_buffer)(struct gk20a *g,
					       struct sg_table *sgt,
					       size_t surface_size,
					       void *scatter_buffer_ptr,
					       size_t scatter_buffer_size);
	} cde;
};
42 | |||
/*
 * Linux-specific wrapper around the OS-independent struct gk20a.  The
 * gk20a instance is embedded as the first member so the two can be
 * converted back and forth (see nvgpu_os_linux_from_gk20a()).
 */
struct nvgpu_os_linux {
	struct gk20a g;		/* OS-independent state; must stay first */
	struct device *dev;

	/* Character devices and their sysfs nodes, one pair per API. */
	struct {
		struct cdev cdev;
		struct device *node;
	} channel;

	struct {
		struct cdev cdev;
		struct device *node;
	} ctrl;

	struct {
		struct cdev cdev;
		struct device *node;
	} as_dev;

	struct {
		struct cdev cdev;
		struct device *node;
	} dbg;

	struct {
		struct cdev cdev;
		struct device *node;
	} prof;

	struct {
		struct cdev cdev;
		struct device *node;
	} tsg;

	struct {
		struct cdev cdev;
		struct device *node;
	} ctxsw;

	struct {
		struct cdev cdev;
		struct device *node;
	} sched;

	/* Base dev_t for the cdevs above. */
	dev_t cdev_region;

	struct devfreq *devfreq;

	struct device_dma_parameters dma_parms;

	/* Raw counts of stall/nonstall hardware interrupts taken. */
	atomic_t hw_irq_stall_count;
	atomic_t hw_irq_nonstall_count;

	/* Waiters on completion of stalling-interrupt bottom halves. */
	struct nvgpu_cond sw_irq_stall_last_handled_wq;
	atomic_t sw_irq_stall_last_handled;

	atomic_t nonstall_ops;

	/* Waiters on completion of nonstalling-interrupt bottom halves. */
	struct nvgpu_cond sw_irq_nonstall_last_handled_wq;
	atomic_t sw_irq_nonstall_last_handled;

	/* Deferred work for the nonstall interrupt path. */
	struct work_struct nonstall_fn_work;
	struct workqueue_struct *nonstall_work_queue;

	/* BAR0 register aperture (and saved copy for override/restore). */
	struct resource *reg_mem;
	void __iomem *regs;
	void __iomem *regs_saved;

	/* BAR1 aperture. */
	struct resource *bar1_mem;
	void __iomem *bar1;
	void __iomem *bar1_saved;

	/* Usermode submit register aperture, when supported. */
	void __iomem *usermode_regs;
	void __iomem *usermode_regs_saved;

	struct nvgpu_os_linux_ops ops;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debugfs;
	struct dentry *debugfs_alias;

	struct dentry *debugfs_ltc_enabled;
	struct dentry *debugfs_timeouts_enabled;
	struct dentry *debugfs_gr_idle_timeout_default;
	struct dentry *debugfs_disable_bigpage;
	struct dentry *debugfs_gr_default_attrib_cb_size;

	struct dentry *debugfs_timeslice_low_priority_us;
	struct dentry *debugfs_timeslice_medium_priority_us;
	struct dentry *debugfs_timeslice_high_priority_us;
	struct dentry *debugfs_runlist_interleave;
	struct dentry *debugfs_allocators;
	struct dentry *debugfs_xve;
	struct dentry *debugfs_kmem;
	struct dentry *debugfs_hal;

	struct dentry *debugfs_force_preemption_cilp;
	struct dentry *debugfs_force_preemption_gfxp;
	struct dentry *debugfs_dump_ctxsw_stats;
#endif
	/* ECC statistics exposed through sysfs, keyed by stat name. */
	DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);

	struct gk20a_cde_app cde_app;

	/* Held (read) while the GPU is busy; write side gates power-off. */
	struct rw_semaphore busy_lock;

	struct gk20a_sched_ctrl sched_ctrl;

	bool init_done;
};
153 | |||
154 | static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) | ||
155 | { | ||
156 | return container_of(g, struct nvgpu_os_linux, g); | ||
157 | } | ||
158 | |||
159 | static inline struct device *dev_from_gk20a(struct gk20a *g) | ||
160 | { | ||
161 | return nvgpu_os_linux_from_gk20a(g)->dev; | ||
162 | } | ||
163 | |||
164 | #define INTERFACE_NAME "nvhost%s-gpu" | ||
165 | |||
166 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/os_sched.c b/drivers/gpu/nvgpu/os/linux/os_sched.c new file mode 100644 index 00000000..586b35eb --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/os_sched.c | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/os_sched.h> | ||
15 | |||
16 | #include <linux/sched.h> | ||
17 | |||
/*
 * Return the calling context's thread id.  In the Linux kernel,
 * current->pid is the per-thread id (what userspace sees as gettid()).
 */
int nvgpu_current_tid(struct gk20a *g)
{
	return current->pid;
}
22 | |||
/*
 * Return the calling context's process id.  current->tgid is the
 * thread-group id, i.e. the userspace-visible PID of the process.
 */
int nvgpu_current_pid(struct gk20a *g)
{
	return current->tgid;
}
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c new file mode 100644 index 00000000..1011b441 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.c | |||
@@ -0,0 +1,861 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/pm_runtime.h> | ||
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
22 | |||
23 | #include <nvgpu/nvhost.h> | ||
24 | #include <nvgpu/nvgpu_common.h> | ||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/enabled.h> | ||
27 | #include <nvgpu/nvlink.h> | ||
28 | #include <nvgpu/soc.h> | ||
29 | |||
30 | #include "gk20a/gk20a.h" | ||
31 | #include "clk/clk.h" | ||
32 | #include "clk/clk_mclk.h" | ||
33 | #include "module.h" | ||
34 | #include "intr.h" | ||
35 | #include "sysfs.h" | ||
36 | #include "os_linux.h" | ||
37 | #include "platform_gk20a.h" | ||
38 | #include <nvgpu/sim.h> | ||
39 | |||
40 | #include "pci.h" | ||
41 | #include "pci_usermode.h" | ||
42 | |||
43 | #include "os_linux.h" | ||
44 | #include "driver_common.h" | ||
45 | |||
46 | #define PCI_INTERFACE_NAME "card-%s%%s" | ||
47 | |||
/* Platform probe hook for PCI devices: nothing platform-specific to do. */
static int nvgpu_pci_tegra_probe(struct device *dev)
{
	return 0;
}
52 | |||
53 | static int nvgpu_pci_tegra_remove(struct device *dev) | ||
54 | { | ||
55 | struct gk20a *g = get_gk20a(dev); | ||
56 | |||
57 | if (g->ops.gr.remove_gr_sysfs) | ||
58 | g->ops.gr.remove_gr_sysfs(g); | ||
59 | |||
60 | return 0; | ||
61 | } | ||
62 | |||
/* PCI dGPUs cannot be railgated; always report the rail as up. */
static bool nvgpu_pci_tegra_is_railgated(struct device *pdev)
{
	return false;
}
67 | |||
68 | static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate) | ||
69 | { | ||
70 | long ret = (long)rate; | ||
71 | |||
72 | if (rate == UINT_MAX) | ||
73 | ret = BOOT_GPC2CLK_MHZ * 1000000UL; | ||
74 | |||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | static struct gk20a_platform nvgpu_pci_device[] = { | ||
79 | { /* DEVICE=0x1c35 */ | ||
80 | /* ptimer src frequency in hz */ | ||
81 | .ptimer_src_freq = 31250000, | ||
82 | |||
83 | .probe = nvgpu_pci_tegra_probe, | ||
84 | .remove = nvgpu_pci_tegra_remove, | ||
85 | |||
86 | /* power management configuration */ | ||
87 | .railgate_delay_init = 500, | ||
88 | .can_railgate_init = false, | ||
89 | .can_elpg_init = true, | ||
90 | .enable_elpg = true, | ||
91 | .enable_elcg = false, | ||
92 | .enable_slcg = true, | ||
93 | .enable_blcg = true, | ||
94 | .enable_mscg = true, | ||
95 | .can_slcg = true, | ||
96 | .can_blcg = true, | ||
97 | .can_elcg = true, | ||
98 | |||
99 | .disable_aspm = true, | ||
100 | |||
101 | /* power management callbacks */ | ||
102 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
103 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
104 | |||
105 | .ch_wdt_timeout_ms = 7000, | ||
106 | |||
107 | .honors_aperture = true, | ||
108 | .dma_mask = DMA_BIT_MASK(40), | ||
109 | .vbios_min_version = 0x86063000, | ||
110 | .hardcode_sw_threshold = true, | ||
111 | .ina3221_dcb_index = 0, | ||
112 | .ina3221_i2c_address = 0x84, | ||
113 | .ina3221_i2c_port = 0x2, | ||
114 | }, | ||
115 | { /* DEVICE=0x1c36 */ | ||
116 | /* ptimer src frequency in hz */ | ||
117 | .ptimer_src_freq = 31250000, | ||
118 | |||
119 | .probe = nvgpu_pci_tegra_probe, | ||
120 | .remove = nvgpu_pci_tegra_remove, | ||
121 | |||
122 | /* power management configuration */ | ||
123 | .railgate_delay_init = 500, | ||
124 | .can_railgate_init = false, | ||
125 | .can_elpg_init = true, | ||
126 | .enable_elpg = true, | ||
127 | .enable_elcg = false, | ||
128 | .enable_slcg = true, | ||
129 | .enable_blcg = true, | ||
130 | .enable_mscg = true, | ||
131 | .can_slcg = true, | ||
132 | .can_blcg = true, | ||
133 | .can_elcg = true, | ||
134 | |||
135 | .disable_aspm = true, | ||
136 | |||
137 | /* power management callbacks */ | ||
138 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
139 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
140 | |||
141 | .ch_wdt_timeout_ms = 7000, | ||
142 | |||
143 | .honors_aperture = true, | ||
144 | .dma_mask = DMA_BIT_MASK(40), | ||
145 | .vbios_min_version = 0x86062d00, | ||
146 | .hardcode_sw_threshold = true, | ||
147 | .ina3221_dcb_index = 0, | ||
148 | .ina3221_i2c_address = 0x84, | ||
149 | .ina3221_i2c_port = 0x2, | ||
150 | }, | ||
151 | { /* DEVICE=0x1c37 */ | ||
152 | /* ptimer src frequency in hz */ | ||
153 | .ptimer_src_freq = 31250000, | ||
154 | |||
155 | .probe = nvgpu_pci_tegra_probe, | ||
156 | .remove = nvgpu_pci_tegra_remove, | ||
157 | |||
158 | /* power management configuration */ | ||
159 | .railgate_delay_init = 500, | ||
160 | .can_railgate_init = false, | ||
161 | .can_elpg_init = true, | ||
162 | .enable_elpg = true, | ||
163 | .enable_elcg = false, | ||
164 | .enable_slcg = true, | ||
165 | .enable_blcg = true, | ||
166 | .enable_mscg = true, | ||
167 | .can_slcg = true, | ||
168 | .can_blcg = true, | ||
169 | .can_elcg = true, | ||
170 | |||
171 | .disable_aspm = true, | ||
172 | |||
173 | /* power management callbacks */ | ||
174 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
175 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
176 | |||
177 | .ch_wdt_timeout_ms = 7000, | ||
178 | |||
179 | .honors_aperture = true, | ||
180 | .dma_mask = DMA_BIT_MASK(40), | ||
181 | .vbios_min_version = 0x86063000, | ||
182 | .hardcode_sw_threshold = true, | ||
183 | .ina3221_dcb_index = 0, | ||
184 | .ina3221_i2c_address = 0x84, | ||
185 | .ina3221_i2c_port = 0x2, | ||
186 | }, | ||
187 | { /* DEVICE=0x1c75 */ | ||
188 | /* ptimer src frequency in hz */ | ||
189 | .ptimer_src_freq = 31250000, | ||
190 | |||
191 | .probe = nvgpu_pci_tegra_probe, | ||
192 | .remove = nvgpu_pci_tegra_remove, | ||
193 | |||
194 | /* power management configuration */ | ||
195 | .railgate_delay_init = 500, | ||
196 | .can_railgate_init = false, | ||
197 | .can_elpg_init = true, | ||
198 | .enable_elpg = true, | ||
199 | .enable_elcg = false, | ||
200 | .enable_slcg = true, | ||
201 | .enable_blcg = true, | ||
202 | .enable_mscg = true, | ||
203 | .can_slcg = true, | ||
204 | .can_blcg = true, | ||
205 | .can_elcg = true, | ||
206 | |||
207 | .disable_aspm = true, | ||
208 | |||
209 | /* power management callbacks */ | ||
210 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
211 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
212 | |||
213 | .ch_wdt_timeout_ms = 7000, | ||
214 | |||
215 | .honors_aperture = true, | ||
216 | .dma_mask = DMA_BIT_MASK(40), | ||
217 | .vbios_min_version = 0x86065300, | ||
218 | .hardcode_sw_threshold = false, | ||
219 | .ina3221_dcb_index = 1, | ||
220 | .ina3221_i2c_address = 0x80, | ||
221 | .ina3221_i2c_port = 0x1, | ||
222 | }, | ||
223 | { /* DEVICE=PG503 SKU 201 */ | ||
224 | /* ptimer src frequency in hz */ | ||
225 | .ptimer_src_freq = 31250000, | ||
226 | |||
227 | .probe = nvgpu_pci_tegra_probe, | ||
228 | .remove = nvgpu_pci_tegra_remove, | ||
229 | |||
230 | /* power management configuration */ | ||
231 | .railgate_delay_init = 500, | ||
232 | .can_railgate_init = false, | ||
233 | .can_elpg_init = false, | ||
234 | .enable_elpg = false, | ||
235 | .enable_elcg = false, | ||
236 | .enable_slcg = false, | ||
237 | .enable_blcg = false, | ||
238 | .enable_mscg = false, | ||
239 | .can_slcg = false, | ||
240 | .can_blcg = false, | ||
241 | .can_elcg = false, | ||
242 | |||
243 | .disable_aspm = true, | ||
244 | |||
245 | /* power management callbacks */ | ||
246 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
247 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
248 | |||
249 | .ch_wdt_timeout_ms = 7000, | ||
250 | |||
251 | .honors_aperture = true, | ||
252 | .dma_mask = DMA_BIT_MASK(40), | ||
253 | .vbios_min_version = 0x88001e00, | ||
254 | .hardcode_sw_threshold = false, | ||
255 | .run_preos = true, | ||
256 | }, | ||
257 | { /* DEVICE=PG503 SKU 200 ES */ | ||
258 | /* ptimer src frequency in hz */ | ||
259 | .ptimer_src_freq = 31250000, | ||
260 | |||
261 | .probe = nvgpu_pci_tegra_probe, | ||
262 | .remove = nvgpu_pci_tegra_remove, | ||
263 | |||
264 | /* power management configuration */ | ||
265 | .railgate_delay_init = 500, | ||
266 | .can_railgate_init = false, | ||
267 | .can_elpg_init = false, | ||
268 | .enable_elpg = false, | ||
269 | .enable_elcg = false, | ||
270 | .enable_slcg = false, | ||
271 | .enable_blcg = false, | ||
272 | .enable_mscg = false, | ||
273 | .can_slcg = false, | ||
274 | .can_blcg = false, | ||
275 | .can_elcg = false, | ||
276 | |||
277 | .disable_aspm = true, | ||
278 | |||
279 | /* power management callbacks */ | ||
280 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
281 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
282 | |||
283 | .ch_wdt_timeout_ms = 7000, | ||
284 | |||
285 | .honors_aperture = true, | ||
286 | .dma_mask = DMA_BIT_MASK(40), | ||
287 | .vbios_min_version = 0x88001e00, | ||
288 | .hardcode_sw_threshold = false, | ||
289 | .run_preos = true, | ||
290 | }, | ||
291 | { | ||
292 | /* ptimer src frequency in hz */ | ||
293 | .ptimer_src_freq = 31250000, | ||
294 | |||
295 | .probe = nvgpu_pci_tegra_probe, | ||
296 | .remove = nvgpu_pci_tegra_remove, | ||
297 | |||
298 | /* power management configuration */ | ||
299 | .railgate_delay_init = 500, | ||
300 | .can_railgate_init = false, | ||
301 | .can_elpg_init = false, | ||
302 | .enable_elpg = false, | ||
303 | .enable_elcg = false, | ||
304 | .enable_slcg = false, | ||
305 | .enable_blcg = false, | ||
306 | .enable_mscg = false, | ||
307 | .can_slcg = false, | ||
308 | .can_blcg = false, | ||
309 | .can_elcg = false, | ||
310 | |||
311 | .disable_aspm = true, | ||
312 | |||
313 | /* power management callbacks */ | ||
314 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
315 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
316 | |||
317 | .ch_wdt_timeout_ms = 7000, | ||
318 | |||
319 | .honors_aperture = true, | ||
320 | .dma_mask = DMA_BIT_MASK(40), | ||
321 | .vbios_min_version = 0x88000126, | ||
322 | .hardcode_sw_threshold = false, | ||
323 | .run_preos = true, | ||
324 | .has_syncpoints = true, | ||
325 | }, | ||
326 | { /* SKU250 */ | ||
327 | /* ptimer src frequency in hz */ | ||
328 | .ptimer_src_freq = 31250000, | ||
329 | |||
330 | .probe = nvgpu_pci_tegra_probe, | ||
331 | .remove = nvgpu_pci_tegra_remove, | ||
332 | |||
333 | /* power management configuration */ | ||
334 | .railgate_delay_init = 500, | ||
335 | .can_railgate_init = false, | ||
336 | .can_elpg_init = false, | ||
337 | .enable_elpg = false, | ||
338 | .enable_elcg = true, | ||
339 | .enable_slcg = true, | ||
340 | .enable_blcg = true, | ||
341 | .enable_mscg = false, | ||
342 | .can_slcg = true, | ||
343 | .can_blcg = true, | ||
344 | .can_elcg = true, | ||
345 | |||
346 | .disable_aspm = true, | ||
347 | |||
348 | /* power management callbacks */ | ||
349 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
350 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
351 | |||
352 | .ch_wdt_timeout_ms = 7000, | ||
353 | |||
354 | .honors_aperture = true, | ||
355 | .dma_mask = DMA_BIT_MASK(40), | ||
356 | .vbios_min_version = 0x1, | ||
357 | .hardcode_sw_threshold = false, | ||
358 | .run_preos = true, | ||
359 | .has_syncpoints = true, | ||
360 | }, | ||
361 | { /* SKU 0x1e3f */ | ||
362 | /* ptimer src frequency in hz */ | ||
363 | .ptimer_src_freq = 31250000, | ||
364 | |||
365 | .probe = nvgpu_pci_tegra_probe, | ||
366 | .remove = nvgpu_pci_tegra_remove, | ||
367 | |||
368 | /* power management configuration */ | ||
369 | .railgate_delay_init = 500, | ||
370 | .can_railgate_init = false, | ||
371 | .can_elpg_init = false, | ||
372 | .enable_elpg = false, | ||
373 | .enable_elcg = false, | ||
374 | .enable_slcg = false, | ||
375 | .enable_blcg = false, | ||
376 | .enable_mscg = false, | ||
377 | .can_slcg = false, | ||
378 | .can_blcg = false, | ||
379 | .can_elcg = false, | ||
380 | |||
381 | .disable_aspm = true, | ||
382 | |||
383 | /* power management callbacks */ | ||
384 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
385 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
386 | |||
387 | /* | ||
388 | * WAR: PCIE X1 is very slow, set to very high value till nvlink is up | ||
389 | */ | ||
390 | .ch_wdt_timeout_ms = 30000, | ||
391 | |||
392 | .honors_aperture = true, | ||
393 | .dma_mask = DMA_BIT_MASK(40), | ||
394 | .vbios_min_version = 0x1, | ||
395 | .hardcode_sw_threshold = false, | ||
396 | .unified_memory = false, | ||
397 | }, | ||
398 | { /* 0x1eba */ | ||
399 | /* ptimer src frequency in hz */ | ||
400 | .ptimer_src_freq = 31250000, | ||
401 | |||
402 | .probe = nvgpu_pci_tegra_probe, | ||
403 | .remove = nvgpu_pci_tegra_remove, | ||
404 | |||
405 | /* power management configuration */ | ||
406 | .railgate_delay_init = 500, | ||
407 | .can_railgate_init = false, | ||
408 | .can_elpg_init = false, | ||
409 | .enable_elpg = false, | ||
410 | .enable_elcg = false, | ||
411 | .enable_slcg = false, | ||
412 | .enable_blcg = false, | ||
413 | .enable_mscg = false, | ||
414 | .can_slcg = false, | ||
415 | .can_blcg = false, | ||
416 | .can_elcg = false, | ||
417 | |||
418 | .disable_aspm = true, | ||
419 | |||
420 | /* power management callbacks */ | ||
421 | .is_railgated = nvgpu_pci_tegra_is_railgated, | ||
422 | .clk_round_rate = nvgpu_pci_clk_round_rate, | ||
423 | |||
424 | .ch_wdt_timeout_ms = 7000, | ||
425 | |||
426 | .honors_aperture = true, | ||
427 | .dma_mask = DMA_BIT_MASK(40), | ||
428 | .vbios_min_version = 0x90040109, | ||
429 | .hardcode_sw_threshold = false, | ||
430 | .has_syncpoints = true, | ||
431 | }, | ||
432 | }; | ||
433 | |||
434 | static struct pci_device_id nvgpu_pci_table[] = { | ||
435 | { | ||
436 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35), | ||
437 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
438 | .class_mask = 0xff << 16, | ||
439 | .driver_data = 0, | ||
440 | }, | ||
441 | { | ||
442 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36), | ||
443 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
444 | .class_mask = 0xff << 16, | ||
445 | .driver_data = 1, | ||
446 | }, | ||
447 | { | ||
448 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37), | ||
449 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
450 | .class_mask = 0xff << 16, | ||
451 | .driver_data = 2, | ||
452 | }, | ||
453 | { | ||
454 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75), | ||
455 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
456 | .class_mask = 0xff << 16, | ||
457 | .driver_data = 3, | ||
458 | }, | ||
459 | { | ||
460 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1), | ||
461 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
462 | .class_mask = 0xff << 16, | ||
463 | .driver_data = 4, | ||
464 | }, | ||
465 | { | ||
466 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0), | ||
467 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
468 | .class_mask = 0xff << 16, | ||
469 | .driver_data = 5, | ||
470 | }, | ||
471 | { | ||
472 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe), | ||
473 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
474 | .class_mask = 0xff << 16, | ||
475 | .driver_data = 6, | ||
476 | }, | ||
477 | { | ||
478 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1), | ||
479 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
480 | .class_mask = 0xff << 16, | ||
481 | .driver_data = 7, | ||
482 | }, | ||
483 | { | ||
484 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f), | ||
485 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
486 | .class_mask = 0xff << 16, | ||
487 | .driver_data = 8, | ||
488 | }, | ||
489 | { | ||
490 | PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba), | ||
491 | .class = PCI_BASE_CLASS_DISPLAY << 16, | ||
492 | .class_mask = 0xff << 16, | ||
493 | .driver_data = 9, | ||
494 | }, | ||
495 | {} | ||
496 | }; | ||
497 | |||
498 | static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id) | ||
499 | { | ||
500 | struct gk20a *g = dev_id; | ||
501 | irqreturn_t ret_stall; | ||
502 | irqreturn_t ret_nonstall; | ||
503 | |||
504 | ret_stall = nvgpu_intr_stall(g); | ||
505 | ret_nonstall = nvgpu_intr_nonstall(g); | ||
506 | |||
507 | #if defined(CONFIG_PCI_MSI) | ||
508 | /* Send MSI EOI */ | ||
509 | if (g->ops.xve.rearm_msi && g->msi_enabled) | ||
510 | g->ops.xve.rearm_msi(g); | ||
511 | #endif | ||
512 | |||
513 | return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD; | ||
514 | } | ||
515 | |||
/* Threaded (bottom-half) handler for stalling interrupts. */
static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_thread_stall(g);
}
522 | |||
523 | static int nvgpu_pci_init_support(struct pci_dev *pdev) | ||
524 | { | ||
525 | int err = 0; | ||
526 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
527 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
528 | |||
529 | l->regs = ioremap(pci_resource_start(pdev, 0), | ||
530 | pci_resource_len(pdev, 0)); | ||
531 | if (IS_ERR(l->regs)) { | ||
532 | nvgpu_err(g, "failed to remap gk20a registers"); | ||
533 | err = PTR_ERR(l->regs); | ||
534 | goto fail; | ||
535 | } | ||
536 | |||
537 | l->bar1 = ioremap(pci_resource_start(pdev, 1), | ||
538 | pci_resource_len(pdev, 1)); | ||
539 | if (IS_ERR(l->bar1)) { | ||
540 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
541 | err = PTR_ERR(l->bar1); | ||
542 | goto fail; | ||
543 | } | ||
544 | |||
545 | err = nvgpu_init_sim_support_linux_pci(g); | ||
546 | if (err) | ||
547 | goto fail; | ||
548 | err = nvgpu_init_sim_support_pci(g); | ||
549 | if (err) | ||
550 | goto fail_sim; | ||
551 | |||
552 | nvgpu_pci_init_usermode_support(l); | ||
553 | |||
554 | return 0; | ||
555 | |||
556 | fail_sim: | ||
557 | nvgpu_remove_sim_support_linux_pci(g); | ||
558 | fail: | ||
559 | if (l->regs) { | ||
560 | iounmap(l->regs); | ||
561 | l->regs = NULL; | ||
562 | } | ||
563 | if (l->bar1) { | ||
564 | iounmap(l->bar1); | ||
565 | l->bar1 = NULL; | ||
566 | } | ||
567 | |||
568 | return err; | ||
569 | } | ||
570 | |||
/*
 * devnode callback for the PCI GPU class: nodes are created under
 * /dev/nvgpu-pci/<device name> with world read/write permissions.
 */
static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = S_IRUGO | S_IWUGO;
	return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev));
}
577 | |||
/* Device class for PCI GPUs; registered/unregistered in init/exit. */
static struct class nvgpu_pci_class = {
	.owner = THIS_MODULE,
	.name = "nvidia-pci-gpu",
	.devnode = nvgpu_pci_devnode,
};
583 | |||
#ifdef CONFIG_PM
/* Runtime/system resume: finish powering the GPU back on. */
static int nvgpu_pci_pm_runtime_resume(struct device *dev)
{
	return gk20a_pm_finalize_poweron(dev);
}

/* Runtime/system suspend: nothing to do for PCI (no railgating). */
static int nvgpu_pci_pm_runtime_suspend(struct device *dev)
{
	return 0;
}

/* System sleep reuses the runtime PM callbacks. */
static const struct dev_pm_ops nvgpu_pci_pm_ops = {
	.runtime_resume = nvgpu_pci_pm_runtime_resume,
	.runtime_suspend = nvgpu_pci_pm_runtime_suspend,
	.resume = nvgpu_pci_pm_runtime_resume,
	.suspend = nvgpu_pci_pm_runtime_suspend,
};
#endif
602 | |||
/*
 * Configure runtime PM for a PCI GPU.  Devices that can railgate get
 * autosuspend enabled (with the configured delay); all others have
 * runtime PM disabled outright.  No-op when CONFIG_PM is off.
 */
static int nvgpu_pci_pm_init(struct device *dev)
{
#ifdef CONFIG_PM
	struct gk20a *g = get_gk20a(dev);

	if (g->can_railgate) {
		if (g->railgate_delay)
			pm_runtime_set_autosuspend_delay(dev,
				g->railgate_delay);

		/*
		 * Runtime PM for PCI devices is disabled by default,
		 * so we need to enable it first
		 */
		pm_runtime_use_autosuspend(dev);
		pm_runtime_put_noidle(dev);
		pm_runtime_allow(dev);
	} else {
		pm_runtime_disable(dev);
	}
#endif
	return 0;
}
626 | |||
/*
 * PCI probe: allocate the per-device state, copy in the platform data
 * selected by pent->driver_data, enable the device, hook up the shared
 * interrupt line, map BARs, and register with the nvgpu core.  Error
 * paths unwind in reverse order of acquisition via the goto labels.
 *
 * Returns 0 on success or a negative errno.
 */
static int nvgpu_pci_probe(struct pci_dev *pdev,
			   const struct pci_device_id *pent)
{
	struct gk20a_platform *platform = NULL;
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	int err;
	char nodefmt[64];
	struct device_node *np;

	/* make sure driver_data is a sane index */
	if (pent->driver_data >= sizeof(nvgpu_pci_device) /
				 sizeof(nvgpu_pci_device[0])) {
		return -EINVAL;
	}

	l = kzalloc(sizeof(*l), GFP_KERNEL);
	if (!l) {
		dev_err(&pdev->dev, "couldn't allocate gk20a support");
		return -ENOMEM;
	}

	hash_init(l->ecc_sysfs_stats_htable);

	g = &l->g;
	nvgpu_init_gk20a(g);

	/* kmem must be up before nvgpu_kzalloc() below can be used */
	nvgpu_kmem_init(g);

	/* Allocate memory to hold platform data*/
	platform = (struct gk20a_platform *)nvgpu_kzalloc( g,
			sizeof(struct gk20a_platform));
	if (!platform) {
		dev_err(&pdev->dev, "couldn't allocate platform data");
		err = -ENOMEM;
		goto err_free_l;
	}

	/* copy detected device data to allocated platform space*/
	memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data],
		sizeof(struct gk20a_platform));

	pci_set_drvdata(pdev, platform);

	err = nvgpu_init_enabled_flags(g);
	if (err)
		goto err_free_platform;

	platform->g = g;
	l->dev = &pdev->dev;

	/* Inherit DMA coherence from the device tree node, if coherent. */
	np = nvgpu_get_node(g);
	if (of_dma_is_coherent(np)) {
		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
	}

	err = pci_enable_device(pdev);
	if (err)
		goto err_free_platform;
	pci_set_master(pdev);

	/* Cache PCI identity for HAL selection and sysfs reporting. */
	g->pci_vendor_id = pdev->vendor;
	g->pci_device_id = pdev->device;
	g->pci_subsystem_vendor_id = pdev->subsystem_vendor;
	g->pci_subsystem_device_id = pdev->subsystem_device;
	g->pci_class = (pdev->class >> 8) & 0xFFFFU; // we only want base/sub
	g->pci_revision = pdev->revision;

	/* Power-sensor (INA3221) wiring comes from the platform entry. */
	g->ina3221_dcb_index = platform->ina3221_dcb_index;
	g->ina3221_i2c_address = platform->ina3221_i2c_address;
	g->ina3221_i2c_port = platform->ina3221_i2c_port;
	g->hardcode_sw_threshold = platform->hardcode_sw_threshold;

#if defined(CONFIG_PCI_MSI)
	err = pci_enable_msi(pdev);
	if (err) {
		nvgpu_err(g,
			"MSI could not be enabled, falling back to legacy");
		g->msi_enabled = false;
	} else
		g->msi_enabled = true;
#endif

	/* One shared line serves both stall and nonstall interrupts. */
	g->irq_stall = pdev->irq;
	g->irq_nonstall = pdev->irq;
	if (g->irq_stall < 0) {
		err = -ENXIO;
		goto err_disable_msi;
	}

	err = devm_request_threaded_irq(&pdev->dev,
			g->irq_stall,
			nvgpu_pci_isr,
			nvgpu_pci_intr_thread,
#if defined(CONFIG_PCI_MSI)
			g->msi_enabled ? 0 :
#endif
			IRQF_SHARED, "nvgpu", g);
	if (err) {
		nvgpu_err(g,
			"failed to request irq @ %d", g->irq_stall);
		goto err_disable_msi;
	}
	/* Left disabled until poweron; see nvgpu_pci_remove(). */
	disable_irq(g->irq_stall);

	err = nvgpu_pci_init_support(pdev);
	if (err)
		goto err_free_irq;

	/* dev_name() feeds a format string below; reject '%' in it. */
	if (strchr(dev_name(&pdev->dev), '%')) {
		nvgpu_err(g, "illegal character in device name");
		err = -EINVAL;
		goto err_free_irq;
	}

	snprintf(nodefmt, sizeof(nodefmt),
		 PCI_INTERFACE_NAME, dev_name(&pdev->dev));

	err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class);
	if (err)
		goto err_free_irq;

	err = nvgpu_pci_pm_init(&pdev->dev);
	if (err) {
		nvgpu_err(g, "pm init failed");
		goto err_free_irq;
	}

	err = nvgpu_nvlink_probe(g);
	/*
	 * ENODEV is a legal error which means there is no NVLINK
	 * any other error is fatal
	 */
	if (err) {
		if (err != -ENODEV) {
			nvgpu_err(g, "fatal error probing nvlink, bailing out");
			goto err_free_irq;
		}
		/* Enable Semaphore SHIM on nvlink only for now. */
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false);
		g->has_syncpoints = false;
	} else {
		err = nvgpu_nvhost_syncpt_init(g);
		if (err) {
			if (err != -ENOSYS) {
				nvgpu_err(g, "syncpt init failed");
				goto err_free_irq;
			}
		}
	}

	g->mm.has_physical_mode = false;

	return 0;

err_free_irq:
	nvgpu_free_irq(g);
err_disable_msi:
#if defined(CONFIG_PCI_MSI)
	if (g->msi_enabled)
		pci_disable_msi(pdev);
#endif
err_free_platform:
	nvgpu_kfree(g, platform);
err_free_l:
	kfree(l);
	return err;
}
796 | |||
797 | static void nvgpu_pci_remove(struct pci_dev *pdev) | ||
798 | { | ||
799 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
800 | struct device *dev = dev_from_gk20a(g); | ||
801 | int err; | ||
802 | |||
803 | /* no support yet for unbind if DGPU is in VGPU mode */ | ||
804 | if (gk20a_gpu_is_virtual(dev)) | ||
805 | return; | ||
806 | |||
807 | nvgpu_nvlink_remove(g); | ||
808 | |||
809 | gk20a_driver_start_unload(g); | ||
810 | err = nvgpu_quiesce(g); | ||
811 | /* TODO: handle failure to idle */ | ||
812 | WARN(err, "gpu failed to idle during driver removal"); | ||
813 | |||
814 | nvgpu_free_irq(g); | ||
815 | |||
816 | nvgpu_remove(dev, &nvgpu_pci_class); | ||
817 | |||
818 | #if defined(CONFIG_PCI_MSI) | ||
819 | if (g->msi_enabled) | ||
820 | pci_disable_msi(pdev); | ||
821 | else { | ||
822 | /* IRQ does not need to be enabled in MSI as the line is not | ||
823 | * shared | ||
824 | */ | ||
825 | enable_irq(g->irq_stall); | ||
826 | } | ||
827 | #endif | ||
828 | |||
829 | /* free allocated platform data space */ | ||
830 | nvgpu_kfree(g, gk20a_get_platform(&pdev->dev)); | ||
831 | |||
832 | gk20a_get_platform(&pdev->dev)->g = NULL; | ||
833 | gk20a_put(g); | ||
834 | } | ||
835 | |||
/* PCI driver descriptor; PM hooks only exist when CONFIG_PM is set. */
static struct pci_driver nvgpu_pci_driver = {
	.name = "nvgpu",
	.id_table = nvgpu_pci_table,
	.probe = nvgpu_pci_probe,
	.remove = nvgpu_pci_remove,
#ifdef CONFIG_PM
	.driver.pm = &nvgpu_pci_pm_ops,
#endif
};
845 | |||
846 | int __init nvgpu_pci_init(void) | ||
847 | { | ||
848 | int ret; | ||
849 | |||
850 | ret = class_register(&nvgpu_pci_class); | ||
851 | if (ret) | ||
852 | return ret; | ||
853 | |||
854 | return pci_register_driver(&nvgpu_pci_driver); | ||
855 | } | ||
856 | |||
/* Module exit: unregister the driver, then the class (reverse of init). */
void __exit nvgpu_pci_exit(void)
{
	pci_unregister_driver(&nvgpu_pci_driver);
	class_unregister(&nvgpu_pci_class);
}
diff --git a/drivers/gpu/nvgpu/os/linux/pci.h b/drivers/gpu/nvgpu/os/linux/pci.h new file mode 100644 index 00000000..cc6b77b1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef NVGPU_PCI_H | ||
17 | #define NVGPU_PCI_H | ||
18 | |||
#ifdef CONFIG_GK20A_PCI
int nvgpu_pci_init(void);
void nvgpu_pci_exit(void);
#else
/* PCI support compiled out: keep callers building with no-op stubs. */
static inline int nvgpu_pci_init(void) { return 0; }
static inline void nvgpu_pci_exit(void) {}
#endif
26 | |||
27 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.c b/drivers/gpu/nvgpu/os/linux/pci_usermode.c new file mode 100644 index 00000000..270b834b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/types.h> | ||
15 | |||
16 | #include <nvgpu/hw/gv11b/hw_usermode_gv11b.h> | ||
17 | |||
18 | #include "os_linux.h" | ||
19 | |||
20 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l) | ||
21 | { | ||
22 | l->usermode_regs = l->regs + usermode_cfg0_r(); | ||
23 | l->usermode_regs_saved = l->usermode_regs; | ||
24 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/pci_usermode.h b/drivers/gpu/nvgpu/os/linux/pci_usermode.h new file mode 100644 index 00000000..25a08d28 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/pci_usermode.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #ifndef __NVGPU_PCI_USERMODE_H__ | ||
17 | #define __NVGPU_PCI_USERMODE_H__ | ||
18 | |||
19 | struct nvgpu_os_linux; | ||
20 | |||
21 | void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l); | ||
22 | |||
23 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c new file mode 100644 index 00000000..2a6ace37 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c | |||
@@ -0,0 +1,269 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/hashtable.h> | ||
18 | |||
19 | #include <nvgpu/kmem.h> | ||
20 | #include <nvgpu/bug.h> | ||
21 | #include <nvgpu/hashtable.h> | ||
22 | |||
23 | #include "os_linux.h" | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | |||
27 | #include "platform_gk20a.h" | ||
28 | #include "platform_gk20a_tegra.h" | ||
29 | #include "platform_gp10b.h" | ||
30 | #include "platform_gp10b_tegra.h" | ||
31 | #include "platform_ecc_sysfs.h" | ||
32 | |||
33 | static u32 gen_ecc_hash_key(char *str) | ||
34 | { | ||
35 | int i = 0; | ||
36 | u32 hash_key = 0x811c9dc5; | ||
37 | |||
38 | while (str[i]) { | ||
39 | hash_key *= 0x1000193; | ||
40 | hash_key ^= (u32)(str[i]); | ||
41 | i++; | ||
42 | }; | ||
43 | |||
44 | return hash_key; | ||
45 | } | ||
46 | |||
/*
 * sysfs "show" callback shared by all ECC statistic files.
 *
 * The attribute file name encodes both the hardware unit instance and
 * the per-unit statistic name (e.g. "ltc1_lts0_<stat>").  The unit
 * index is parsed from the prefix, the remaining base name is hashed to
 * find the matching gk20a_ecc_stat, and the counter for that unit is
 * printed into @buf.
 */
static ssize_t ecc_stat_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	const char *ecc_stat_full_name = attr->attr.name;
	const char *ecc_stat_base_name;
	unsigned int hw_unit;
	unsigned int subunit;
	struct gk20a_ecc_stat *ecc_stat;
	u32 hash_key;
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/*
	 * Parse the unit index out of the name prefix.  Pattern order is
	 * significant: "ltc%u_lts%u" must be tried before "ltc%u", and
	 * "gpc0_tpc%u" before "gpc%u", because the shorter patterns also
	 * match the longer names.
	 */
	if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
							&subunit) == 2) {
		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
		/* flatten (ltc, lts) into a single linear slice index */
		hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
	} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
		ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
	} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
	} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
		ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
	} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
		ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
	} else {
		return snprintf(buf,
				PAGE_SIZE,
				"Error: Invalid ECC stat name!\n");
	}

	/* The table is keyed on the base name only (see gen_ecc_hash_key) */
	hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);

	hash_for_each_possible(l->ecc_sysfs_stats_htable,
				ecc_stat,
				hash_node,
				hash_key) {
		if (hw_unit >= ecc_stat->count)
			continue;
		/* full-name compare disambiguates hash-bucket collisions */
		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
			return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
	}

	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
}
92 | |||
93 | int nvgpu_gr_ecc_stat_create(struct device *dev, | ||
94 | int is_l2, char *ecc_stat_name, | ||
95 | struct gk20a_ecc_stat *ecc_stat) | ||
96 | { | ||
97 | struct gk20a *g = get_gk20a(dev); | ||
98 | char *ltc_unit_name = "ltc"; | ||
99 | char *gr_unit_name = "gpc0_tpc"; | ||
100 | char *lts_unit_name = "lts"; | ||
101 | int num_hw_units = 0; | ||
102 | int num_subunits = 0; | ||
103 | |||
104 | if (is_l2 == 1) | ||
105 | num_hw_units = g->ltc_count; | ||
106 | else if (is_l2 == 2) { | ||
107 | num_hw_units = g->ltc_count; | ||
108 | num_subunits = g->gr.slices_per_ltc; | ||
109 | } else | ||
110 | num_hw_units = g->gr.tpc_count; | ||
111 | |||
112 | |||
113 | return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits, | ||
114 | is_l2 ? ltc_unit_name : gr_unit_name, | ||
115 | num_subunits ? lts_unit_name: NULL, | ||
116 | ecc_stat_name, | ||
117 | ecc_stat); | ||
118 | } | ||
119 | |||
120 | int nvgpu_ecc_stat_create(struct device *dev, | ||
121 | int num_hw_units, int num_subunits, | ||
122 | char *ecc_unit_name, char *ecc_subunit_name, | ||
123 | char *ecc_stat_name, | ||
124 | struct gk20a_ecc_stat *ecc_stat) | ||
125 | { | ||
126 | int error = 0; | ||
127 | struct gk20a *g = get_gk20a(dev); | ||
128 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
129 | int hw_unit = 0; | ||
130 | int subunit = 0; | ||
131 | int element = 0; | ||
132 | u32 hash_key = 0; | ||
133 | struct device_attribute *dev_attr_array; | ||
134 | |||
135 | int num_elements = num_subunits ? num_subunits * num_hw_units : | ||
136 | num_hw_units; | ||
137 | |||
138 | /* Allocate arrays */ | ||
139 | dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) * | ||
140 | num_elements); | ||
141 | ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements); | ||
142 | ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements); | ||
143 | |||
144 | for (hw_unit = 0; hw_unit < num_elements; hw_unit++) { | ||
145 | ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) * | ||
146 | ECC_STAT_NAME_MAX_SIZE); | ||
147 | } | ||
148 | ecc_stat->count = num_elements; | ||
149 | if (num_subunits) { | ||
150 | for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { | ||
151 | for (subunit = 0; subunit < num_subunits; subunit++) { | ||
152 | element = hw_unit*num_subunits + subunit; | ||
153 | |||
154 | snprintf(ecc_stat->names[element], | ||
155 | ECC_STAT_NAME_MAX_SIZE, | ||
156 | "%s%d_%s%d_%s", | ||
157 | ecc_unit_name, | ||
158 | hw_unit, | ||
159 | ecc_subunit_name, | ||
160 | subunit, | ||
161 | ecc_stat_name); | ||
162 | |||
163 | sysfs_attr_init(&dev_attr_array[element].attr); | ||
164 | dev_attr_array[element].attr.name = | ||
165 | ecc_stat->names[element]; | ||
166 | dev_attr_array[element].attr.mode = | ||
167 | VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
168 | dev_attr_array[element].show = ecc_stat_show; | ||
169 | dev_attr_array[element].store = NULL; | ||
170 | |||
171 | /* Create sysfs file */ | ||
172 | error |= device_create_file(dev, | ||
173 | &dev_attr_array[element]); | ||
174 | |||
175 | } | ||
176 | } | ||
177 | } else { | ||
178 | for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { | ||
179 | |||
180 | /* Fill in struct device_attribute members */ | ||
181 | snprintf(ecc_stat->names[hw_unit], | ||
182 | ECC_STAT_NAME_MAX_SIZE, | ||
183 | "%s%d_%s", | ||
184 | ecc_unit_name, | ||
185 | hw_unit, | ||
186 | ecc_stat_name); | ||
187 | |||
188 | sysfs_attr_init(&dev_attr_array[hw_unit].attr); | ||
189 | dev_attr_array[hw_unit].attr.name = | ||
190 | ecc_stat->names[hw_unit]; | ||
191 | dev_attr_array[hw_unit].attr.mode = | ||
192 | VERIFY_OCTAL_PERMISSIONS(S_IRUGO); | ||
193 | dev_attr_array[hw_unit].show = ecc_stat_show; | ||
194 | dev_attr_array[hw_unit].store = NULL; | ||
195 | |||
196 | /* Create sysfs file */ | ||
197 | error |= device_create_file(dev, | ||
198 | &dev_attr_array[hw_unit]); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* Add hash table entry */ | ||
203 | hash_key = gen_ecc_hash_key(ecc_stat_name); | ||
204 | hash_add(l->ecc_sysfs_stats_htable, | ||
205 | &ecc_stat->hash_node, | ||
206 | hash_key); | ||
207 | |||
208 | ecc_stat->attr_array = dev_attr_array; | ||
209 | |||
210 | return error; | ||
211 | } | ||
212 | |||
213 | void nvgpu_gr_ecc_stat_remove(struct device *dev, | ||
214 | int is_l2, struct gk20a_ecc_stat *ecc_stat) | ||
215 | { | ||
216 | struct gk20a *g = get_gk20a(dev); | ||
217 | int num_hw_units = 0; | ||
218 | int num_subunits = 0; | ||
219 | |||
220 | if (is_l2 == 1) | ||
221 | num_hw_units = g->ltc_count; | ||
222 | else if (is_l2 == 2) { | ||
223 | num_hw_units = g->ltc_count; | ||
224 | num_subunits = g->gr.slices_per_ltc; | ||
225 | } else | ||
226 | num_hw_units = g->gr.tpc_count; | ||
227 | |||
228 | nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat); | ||
229 | } | ||
230 | |||
231 | void nvgpu_ecc_stat_remove(struct device *dev, | ||
232 | int num_hw_units, int num_subunits, | ||
233 | struct gk20a_ecc_stat *ecc_stat) | ||
234 | { | ||
235 | struct gk20a *g = get_gk20a(dev); | ||
236 | struct device_attribute *dev_attr_array = ecc_stat->attr_array; | ||
237 | int hw_unit = 0; | ||
238 | int subunit = 0; | ||
239 | int element = 0; | ||
240 | int num_elements = num_subunits ? num_subunits * num_hw_units : | ||
241 | num_hw_units; | ||
242 | |||
243 | /* Remove sysfs files */ | ||
244 | if (num_subunits) { | ||
245 | for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { | ||
246 | for (subunit = 0; subunit < num_subunits; subunit++) { | ||
247 | element = hw_unit * num_subunits + subunit; | ||
248 | |||
249 | device_remove_file(dev, | ||
250 | &dev_attr_array[element]); | ||
251 | } | ||
252 | } | ||
253 | } else { | ||
254 | for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) | ||
255 | device_remove_file(dev, &dev_attr_array[hw_unit]); | ||
256 | } | ||
257 | |||
258 | /* Remove hash table entry */ | ||
259 | hash_del(&ecc_stat->hash_node); | ||
260 | |||
261 | /* Free arrays */ | ||
262 | nvgpu_kfree(g, ecc_stat->counters); | ||
263 | |||
264 | for (hw_unit = 0; hw_unit < num_elements; hw_unit++) | ||
265 | nvgpu_kfree(g, ecc_stat->names[hw_unit]); | ||
266 | |||
267 | nvgpu_kfree(g, ecc_stat->names); | ||
268 | nvgpu_kfree(g, dev_attr_array); | ||
269 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h new file mode 100644 index 00000000..d29f7bd3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef _NVGPU_PLATFORM_SYSFS_H_ | ||
18 | #define _NVGPU_PLATFORM_SYSFS_H_ | ||
19 | |||
20 | #include "gp10b/gr_gp10b.h" | ||
21 | |||
22 | #define ECC_STAT_NAME_MAX_SIZE 100 | ||
23 | |||
24 | int nvgpu_gr_ecc_stat_create(struct device *dev, | ||
25 | int is_l2, char *ecc_stat_name, | ||
26 | struct gk20a_ecc_stat *ecc_stat); | ||
27 | int nvgpu_ecc_stat_create(struct device *dev, | ||
28 | int num_hw_units, int num_subunits, | ||
29 | char *ecc_unit_name, char *ecc_subunit_name, | ||
30 | char *ecc_stat_name, | ||
31 | struct gk20a_ecc_stat *ecc_stat); | ||
32 | void nvgpu_gr_ecc_stat_remove(struct device *dev, | ||
33 | int is_l2, struct gk20a_ecc_stat *ecc_stat); | ||
34 | void nvgpu_ecc_stat_remove(struct device *dev, | ||
35 | int num_hw_units, int num_subunits, | ||
36 | struct gk20a_ecc_stat *ecc_stat); | ||
37 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h new file mode 100644 index 00000000..9a99b7fe --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h | |||
@@ -0,0 +1,317 @@ | |||
1 | /* | ||
2 | * GK20A Platform (SoC) Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #ifndef _GK20A_PLATFORM_H_ | ||
17 | #define _GK20A_PLATFORM_H_ | ||
18 | |||
19 | #include <linux/device.h> | ||
20 | |||
21 | #include <nvgpu/lock.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | |||
25 | #define GK20A_CLKS_MAX 4 | ||
26 | |||
27 | struct gk20a; | ||
28 | struct channel_gk20a; | ||
29 | struct gr_ctx_buffer_desc; | ||
30 | struct gk20a_scale_profile; | ||
31 | |||
/*
 * Preallocated carve-out from which secure (VPR) pages are handed out.
 * NOTE(review): allocation appears to be bump-style via `used` — confirm
 * against the code managing this buffer.
 */
struct secure_page_buffer {
	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
	size_t size;		/* total carve-out size in bytes */
	dma_addr_t phys;	/* physical/DMA base address */
	size_t used;		/* bytes handed out so far */
};
38 | |||
/*
 * Per-platform (SoC or PCI board) configuration and callbacks for a
 * gk20a device.  Filled in by the platform's probe() and consumed by the
 * common driver; retrieved via gk20a_get_platform() from drvdata.
 */
struct gk20a_platform {
	/* Populated by the gk20a driver before probing the platform. */
	struct gk20a *g;

	/* Should be populated at probe. */
	bool can_railgate_init;

	/* Should be populated at probe. */
	bool can_elpg_init;

	/* Should be populated at probe. */
	bool has_syncpoints;

	/* channel limit after which to start aggressive sync destroy */
	unsigned int aggressive_sync_destroy_thresh;

	/* flag to set sync destroy aggressiveness */
	bool aggressive_sync_destroy;

	/* set if ASPM should be disabled on boot; only makes sense for PCI */
	bool disable_aspm;

	/* Set if the platform can unify the small/large address spaces. */
	bool unify_address_spaces;

	/* Clock configuration is stored here. Platform probe is responsible
	 * for filling this data. */
	struct clk *clk[GK20A_CLKS_MAX];
	int num_clks;
	int maxmin_clk_id;

#ifdef CONFIG_RESET_CONTROLLER
	/* Reset control for device */
	struct reset_control *reset_control;
#endif

	/* Delay before rail gated */
	int railgate_delay_init;

	/* init value for slowdown factor */
	u8 ldiv_slowdown_factor_init;

	/* Second Level Clock Gating: true = enable false = disable */
	bool enable_slcg;

	/* Block Level Clock Gating: true = enable false = disable */
	bool enable_blcg;

	/* Engine Level Clock Gating: true = enable false = disable */
	bool enable_elcg;

	/* Should be populated at probe. */
	bool can_slcg;

	/* Should be populated at probe. */
	bool can_blcg;

	/* Should be populated at probe. */
	bool can_elcg;

	/* Engine Level Power Gating: true = enable false = disable */
	bool enable_elpg;

	/* Adaptive ELPG: true = enable false = disable */
	bool enable_aelpg;

	/* PMU Perfmon: true = enable false = disable */
	bool enable_perfmon;

	/* Memory System Clock Gating: true = enable false = disable */
	bool enable_mscg;

	/* Timeout for per-channel watchdog (in mS) */
	u32 ch_wdt_timeout_ms;

	/* Disable big page support */
	bool disable_bigpage;

	/*
	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
	 * This flag can be used to force CAR reset case instead of rail gate
	 */
	bool force_reset_in_do_idle;

	/* guest/vm id, needed for IPA to PA translation */
	int vmid;

	/* Initialize the platform interface of the gk20a driver.
	 *
	 * The platform implementation of this function must
	 * - set the power and clocks of the gk20a device to a known
	 *   state, and
	 * - populate the gk20a_platform structure (a pointer to the
	 *   structure can be obtained by calling gk20a_get_platform).
	 *
	 * After this function is finished, the driver will initialise
	 * pm runtime and genpd based on the platform configuration.
	 */
	int (*probe)(struct device *dev);

	/* Second stage initialisation - called once all power management
	 * initialisations are done.
	 */
	int (*late_probe)(struct device *dev);

	/* Remove device after power management has been done
	 */
	int (*remove)(struct device *dev);

	/* Poweron platform dependencies */
	int (*busy)(struct device *dev);

	/* Powerdown platform dependencies */
	void (*idle)(struct device *dev);

	/* Preallocated VPR buffer for kernel */
	size_t secure_buffer_size;
	struct secure_page_buffer secure_buffer;

	/* Device is going to be suspended */
	int (*suspend)(struct device *);

	/* Called to turn off the device */
	int (*railgate)(struct device *dev);

	/* Called to turn on the device */
	int (*unrailgate)(struct device *dev);
	struct nvgpu_mutex railgate_lock;

	/* Called to check state of device */
	bool (*is_railgated)(struct device *dev);

	/* get supported frequency list */
	int (*get_clk_freqs)(struct device *pdev,
				unsigned long **freqs, int *num_freqs);

	/* clk related supported functions */
	long (*clk_round_rate)(struct device *dev,
				unsigned long rate);

	/* Called to register GPCPLL with common clk framework */
	int (*clk_register)(struct gk20a *g);

	/* platform specific scale init quirks */
	void (*initscale)(struct device *dev);

	/* Postscale callback is called after frequency change */
	void (*postscale)(struct device *dev,
			  unsigned long freq);

	/* Pre callback is called before frequency change */
	void (*prescale)(struct device *dev);

	/* Devfreq governor name. If scaling is enabled, we request
	 * this governor to be used in scaling */
	const char *devfreq_governor;

	/* Quality of service notifier callback. If this is set, the scaling
	 * routines will register a callback to Qos. Each time we receive
	 * a new value, this callback gets called. */
	int (*qos_notify)(struct notifier_block *nb,
			  unsigned long n, void *p);

	/* Called as part of debug dump. If the gpu gets hung, this function
	 * is responsible for delivering all necessary debug data of other
	 * hw units which may interact with the gpu without direct supervision
	 * of the CPU.
	 */
	void (*dump_platform_dependencies)(struct device *dev);

	/* Defined when SMMU stage-2 is enabled, and we need to use physical
	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
	 * configuration, when dGPU is in pass-through mode.
	 */
	u64 (*phys_addr)(struct gk20a *g, u64 ipa);

	/* Callbacks to assert/deassert GPU reset */
	int (*reset_assert)(struct device *dev);
	int (*reset_deassert)(struct device *dev);
	struct clk *clk_reset;
	struct dvfs_rail *gpu_rail;

	/* true when this device is a virtualized (vgpu) instance */
	bool virtual_dev;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	void *vgpu_priv;
#endif
	/* source frequency for ptimer in hz */
	u32 ptimer_src_freq;

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	bool has_cde;
#endif

	/* soc name for finding firmware files */
	const char *soc_name;

	/* false if vidmem aperture actually points to sysmem */
	bool honors_aperture;
	/* unified or split memory with separate vidmem? */
	bool unified_memory;

	/*
	 * DMA mask for Linux (both coh and non-coh). If not set defaults to
	 * 0x3ffffffff (i.e a 34 bit mask).
	 */
	u64 dma_mask;

	/* minimum supported VBIOS version */
	u32 vbios_min_version;

	/* true if we run preos microcode on this board */
	bool run_preos;

	/* true if we need to program sw threshold for
	 * power limits
	 */
	bool hardcode_sw_threshold;

	/* i2c device index, port and address for INA3221 */
	u32 ina3221_dcb_index;
	u32 ina3221_i2c_address;
	u32 ina3221_i2c_port;

	/* stream id to use */
	u32 ltc_streamid;

	/* scaling rate */
	unsigned long cached_rate;
};
268 | |||
/*
 * Fetch the platform data stashed in the device's drvdata.  The cast is
 * implicit from void * in C.
 */
static inline struct gk20a_platform *gk20a_get_platform(
		struct device *dev)
{
	return dev_get_drvdata(dev);
}
274 | |||
275 | #ifdef CONFIG_TEGRA_GK20A | ||
276 | extern struct gk20a_platform gm20b_tegra_platform; | ||
277 | extern struct gk20a_platform gp10b_tegra_platform; | ||
278 | extern struct gk20a_platform gv11b_tegra_platform; | ||
279 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | ||
280 | extern struct gk20a_platform vgpu_tegra_platform; | ||
281 | extern struct gk20a_platform gv11b_vgpu_tegra_platform; | ||
282 | #endif | ||
283 | #endif | ||
284 | |||
285 | int gk20a_tegra_busy(struct device *dev); | ||
286 | void gk20a_tegra_idle(struct device *dev); | ||
287 | void gk20a_tegra_debug_dump(struct device *pdev); | ||
288 | |||
289 | static inline struct gk20a *get_gk20a(struct device *dev) | ||
290 | { | ||
291 | return gk20a_get_platform(dev)->g; | ||
292 | } | ||
293 | static inline struct gk20a *gk20a_from_dev(struct device *dev) | ||
294 | { | ||
295 | if (!dev) | ||
296 | return NULL; | ||
297 | |||
298 | return ((struct gk20a_platform *)dev_get_drvdata(dev))->g; | ||
299 | } | ||
300 | static inline bool gk20a_gpu_is_virtual(struct device *dev) | ||
301 | { | ||
302 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
303 | |||
304 | return platform->virtual_dev; | ||
305 | } | ||
306 | |||
307 | static inline int support_gk20a_pmu(struct device *dev) | ||
308 | { | ||
309 | if (IS_ENABLED(CONFIG_GK20A_PMU)) { | ||
310 | /* gPMU is not supported for vgpu */ | ||
311 | return !gk20a_gpu_is_virtual(dev); | ||
312 | } | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c new file mode 100644 index 00000000..af55e5b6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c | |||
@@ -0,0 +1,957 @@ | |||
1 | /* | ||
2 | * GK20A Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/clkdev.h> | ||
17 | #include <linux/of_platform.h> | ||
18 | #include <linux/debugfs.h> | ||
19 | #include <linux/platform_data/tegra_edp.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <uapi/linux/nvgpu.h> | ||
22 | #include <linux/dma-buf.h> | ||
23 | #include <linux/dma-attrs.h> | ||
24 | #include <linux/nvmap.h> | ||
25 | #include <linux/reset.h> | ||
26 | #if defined(CONFIG_TEGRA_DVFS) | ||
27 | #include <linux/tegra_soctherm.h> | ||
28 | #endif | ||
29 | #include <linux/platform/tegra/common.h> | ||
30 | #include <linux/platform/tegra/mc.h> | ||
31 | #include <linux/clk/tegra.h> | ||
32 | #if defined(CONFIG_COMMON_CLK) | ||
33 | #include <soc/tegra/tegra-dvfs.h> | ||
34 | #endif | ||
35 | #ifdef CONFIG_TEGRA_BWMGR | ||
36 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
37 | #endif | ||
38 | |||
39 | #include <linux/platform/tegra/tegra_emc.h> | ||
40 | #include <soc/tegra/chip-id.h> | ||
41 | |||
42 | #include <nvgpu/kmem.h> | ||
43 | #include <nvgpu/bug.h> | ||
44 | #include <nvgpu/enabled.h> | ||
45 | #include <nvgpu/nvhost.h> | ||
46 | |||
47 | #include <nvgpu/linux/dma.h> | ||
48 | |||
49 | #include "gk20a/gk20a.h" | ||
50 | #include "gm20b/clk_gm20b.h" | ||
51 | |||
52 | #include "scale.h" | ||
53 | #include "platform_gk20a.h" | ||
54 | #include "clk.h" | ||
55 | #include "os_linux.h" | ||
56 | |||
57 | #include "../../../arch/arm/mach-tegra/iomap.h" | ||
58 | #include <soc/tegra/pmc.h> | ||
59 | |||
/* Memory bandwidth demanded per unit of GPU/EMC frequency, per chip/DDR
 * generation; consumed by gk20a_tegra_calibrate_emc() below. */
#define TEGRA_GK20A_BW_PER_FREQ 32
#define TEGRA_GM20B_BW_PER_FREQ 64
#define TEGRA_DDR3_BW_PER_FREQ 16
#define TEGRA_DDR4_BW_PER_FREQ 16
/* Memory controller client id used for tegra_mc_flush() of GPU traffic */
#define MC_CLIENT_GPU 34
/* PMC register controlling the GPU rail-gate clamp (see railgate paths) */
#define PMC_GPU_RG_CNTRL_0 0x2d4

/* The DVFS rail name differs between CCF and legacy clock kernels */
#ifdef CONFIG_COMMON_CLK
#define GPU_RAIL_NAME "vdd-gpu"
#else
#define GPU_RAIL_NAME "vdd_gpu"
#endif

/* VPR carve-out device; DMA allocations against it land in secure memory */
extern struct device tegra_vpr_dev;

/* Per-profile EMC scaling state attached to gk20a_scale_profile's
 * private_data by gk20a_tegra_scale_init(). */
#ifdef CONFIG_TEGRA_BWMGR
struct gk20a_emc_params {
	unsigned long bw_ratio;		/* gpu<->emc bandwidth ratio */
	unsigned long freq_last_set;	/* last EMC floor requested (Hz) */
	struct tegra_bwmgr_client *bwmgr_cl;	/* bandwidth manager handle */
};
#else
struct gk20a_emc_params {
	unsigned long bw_ratio;		/* gpu<->emc bandwidth ratio */
	unsigned long freq_last_set;	/* last EMC floor requested (Hz) */
};
#endif

#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)
90 | |||
/*
 * Release the preallocated VPR (secure) buffer that was obtained in
 * gk20a_tegra_init_secure_alloc(). Frees the DMA allocation made against
 * the VPR device and clears the destroy hook so it runs at most once.
 */
static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
				       struct secure_page_buffer *secure_buffer)
{
	DEFINE_DMA_ATTRS(attrs);
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
	/* the allocation had no kernel mapping; phys doubles as the cookie */
	dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
			(void *)(uintptr_t)secure_buffer->phys,
			secure_buffer->phys, __DMA_ATTR(attrs));

	secure_buffer->destroy = NULL;
}
102 | |||
/*
 * Carve @size bytes for @desc out of the preallocated VPR buffer.
 *
 * Allocations are bump-pointer style from platform->secure_buffer and are
 * never returned individually (desc->destroy is NULL); the whole buffer is
 * released at once by gk20a_tegra_secure_page_destroy().
 *
 * Returns 0 on success (or if @desc already holds valid memory), -ENOMEM
 * when the carve-out is exhausted or bookkeeping allocation fails.
 */
static int gk20a_tegra_secure_alloc(struct gk20a *g,
				struct gr_ctx_buffer_desc *desc,
				size_t size)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
	dma_addr_t phys;
	struct sg_table *sgt;
	struct page *page;
	int err = 0;
	/* accounting is done in page granularity; the sg entry keeps the
	 * caller's exact size */
	size_t aligned_size = PAGE_ALIGN(size);

	if (nvgpu_mem_is_valid(&desc->mem))
		return 0;

	/* We ran out of preallocated memory */
	if (secure_buffer->used + aligned_size > secure_buffer->size) {
		nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
				size, secure_buffer->used, secure_buffer->size);
		return -ENOMEM;
	}

	phys = secure_buffer->phys + secure_buffer->used;

	sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
	if (!sgt) {
		nvgpu_err(platform->g, "failed to allocate memory");
		return -ENOMEM;
	}
	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (err) {
		nvgpu_err(platform->g, "failed to allocate sg_table");
		goto fail_sgt;
	}
	page = phys_to_page(phys);
	sg_set_page(sgt->sgl, page, size, 0);
	/* This bypasses SMMU for VPR during gmmu_map. */
	sg_dma_address(sgt->sgl) = 0;

	/* carve-out chunks are freed wholesale, never one by one */
	desc->destroy = NULL;

	desc->mem.priv.sgt = sgt;
	desc->mem.size = size;
	desc->mem.aperture = APERTURE_SYSMEM;

	secure_buffer->used += aligned_size;

	return err;

fail_sgt:
	nvgpu_kfree(platform->g, sgt);
	return err;
}
157 | |||
158 | /* | ||
159 | * gk20a_tegra_get_emc_rate() | ||
160 | * | ||
161 | * This function returns the minimum emc clock based on gpu frequency | ||
162 | */ | ||
163 | |||
164 | static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, | ||
165 | struct gk20a_emc_params *emc_params) | ||
166 | { | ||
167 | unsigned long gpu_freq, gpu_fmax_at_vmin; | ||
168 | unsigned long emc_rate, emc_scale; | ||
169 | |||
170 | gpu_freq = clk_get_rate(g->clk.tegra_clk); | ||
171 | gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t( | ||
172 | clk_get_parent(g->clk.tegra_clk)); | ||
173 | |||
174 | /* When scaling emc, account for the gpu load when the | ||
175 | * gpu frequency is less than or equal to fmax@vmin. */ | ||
176 | if (gpu_freq <= gpu_fmax_at_vmin) | ||
177 | emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); | ||
178 | else | ||
179 | emc_scale = g->emc3d_ratio; | ||
180 | |||
181 | emc_rate = | ||
182 | (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000; | ||
183 | |||
184 | return MHZ_TO_HZ(emc_rate); | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * gk20a_tegra_prescale(profile, freq) | ||
189 | * | ||
190 | * This function informs EDP about changed constraints. | ||
191 | */ | ||
192 | |||
193 | static void gk20a_tegra_prescale(struct device *dev) | ||
194 | { | ||
195 | struct gk20a *g = get_gk20a(dev); | ||
196 | u32 avg = 0; | ||
197 | |||
198 | nvgpu_pmu_load_norm(g, &avg); | ||
199 | tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk)); | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * gk20a_tegra_calibrate_emc() | ||
204 | * | ||
205 | */ | ||
206 | |||
207 | static void gk20a_tegra_calibrate_emc(struct device *dev, | ||
208 | struct gk20a_emc_params *emc_params) | ||
209 | { | ||
210 | enum tegra_chipid cid = tegra_get_chip_id(); | ||
211 | long gpu_bw, emc_bw; | ||
212 | |||
213 | /* store gpu bw based on soc */ | ||
214 | switch (cid) { | ||
215 | case TEGRA210: | ||
216 | gpu_bw = TEGRA_GM20B_BW_PER_FREQ; | ||
217 | break; | ||
218 | case TEGRA124: | ||
219 | case TEGRA132: | ||
220 | gpu_bw = TEGRA_GK20A_BW_PER_FREQ; | ||
221 | break; | ||
222 | default: | ||
223 | gpu_bw = 0; | ||
224 | break; | ||
225 | } | ||
226 | |||
227 | /* TODO detect DDR type. | ||
228 | * Okay for now since DDR3 and DDR4 have the same BW ratio */ | ||
229 | emc_bw = TEGRA_DDR3_BW_PER_FREQ; | ||
230 | |||
231 | /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq | ||
232 | * NOTE the ratio must come out as an integer */ | ||
233 | emc_params->bw_ratio = (gpu_bw / emc_bw); | ||
234 | } | ||
235 | |||
236 | #ifdef CONFIG_TEGRA_BWMGR | ||
237 | #ifdef CONFIG_TEGRA_DVFS | ||
238 | static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb) | ||
239 | { | ||
240 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
241 | struct gk20a_emc_params *params; | ||
242 | unsigned long rate; | ||
243 | |||
244 | if (!profile || !profile->private_data) | ||
245 | return; | ||
246 | |||
247 | params = (struct gk20a_emc_params *)profile->private_data; | ||
248 | rate = (enb) ? params->freq_last_set : 0; | ||
249 | tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
250 | } | ||
251 | #endif | ||
252 | |||
/*
 * Devfreq postscale hook: raise/lower the EMC floor to match the new GPU
 * frequency. The computed rate is always cached in freq_last_set — even
 * while railgated — so gm20b_bwmgr_set_rate() can restore it on
 * unrailgate; only the actual bwmgr request is skipped when the rail is
 * down.
 */
static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *emc_params;
	unsigned long emc_rate;

	if (!profile || !profile->private_data)
		return;

	emc_params = profile->private_data;
	emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params);

	/* clamp to what the memory controller can actually deliver */
	if (emc_rate > tegra_bwmgr_get_max_emc_rate())
		emc_rate = tegra_bwmgr_get_max_emc_rate();

	emc_params->freq_last_set = emc_rate;
	if (platform->is_railgated && platform->is_railgated(dev))
		return;

	tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate,
			TEGRA_BWMGR_SET_EMC_FLOOR);

}
277 | |||
278 | #endif | ||
279 | |||
280 | #if defined(CONFIG_TEGRA_DVFS) | ||
281 | /* | ||
282 | * gk20a_tegra_is_railgated() | ||
283 | * | ||
284 | * Check status of gk20a power rail | ||
285 | */ | ||
286 | |||
287 | static bool gk20a_tegra_is_railgated(struct device *dev) | ||
288 | { | ||
289 | struct gk20a *g = get_gk20a(dev); | ||
290 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
291 | bool ret = false; | ||
292 | |||
293 | if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
294 | ret = !tegra_dvfs_is_rail_up(platform->gpu_rail); | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * gm20b_tegra_railgate() | ||
301 | * | ||
302 | * Gate (disable) gm20b power rail | ||
303 | */ | ||
304 | |||
/*
 * Power-gate the gm20b rail. Sequence: flush outstanding MC traffic,
 * clamp the rail, assert reset, stop the reference/power clocks, mark the
 * GPU temp sensor invalid, then drop the DVFS rail and the EMC floor.
 * The order and the udelay() settle times between steps are part of the
 * hardware power-down contract — do not reorder.
 *
 * Returns 0 on success (including when already gated or on FMODEL),
 * negative errno if the rail power-down itself fails.
 */
static int gm20b_tegra_railgate(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int ret = 0;

	/* nothing to do when simulated or already powered down */
	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) ||
	    !tegra_dvfs_is_rail_up(platform->gpu_rail))
		return 0;

	tegra_mc_flush(MC_CLIENT_GPU);

	udelay(10);

	/* enable clamp */
	tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0);
	/* read back to ensure the write has posted before proceeding */
	tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);

	udelay(10);

	platform->reset_assert(dev);

	udelay(10);

	/*
	 * GPCPLL is already disabled before entering this function; reference
	 * clocks are enabled until now - disable them just before rail gating
	 */
	clk_disable_unprepare(platform->clk_reset);
	clk_disable_unprepare(platform->clk[0]);
	clk_disable_unprepare(platform->clk[1]);
	if (platform->clk[3])
		clk_disable_unprepare(platform->clk[3]);

	udelay(10);

	/* readings from the GPU temp sensor are meaningless while gated */
	tegra_soctherm_gpu_tsens_invalidate(1);

	if (tegra_dvfs_is_rail_up(platform->gpu_rail)) {
		ret = tegra_dvfs_rail_power_down(platform->gpu_rail);
		if (ret)
			goto err_power_off;
	} else
		pr_info("No GPU regulator?\n");

#ifdef CONFIG_TEGRA_BWMGR
	/* release the EMC floor; the GPU no longer needs the bandwidth */
	gm20b_bwmgr_set_rate(platform, false);
#endif

	return 0;

err_power_off:
	nvgpu_err(platform->g, "Could not railgate GPU");
	return ret;
}
360 | |||
361 | |||
362 | /* | ||
363 | * gm20b_tegra_unrailgate() | ||
364 | * | ||
365 | * Ungate (enable) gm20b power rail | ||
366 | */ | ||
367 | |||
368 | static int gm20b_tegra_unrailgate(struct device *dev) | ||
369 | { | ||
370 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
371 | struct gk20a *g = platform->g; | ||
372 | int ret = 0; | ||
373 | bool first = false; | ||
374 | |||
375 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
376 | return 0; | ||
377 | |||
378 | ret = tegra_dvfs_rail_power_up(platform->gpu_rail); | ||
379 | if (ret) | ||
380 | return ret; | ||
381 | |||
382 | #ifdef CONFIG_TEGRA_BWMGR | ||
383 | gm20b_bwmgr_set_rate(platform, true); | ||
384 | #endif | ||
385 | |||
386 | tegra_soctherm_gpu_tsens_invalidate(0); | ||
387 | |||
388 | if (!platform->clk_reset) { | ||
389 | platform->clk_reset = clk_get(dev, "gpu_gate"); | ||
390 | if (IS_ERR(platform->clk_reset)) { | ||
391 | nvgpu_err(g, "fail to get gpu reset clk"); | ||
392 | goto err_clk_on; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | if (!first) { | ||
397 | ret = clk_prepare_enable(platform->clk_reset); | ||
398 | if (ret) { | ||
399 | nvgpu_err(g, "could not turn on gpu_gate"); | ||
400 | goto err_clk_on; | ||
401 | } | ||
402 | |||
403 | ret = clk_prepare_enable(platform->clk[0]); | ||
404 | if (ret) { | ||
405 | nvgpu_err(g, "could not turn on gpu pll"); | ||
406 | goto err_clk_on; | ||
407 | } | ||
408 | ret = clk_prepare_enable(platform->clk[1]); | ||
409 | if (ret) { | ||
410 | nvgpu_err(g, "could not turn on pwr clock"); | ||
411 | goto err_clk_on; | ||
412 | } | ||
413 | |||
414 | if (platform->clk[3]) { | ||
415 | ret = clk_prepare_enable(platform->clk[3]); | ||
416 | if (ret) { | ||
417 | nvgpu_err(g, "could not turn on fuse clock"); | ||
418 | goto err_clk_on; | ||
419 | } | ||
420 | } | ||
421 | } | ||
422 | |||
423 | udelay(10); | ||
424 | |||
425 | platform->reset_assert(dev); | ||
426 | |||
427 | udelay(10); | ||
428 | |||
429 | tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0); | ||
430 | tegra_pmc_readl(PMC_GPU_RG_CNTRL_0); | ||
431 | |||
432 | udelay(10); | ||
433 | |||
434 | clk_disable(platform->clk_reset); | ||
435 | platform->reset_deassert(dev); | ||
436 | clk_enable(platform->clk_reset); | ||
437 | |||
438 | /* Flush MC after boot/railgate/SC7 */ | ||
439 | tegra_mc_flush(MC_CLIENT_GPU); | ||
440 | |||
441 | udelay(10); | ||
442 | |||
443 | tegra_mc_flush_done(MC_CLIENT_GPU); | ||
444 | |||
445 | udelay(10); | ||
446 | |||
447 | return 0; | ||
448 | |||
449 | err_clk_on: | ||
450 | tegra_dvfs_rail_power_down(platform->gpu_rail); | ||
451 | |||
452 | return ret; | ||
453 | } | ||
454 | #endif | ||
455 | |||
456 | |||
/*
 * Clocks fetched by gk20a_tegra_get_clocks(), in the order they are
 * stored in platform->clk[]. A default_rate of UINT_MAX asks for the
 * fastest rate the clock supports (clk_round_rate() clips it).
 */
static struct {
	char *name;
	unsigned long default_rate;
} tegra_gk20a_clocks[] = {
	{"gpu_ref", UINT_MAX},
	{"pll_p_out5", 204000000},
	{"emc", UINT_MAX},
	{"fuse", UINT_MAX},
};
466 | |||
467 | |||
468 | |||
469 | /* | ||
470 | * gk20a_tegra_get_clocks() | ||
471 | * | ||
472 | * This function finds clocks in tegra platform and populates | ||
473 | * the clock information to gk20a platform data. | ||
474 | */ | ||
475 | |||
/*
 * gk20a_tegra_get_clocks()
 *
 * This function finds clocks in tegra platform and populates
 * the clock information to gk20a platform data.
 *
 * Looks up every entry of tegra_gk20a_clocks[] under the
 * "tegra_<devname>" system clock namespace, rounds and applies its
 * default rate, and records it in platform->clk[]. The rounded rate of
 * the first clock (gpu_ref) is cached for later rate queries. On any
 * lookup failure, all clocks obtained so far are released and the error
 * is returned (num_clks stays 0).
 */
static int gk20a_tegra_get_clocks(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	char devname[16];
	unsigned int i;
	int ret = 0;

	/* platform->clk[] must be able to hold every table entry */
	BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks));

	snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev));

	platform->num_clks = 0;
	for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
		long rate = tegra_gk20a_clocks[i].default_rate;
		struct clk *c;

		c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
		if (IS_ERR(c)) {
			ret = PTR_ERR(c);
			goto err_get_clock;
		}
		rate = clk_round_rate(c, rate);
		clk_set_rate(c, rate);
		platform->clk[i] = c;
		/* remember the gpu_ref rate for cached rate queries */
		if (i == 0)
			platform->cached_rate = rate;
	}
	platform->num_clks = i;

	return 0;

err_get_clock:

	/* unwind only the clocks acquired before the failure */
	while (i--)
		clk_put(platform->clk[i]);
	return ret;
}
513 | |||
514 | #if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK) | ||
515 | static int gm20b_tegra_reset_assert(struct device *dev) | ||
516 | { | ||
517 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
518 | |||
519 | if (!platform->reset_control) { | ||
520 | WARN(1, "Reset control not initialized\n"); | ||
521 | return -ENOSYS; | ||
522 | } | ||
523 | |||
524 | return reset_control_assert(platform->reset_control); | ||
525 | } | ||
526 | |||
527 | static int gm20b_tegra_reset_deassert(struct device *dev) | ||
528 | { | ||
529 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
530 | |||
531 | if (!platform->reset_control) { | ||
532 | WARN(1, "Reset control not initialized\n"); | ||
533 | return -ENOSYS; | ||
534 | } | ||
535 | |||
536 | return reset_control_deassert(platform->reset_control); | ||
537 | } | ||
538 | #endif | ||
539 | |||
540 | static void gk20a_tegra_scale_init(struct device *dev) | ||
541 | { | ||
542 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
543 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
544 | struct gk20a_emc_params *emc_params; | ||
545 | struct gk20a *g = platform->g; | ||
546 | |||
547 | if (!profile) | ||
548 | return; | ||
549 | |||
550 | if (profile->private_data) | ||
551 | return; | ||
552 | |||
553 | emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params)); | ||
554 | if (!emc_params) | ||
555 | return; | ||
556 | |||
557 | emc_params->freq_last_set = -1; | ||
558 | gk20a_tegra_calibrate_emc(dev, emc_params); | ||
559 | |||
560 | #ifdef CONFIG_TEGRA_BWMGR | ||
561 | emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
562 | if (!emc_params->bwmgr_cl) { | ||
563 | nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__); | ||
564 | return; | ||
565 | } | ||
566 | #endif | ||
567 | |||
568 | profile->private_data = emc_params; | ||
569 | } | ||
570 | |||
571 | static void gk20a_tegra_scale_exit(struct device *dev) | ||
572 | { | ||
573 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
574 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
575 | struct gk20a_emc_params *emc_params; | ||
576 | |||
577 | if (!profile) | ||
578 | return; | ||
579 | |||
580 | emc_params = profile->private_data; | ||
581 | #ifdef CONFIG_TEGRA_BWMGR | ||
582 | tegra_bwmgr_unregister(emc_params->bwmgr_cl); | ||
583 | #endif | ||
584 | |||
585 | nvgpu_kfree(platform->g, profile->private_data); | ||
586 | } | ||
587 | |||
/* Dump nvhost state for this GPU's syncpoint device, if one is bound. */
void gk20a_tegra_debug_dump(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a *g = gk20a_get_platform(dev)->g;

	if (g->nvhost_dev)
		nvgpu_nvhost_debug_dump_device(g->nvhost_dev);
#endif
}
598 | |||
/*
 * Take a runtime-PM "busy" reference on the nvhost host1x device so it
 * stays powered while the GPU is in use. Returns 0 when there is no
 * nvhost device (or support is compiled out).
 */
int gk20a_tegra_busy(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a *g = gk20a_get_platform(dev)->g;

	if (g->nvhost_dev)
		return nvgpu_nvhost_module_busy_ext(g->nvhost_dev);
#endif
	return 0;
}
610 | |||
/* Drop the runtime-PM reference taken by gk20a_tegra_busy(). */
void gk20a_tegra_idle(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a *g = gk20a_get_platform(dev)->g;

	if (g->nvhost_dev)
		nvgpu_nvhost_module_idle_ext(g->nvhost_dev);
#endif
}
621 | |||
/*
 * Preallocate the VPR (secure) carve-out buffer and hook up the secure
 * allocator. A failed DMA allocation is treated as "platform has no VPR"
 * and is NOT an error: nvgpu just runs without VPR support. Always
 * returns 0 in the current implementation.
 */
int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
{
	struct gk20a *g = platform->g;
	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
		return 0;

	/* no kernel mapping is needed; the buffer is carved up by phys */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
	(void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
				      GFP_KERNEL, __DMA_ATTR(attrs));
	/* Some platforms disable VPR. In that case VPR allocations always
	 * fail. Just disable VPR usage in nvgpu in that case. */
	if (dma_mapping_error(&tegra_vpr_dev, iova))
		return 0;

	secure_buffer->size = platform->secure_buffer_size;
	secure_buffer->phys = iova;
	secure_buffer->destroy = gk20a_tegra_secure_page_destroy;

	g->ops.secure_alloc = gk20a_tegra_secure_alloc;
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);

	return 0;
}
649 | |||
650 | #ifdef CONFIG_COMMON_CLK | ||
651 | static struct clk *gk20a_clk_get(struct gk20a *g) | ||
652 | { | ||
653 | if (!g->clk.tegra_clk) { | ||
654 | struct clk *clk; | ||
655 | char clk_dev_id[32]; | ||
656 | struct device *dev = dev_from_gk20a(g); | ||
657 | |||
658 | snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); | ||
659 | |||
660 | clk = clk_get_sys(clk_dev_id, "gpu"); | ||
661 | if (IS_ERR(clk)) { | ||
662 | nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n", | ||
663 | clk_dev_id); | ||
664 | return NULL; | ||
665 | } | ||
666 | g->clk.tegra_clk = clk; | ||
667 | } | ||
668 | |||
669 | return g->clk.tegra_clk; | ||
670 | } | ||
671 | |||
672 | static int gm20b_clk_prepare_ops(struct clk_hw *hw) | ||
673 | { | ||
674 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
675 | return gm20b_clk_prepare(clk); | ||
676 | } | ||
677 | |||
678 | static void gm20b_clk_unprepare_ops(struct clk_hw *hw) | ||
679 | { | ||
680 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
681 | gm20b_clk_unprepare(clk); | ||
682 | } | ||
683 | |||
684 | static int gm20b_clk_is_prepared_ops(struct clk_hw *hw) | ||
685 | { | ||
686 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
687 | return gm20b_clk_is_prepared(clk); | ||
688 | } | ||
689 | |||
690 | static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate) | ||
691 | { | ||
692 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
693 | return gm20b_recalc_rate(clk, parent_rate); | ||
694 | } | ||
695 | |||
696 | static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
697 | unsigned long parent_rate) | ||
698 | { | ||
699 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
700 | return gm20b_gpcclk_set_rate(clk, rate, parent_rate); | ||
701 | } | ||
702 | |||
703 | static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate, | ||
704 | unsigned long *parent_rate) | ||
705 | { | ||
706 | struct clk_gk20a *clk = to_clk_gk20a(hw); | ||
707 | return gm20b_round_rate(clk, rate, parent_rate); | ||
708 | } | ||
709 | |||
710 | static const struct clk_ops gm20b_clk_ops = { | ||
711 | .prepare = gm20b_clk_prepare_ops, | ||
712 | .unprepare = gm20b_clk_unprepare_ops, | ||
713 | .is_prepared = gm20b_clk_is_prepared_ops, | ||
714 | .recalc_rate = gm20b_recalc_rate_ops, | ||
715 | .set_rate = gm20b_gpcclk_set_rate_ops, | ||
716 | .round_rate = gm20b_round_rate_ops, | ||
717 | }; | ||
718 | |||
/*
 * Register the GPCPLL with the common clock framework as "gpcclk",
 * parented to "pllg_ref". Requires the Tegra GPU clock to be resolvable
 * and the gm20b software clock state to initialize.
 *
 * Returns 0 on success, -ENOSYS if the Tegra clock is unavailable,
 * -EINVAL if clk_register() fails, or the error from
 * gm20b_init_clk_setup_sw().
 */
static int gm20b_register_gpcclk(struct gk20a *g)
{
	const char *parent_name = "pllg_ref";
	struct clk_gk20a *clk = &g->clk;
	struct clk_init_data init;
	struct clk *c;
	int err = 0;

	/* make sure the clock is available */
	if (!gk20a_clk_get(g))
		return -ENOSYS;

	err = gm20b_init_clk_setup_sw(g);
	if (err)
		return err;

	init.name = "gpcclk";
	init.ops = &gm20b_clk_ops;
	init.parent_names = &parent_name;
	init.num_parents = 1;
	init.flags = 0;

	/* Data in .init is copied by clk_register(), so stack variable OK */
	clk->hw.init = &init;
	c = clk_register(dev_from_gk20a(g), &clk->hw);
	if (IS_ERR(c)) {
		nvgpu_err(g, "Failed to register GPCPLL clock");
		return -EINVAL;
	}

	clk->g = g;
	/* make the clock reachable via clk_get("gpcclk") */
	clk_register_clkdev(c, "gpcclk", "gpcclk");

	return err;
}
754 | #endif /* CONFIG_COMMON_CLK */ | ||
755 | |||
/*
 * SoC-specific probe for gk20a/gm20b on Tegra. Defers probing until the
 * GPU DVFS rail exists and is ready (CCF kernels only), resolves the
 * nvhost device, applies board quirks (joint XPU rail disables
 * railgating; missing dvfs rework disables scaling on T210), selects the
 * GPC PLL variant per chip revision, and wires up clocks and the VPR
 * secure allocator.
 *
 * Returns 0, -EPROBE_DEFER while DVFS is not ready, or a negative errno.
 */
static int gk20a_tegra_probe(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct device_node *np = dev->of_node;
	bool joint_xpu_rail = false;
	int ret;
	struct gk20a *g = platform->g;

#ifdef CONFIG_COMMON_CLK
	/* DVFS is not guaranteed to be initialized at the time of probe on
	 * kernels with Common Clock Framework enabled.
	 */
	if (!platform->gpu_rail) {
		platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME);
		if (!platform->gpu_rail) {
			nvgpu_log_info(g, "deferring probe no gpu_rail");
			return -EPROBE_DEFER;
		}
	}

	if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) {
		nvgpu_log_info(g, "deferring probe gpu_rail not ready");
		return -EPROBE_DEFER;
	}
#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	ret = nvgpu_get_nvhost_dev(platform->g);
	if (ret)
		return ret;
#endif

#ifdef CONFIG_OF
	joint_xpu_rail = of_property_read_bool(of_chosen,
				"nvidia,tegra-joint_xpu_rail");
#endif

	if (joint_xpu_rail) {
		nvgpu_log_info(g, "XPU rails are joint\n");
		/* CPU and GPU share a rail: never gate it from the GPU side */
		platform->g->can_railgate = false;
	}

	platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
	if (tegra_get_chip_id() == TEGRA210) {
		/* WAR for bug 1547668: Disable railgating and scaling
		   irrespective of platform data if the rework was not made. */
		np = of_find_node_by_path("/gpu-dvfs-rework");
		if (!(np && of_device_is_available(np))) {
			platform->devfreq_governor = "";
			dev_warn(dev, "board does not support scaling");
		}
		platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1;
		if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p)
			platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1;
	}

	if (tegra_get_chip_id() == TEGRA132)
		platform->soc_name = "tegra13x";

	/* NOTE(review): the return value of gk20a_tegra_get_clocks() is
	 * ignored here — presumably a deliberate best-effort, but worth
	 * confirming that later clock users tolerate missing clocks. */
	gk20a_tegra_get_clocks(dev);
	nvgpu_linux_init_clk_support(platform->g);
	ret = gk20a_tegra_init_secure_alloc(platform);
	if (ret)
		return ret;

	if (platform->clk_register) {
		ret = platform->clk_register(platform->g);
		if (ret)
			return ret;
	}

	return 0;
}
829 | |||
/* No SoC-specific late-probe work is needed; kept so the platform
 * callback table has a valid hook. */
static int gk20a_tegra_late_probe(struct device *dev)
{
	return 0;
}
834 | |||
/* Driver removal: undo scale_init() and release the nvhost device. */
static int gk20a_tegra_remove(struct device *dev)
{
	/* deinitialise tegra specific scaling quirks */
	gk20a_tegra_scale_exit(dev);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif

	return 0;
}
846 | |||
/* System suspend: report zero GPU load to EDP so power budgets relax. */
static int gk20a_tegra_suspend(struct device *dev)
{
	tegra_edp_notify_gpu_load(0, 0);
	return 0;
}
852 | |||
853 | #if defined(CONFIG_COMMON_CLK) | ||
854 | static long gk20a_round_clk_rate(struct device *dev, unsigned long rate) | ||
855 | { | ||
856 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
857 | struct gk20a *g = platform->g; | ||
858 | |||
859 | /* make sure the clock is available */ | ||
860 | if (!gk20a_clk_get(g)) | ||
861 | return rate; | ||
862 | |||
863 | return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate); | ||
864 | } | ||
865 | |||
866 | static int gk20a_clk_get_freqs(struct device *dev, | ||
867 | unsigned long **freqs, int *num_freqs) | ||
868 | { | ||
869 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
870 | struct gk20a *g = platform->g; | ||
871 | |||
872 | /* make sure the clock is available */ | ||
873 | if (!gk20a_clk_get(g)) | ||
874 | return -ENOSYS; | ||
875 | |||
876 | return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk), | ||
877 | freqs, num_freqs); | ||
878 | } | ||
879 | #endif | ||
880 | |||
/*
 * Platform descriptor for gm20b on Tegra210-class boards. Wires the
 * generic nvgpu driver to the SoC-specific callbacks defined above.
 */
struct gk20a_platform gm20b_tegra_platform = {
	.has_syncpoints = true,
	.aggressive_sync_destroy_thresh = 64,

	/* power management configuration */
	.railgate_delay_init	= 500,
	.can_railgate_init	= true,
	.can_elpg_init = true,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_elcg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,
	.enable_elpg = true,
	.enable_aelpg = true,
	.enable_perfmon = true,
	.ptimer_src_freq	= 19200000,

	.force_reset_in_do_idle = false,

	.ch_wdt_timeout_ms = 5000,

	.probe = gk20a_tegra_probe,
	.late_probe = gk20a_tegra_late_probe,
	.remove = gk20a_tegra_remove,
	/* power management callbacks */
	.suspend = gk20a_tegra_suspend,

#if defined(CONFIG_TEGRA_DVFS)
	.railgate = gm20b_tegra_railgate,
	.unrailgate = gm20b_tegra_unrailgate,
	.is_railgated = gk20a_tegra_is_railgated,
#endif

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

	/* prefer the reset controller; fall back to legacy reset helpers */
#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
	.reset_assert = gm20b_tegra_reset_assert,
	.reset_deassert = gm20b_tegra_reset_deassert,
#else
	.reset_assert = gk20a_tegra_reset_assert,
	.reset_deassert = gk20a_tegra_reset_deassert,
#endif

#if defined(CONFIG_COMMON_CLK)
	.clk_round_rate = gk20a_round_clk_rate,
	.get_clk_freqs = gk20a_clk_get_freqs,
#endif

#ifdef CONFIG_COMMON_CLK
	.clk_register = gm20b_register_gpcclk,
#endif

	/* frequency scaling configuration */
	.initscale = gk20a_tegra_scale_init,
	.prescale = gk20a_tegra_prescale,
#ifdef CONFIG_TEGRA_BWMGR
	.postscale = gm20b_tegra_postscale,
#endif
	.devfreq_governor = "nvhost_podgov",
	.qos_notify = gk20a_scale_qos_notify,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	.has_cde = true,
#endif

	.soc_name = "tegra21x",

	.unified_memory = true,
	/* gm20b GMMU addresses are 34 bits wide */
	.dma_mask = DMA_BIT_MASK(34),

	/* size of the preallocated VPR carve-out (bytes) */
	.secure_buffer_size = 335872,
};
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h new file mode 100644 index 00000000..f7d50406 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h | |||
@@ -0,0 +1,23 @@ | |||
/*
 * GK20A Platform (SoC) Interface
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
#define _NVGPU_PLATFORM_GK20A_TEGRA_H_

struct gk20a_platform;

/*
 * Set up secure-buffer allocation support for this platform instance.
 * Called from the gp10b/gv11b probe paths in this directory; returns 0 on
 * success or a negative errno (callers abort probe on failure).
 * NOTE(review): exact allocation mechanism lives in platform_gk20a_tegra.c,
 * outside this view — confirm there.
 */
int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h new file mode 100644 index 00000000..d256d126 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b.h | |||
@@ -0,0 +1,39 @@ | |||
/*
 * GP10B Platform (SoC) Interface
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _GP10B_PLATFORM_H_
#define _GP10B_PLATFORM_H_

struct device;

/* Look up the Tegra clocks for the GPU and cache them in the platform data. */
int gp10b_tegra_get_clocks(struct device *dev);
/* Assert/deassert the GPU reset line via the platform reset_control. */
int gp10b_tegra_reset_assert(struct device *dev);
int gp10b_tegra_reset_deassert(struct device *dev);
/* Register with the EMC bandwidth manager for GPU frequency scaling. */
void gp10b_tegra_scale_init(struct device *dev);
/* Round a requested GPU clock rate to a supported devfreq table entry. */
long gp10b_round_clk_rate(struct device *dev, unsigned long rate);
/* Build the devfreq frequency table; *freqs points at a static table. */
int gp10b_clk_get_freqs(struct device *dev,
			unsigned long **freqs, int *num_freqs);
/* devfreq pre/post scale hooks (EMC floor adjustment in postscale). */
void gp10b_tegra_prescale(struct device *dev);
void gp10b_tegra_postscale(struct device *pdev, unsigned long freq);
#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c new file mode 100644 index 00000000..5cb82687 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | |||
@@ -0,0 +1,607 @@ | |||
1 | /* | ||
2 | * GP10B Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | */ | ||
15 | |||
16 | #include <linux/of_platform.h> | ||
17 | #include <linux/debugfs.h> | ||
18 | #include <linux/dma-buf.h> | ||
19 | #include <linux/nvmap.h> | ||
20 | #include <linux/reset.h> | ||
21 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
22 | |||
23 | #include <uapi/linux/nvgpu.h> | ||
24 | |||
25 | #include <soc/tegra/tegra_bpmp.h> | ||
26 | #include <soc/tegra/tegra_powergate.h> | ||
27 | #include <soc/tegra/tegra-bpmp-dvfs.h> | ||
28 | |||
29 | #include <dt-bindings/memory/tegra-swgroup.h> | ||
30 | |||
31 | #include <nvgpu/kmem.h> | ||
32 | #include <nvgpu/bug.h> | ||
33 | #include <nvgpu/enabled.h> | ||
34 | #include <nvgpu/hashtable.h> | ||
35 | #include <nvgpu/nvhost.h> | ||
36 | |||
37 | #include "os_linux.h" | ||
38 | |||
39 | #include "clk.h" | ||
40 | |||
41 | #include "gk20a/gk20a.h" | ||
42 | |||
43 | #include "platform_gk20a.h" | ||
44 | #include "platform_ecc_sysfs.h" | ||
45 | #include "platform_gk20a_tegra.h" | ||
46 | #include "platform_gp10b.h" | ||
47 | #include "platform_gp10b_tegra.h" | ||
48 | #include "scale.h" | ||
49 | |||
/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
#define GP10B_FREQ_SELECT_STEP	8
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120
/* Static devfreq table filled by gp10b_clk_get_freqs(); sized for one entry
 * per selected step of the h/w table. */
static unsigned long
gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP];

/* GPU vs DDR4 bandwidth-per-frequency factors used to derive the EMC floor
 * from the GPU clock in gp10b_tegra_postscale(). */
#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16

#define EMC_BW_RATIO  (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)

/* Initial gpcclk rate programmed at probe: 1 GHz. */
#define GPCCLK_INIT_RATE 1000000000

/* Clocks fetched by gp10b_tegra_get_clocks(); index 0 ("gpu") is the main
 * clock whose rate is cached and used for DVFS. */
static struct {
	char *name;
	unsigned long default_rate;
} tegra_gp10b_clocks[] = {
	{"gpu", GPCCLK_INIT_RATE},
	{"gpu_sys", 204000000} };
70 | |||
/*
 * gp10b_tegra_get_clocks()
 *
 * This function finds clocks in tegra platform and populates
 * the clock information to gp10b platform data.
 *
 * Each entry of tegra_gp10b_clocks is looked up with clk_get(); a failed
 * lookup is logged but not fatal, leaving that platform->clk[] slot as-is.
 * num_clks is always set to ARRAY_SIZE(tegra_gp10b_clocks) after the loop.
 * Always returns 0.
 */

int gp10b_tegra_get_clocks(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	unsigned int i;

	platform->num_clks = 0;
	for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
		long rate = tegra_gp10b_clocks[i].default_rate;
		struct clk *c;

		c = clk_get(dev, tegra_gp10b_clocks[i].name);
		if (IS_ERR(c)) {
			nvgpu_err(platform->g, "cannot get clock %s",
					tegra_gp10b_clocks[i].name);
		} else {
			clk_set_rate(c, rate);
			platform->clk[i] = c;
			/* The first ("gpu") clock's rate is cached for DVFS. */
			if (i == 0)
				platform->cached_rate = rate;
		}
	}
	/* i == ARRAY_SIZE(tegra_gp10b_clocks) here. */
	platform->num_clks = i;

	if (platform->clk[0]) {
		/* i is reused: BPMP DVFS clock id for the "gpu" clock
		 * (> 0 when valid). */
		i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
					       tegra_gp10b_clocks[0].name);
		if (i > 0)
			platform->maxmin_clk_id = i;
	}

	return 0;
}
110 | |||
111 | void gp10b_tegra_scale_init(struct device *dev) | ||
112 | { | ||
113 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
114 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
115 | struct tegra_bwmgr_client *bwmgr_handle; | ||
116 | |||
117 | if (!profile) | ||
118 | return; | ||
119 | |||
120 | if ((struct tegra_bwmgr_client *)profile->private_data) | ||
121 | return; | ||
122 | |||
123 | bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); | ||
124 | if (!bwmgr_handle) | ||
125 | return; | ||
126 | |||
127 | profile->private_data = (void *)bwmgr_handle; | ||
128 | } | ||
129 | |||
130 | static void gp10b_tegra_scale_exit(struct device *dev) | ||
131 | { | ||
132 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
133 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
134 | |||
135 | if (profile) | ||
136 | tegra_bwmgr_unregister( | ||
137 | (struct tegra_bwmgr_client *)profile->private_data); | ||
138 | } | ||
139 | |||
140 | static int gp10b_tegra_probe(struct device *dev) | ||
141 | { | ||
142 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
143 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
144 | int ret; | ||
145 | |||
146 | ret = nvgpu_get_nvhost_dev(platform->g); | ||
147 | if (ret) | ||
148 | return ret; | ||
149 | #endif | ||
150 | |||
151 | ret = gk20a_tegra_init_secure_alloc(platform); | ||
152 | if (ret) | ||
153 | return ret; | ||
154 | |||
155 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
156 | |||
157 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
158 | = false; | ||
159 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
160 | = false; | ||
161 | |||
162 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
163 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
164 | |||
165 | gp10b_tegra_get_clocks(dev); | ||
166 | nvgpu_linux_init_clk_support(platform->g); | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
/* Late-probe hook: no gp10b-specific late initialization is required. */
static int gp10b_tegra_late_probe(struct device *dev)
{
	return 0;
}
175 | |||
176 | static int gp10b_tegra_remove(struct device *dev) | ||
177 | { | ||
178 | struct gk20a *g = get_gk20a(dev); | ||
179 | |||
180 | if (g->ops.gr.remove_gr_sysfs) | ||
181 | g->ops.gr.remove_gr_sysfs(g); | ||
182 | |||
183 | /* deinitialise tegra specific scaling quirks */ | ||
184 | gp10b_tegra_scale_exit(dev); | ||
185 | |||
186 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
187 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
188 | #endif | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | static bool gp10b_tegra_is_railgated(struct device *dev) | ||
194 | { | ||
195 | bool ret = false; | ||
196 | |||
197 | if (tegra_bpmp_running()) | ||
198 | ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); | ||
199 | |||
200 | return ret; | ||
201 | } | ||
202 | |||
/*
 * Gate power to the GPU domain.  Order matters: first drop the EMC
 * bandwidth floor, then disable the GPU clocks, and only then powergate
 * the partition.  A no-op (still returning 0) when BPMP is not running or
 * the domain is already unpowered.
 */
static int gp10b_tegra_railgate(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	/* remove emc frequency floor */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			0, TEGRA_BWMGR_SET_EMC_FLOOR);

	if (tegra_bpmp_running() &&
	    tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
		int i;
		/* Clocks must be off before the partition is powergated. */
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_disable_unprepare(platform->clk[i]);
		}
		tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
	}
	return 0;
}
225 | |||
/*
 * Ungate power to the GPU domain: unpowergate the partition, re-enable the
 * GPU clocks, then raise the EMC frequency floor to maximum as a safe
 * starting point (postscale lowers it later).  Returns the unpowergate
 * result (0 when BPMP is not running).
 */
static int gp10b_tegra_unrailgate(struct device *dev)
{
	int ret = 0;
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	if (tegra_bpmp_running()) {
		int i;
		ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU);
		/* NOTE(review): clocks are re-enabled even if unpowergate
		 * failed; ret still carries that failure to the caller. */
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_prepare_enable(platform->clk[i]);
		}
	}

	/* to start with set emc frequency floor to max rate*/
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			tegra_bwmgr_get_max_emc_rate(),
			TEGRA_BWMGR_SET_EMC_FLOOR);
	return ret;
}
249 | |||
/* System-suspend hook: no gp10b-specific suspend work is needed. */
static int gp10b_tegra_suspend(struct device *dev)
{
	return 0;
}
254 | |||
255 | int gp10b_tegra_reset_assert(struct device *dev) | ||
256 | { | ||
257 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
258 | int ret = 0; | ||
259 | |||
260 | if (!platform->reset_control) | ||
261 | return -EINVAL; | ||
262 | |||
263 | ret = reset_control_assert(platform->reset_control); | ||
264 | |||
265 | return ret; | ||
266 | } | ||
267 | |||
268 | int gp10b_tegra_reset_deassert(struct device *dev) | ||
269 | { | ||
270 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
271 | int ret = 0; | ||
272 | |||
273 | if (!platform->reset_control) | ||
274 | return -EINVAL; | ||
275 | |||
276 | ret = reset_control_deassert(platform->reset_control); | ||
277 | |||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | void gp10b_tegra_prescale(struct device *dev) | ||
282 | { | ||
283 | struct gk20a *g = get_gk20a(dev); | ||
284 | u32 avg = 0; | ||
285 | |||
286 | nvgpu_log_fn(g, " "); | ||
287 | |||
288 | nvgpu_pmu_load_norm(g, &avg); | ||
289 | |||
290 | nvgpu_log_fn(g, "done"); | ||
291 | } | ||
292 | |||
293 | void gp10b_tegra_postscale(struct device *pdev, | ||
294 | unsigned long freq) | ||
295 | { | ||
296 | struct gk20a_platform *platform = gk20a_get_platform(pdev); | ||
297 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
298 | struct gk20a *g = get_gk20a(pdev); | ||
299 | unsigned long emc_rate; | ||
300 | |||
301 | nvgpu_log_fn(g, " "); | ||
302 | if (profile && !platform->is_railgated(pdev)) { | ||
303 | unsigned long emc_scale; | ||
304 | |||
305 | if (freq <= gp10b_freq_table[0]) | ||
306 | emc_scale = 0; | ||
307 | else | ||
308 | emc_scale = g->emc3d_ratio; | ||
309 | |||
310 | emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; | ||
311 | |||
312 | if (emc_rate > tegra_bwmgr_get_max_emc_rate()) | ||
313 | emc_rate = tegra_bwmgr_get_max_emc_rate(); | ||
314 | |||
315 | tegra_bwmgr_set_emc( | ||
316 | (struct tegra_bwmgr_client *)profile->private_data, | ||
317 | emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); | ||
318 | } | ||
319 | nvgpu_log_fn(g, "done"); | ||
320 | } | ||
321 | |||
322 | long gp10b_round_clk_rate(struct device *dev, unsigned long rate) | ||
323 | { | ||
324 | struct gk20a *g = get_gk20a(dev); | ||
325 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
326 | unsigned long *freq_table = profile->devfreq_profile.freq_table; | ||
327 | int max_states = profile->devfreq_profile.max_state; | ||
328 | int i; | ||
329 | |||
330 | for (i = 0; i < max_states; ++i) | ||
331 | if (freq_table[i] >= rate) | ||
332 | return freq_table[i]; | ||
333 | |||
334 | return freq_table[max_states - 1]; | ||
335 | } | ||
336 | |||
/*
 * Build the devfreq frequency table for the GPU clock.
 *
 * The hardware rate ladder is enumerated by repeatedly asking
 * clk_round_rate() for "previous rate + 1"; every
 * GP10B_FREQ_SELECT_STEP'th rate is kept, and the maximum rate is always
 * appended last.  On return, *freqs points at the static
 * gp10b_freq_table (not allocated per call) and *num_freqs holds the
 * number of valid entries.  Always returns 0.
 */
int gp10b_clk_get_freqs(struct device *dev,
			unsigned long **freqs, int *num_freqs)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;
	unsigned long max_rate;
	unsigned long new_rate = 0, prev_rate = 0;
	int i = 0, freq_counter = 0;

	/* Rounding an effectively-infinite request yields the max rate. */
	max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));

	/*
	 * Walk the h/w frequency table and only select
	 * GP10B_FREQ_SELECT_STEP'th frequencies and
	 * add MAX freq to last
	 */
	for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
		prev_rate = new_rate;
		new_rate = clk_round_rate(platform->clk[0], prev_rate + 1);

		if (i % GP10B_FREQ_SELECT_STEP == 0 ||
				new_rate == max_rate) {
			gp10b_freq_table[freq_counter++] = new_rate;

			if (new_rate == max_rate)
				break;
		}
	}

	/* Hitting the loop bound means max_rate was never reached. */
	WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS);

	/* Fill freq table */
	*freqs = gp10b_freq_table;
	*num_freqs = freq_counter;

	nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n",
		gp10b_freq_table[0], max_rate, *num_freqs);

	return 0;
}
377 | |||
/*
 * Platform descriptor for gp10b (Tegra "tegra18x" SoC): probe/remove and
 * power-management callbacks, clock-scaling hooks, and static capability
 * flags consumed by the common nvgpu driver core.
 */
struct gk20a_platform gp10b_tegra_platform = {
	.has_syncpoints = true,

	/* power management configuration */
	.railgate_delay_init	= 500,

	/* ldiv slowdown factor */
	.ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16,

	/* power management configuration */
	.can_railgate_init	= true,
	.enable_elpg		= true,
	.can_elpg_init		= true,
	.enable_blcg		= true,
	.enable_slcg		= true,
	.enable_elcg		= true,
	.can_slcg               = true,
	.can_blcg               = true,
	.can_elcg               = true,
	.enable_aelpg       = true,
	.enable_perfmon         = true,

	/* ptimer src frequency in hz*/
	.ptimer_src_freq	= 31250000,

	.ch_wdt_timeout_ms = 5000,

	.probe = gp10b_tegra_probe,
	.late_probe = gp10b_tegra_late_probe,
	.remove = gp10b_tegra_remove,

	/* power management callbacks */
	.suspend = gp10b_tegra_suspend,
	.railgate = gp10b_tegra_railgate,
	.unrailgate = gp10b_tegra_unrailgate,
	.is_railgated = gp10b_tegra_is_railgated,

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	.has_cde = true,
#endif

	.clk_round_rate = gp10b_round_clk_rate,
	.get_clk_freqs = gp10b_clk_get_freqs,

	/* frequency scaling configuration */
	.initscale = gp10b_tegra_scale_init,
	.prescale = gp10b_tegra_prescale,
	.postscale = gp10b_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",

	.qos_notify = gk20a_scale_qos_notify,

	.reset_assert = gp10b_tegra_reset_assert,
	.reset_deassert = gp10b_tegra_reset_deassert,

	.force_reset_in_do_idle = false,

	.soc_name = "tegra18x",

	.unified_memory = true,
	/* 36-bit DMA addressing on this SoC. */
	.dma_mask = DMA_BIT_MASK(36),

	.ltc_streamid = TEGRA_SID_GPUB,

	.secure_buffer_size = 401408,
};
449 | |||
/*
 * Create the gp10b ECC error-counter sysfs nodes (SM LRF, SM shared
 * memory, TEX pipes 0/1, and L2).  Individual creation failures are OR'd
 * together and reported once at the end; the function does not roll back
 * partially created nodes.  The second argument to
 * nvgpu_gr_ecc_stat_create() is a unit index (0 for GR stats, 2 for LTC
 * here) — NOTE(review): its exact meaning is defined by
 * platform_ecc_sysfs.c, outside this view.
 */
void gr_gp10b_create_sysfs(struct gk20a *g)
{
	int error = 0;
	struct device *dev = dev_from_gk20a(g);

	/* This stat creation function is called on GR init. GR can get
	   initialized multiple times but we only need to create the ECC
	   stats once. Therefore, add the following check to avoid
	   creating duplicate stat sysfs nodes. */
	if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
		return;

	/* SM local register file (LRF) single/double bit error counters. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_lrf_ecc_single_err_count",
				&g->ecc.gr.sm_lrf_single_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_lrf_ecc_double_err_count",
				&g->ecc.gr.sm_lrf_double_err_count);

	/* SM shared memory SEC/SED/DED counters. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_sec_count",
				&g->ecc.gr.sm_shm_sec_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_sed_count",
				&g->ecc.gr.sm_shm_sed_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_ded_count",
				&g->ecc.gr.sm_shm_ded_count);

	/* TEX unit counters, pipe 0. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_sec_pipe0_count",
				&g->ecc.gr.tex_total_sec_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_ded_pipe0_count",
				&g->ecc.gr.tex_total_ded_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_sec_pipe0_count",
				&g->ecc.gr.tex_unique_sec_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_ded_pipe0_count",
				&g->ecc.gr.tex_unique_ded_pipe0_count);

	/* TEX unit counters, pipe 1. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_sec_pipe1_count",
				&g->ecc.gr.tex_total_sec_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_ded_pipe1_count",
				&g->ecc.gr.tex_total_ded_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_sec_pipe1_count",
				&g->ecc.gr.tex_unique_sec_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_ded_pipe1_count",
				&g->ecc.gr.tex_unique_ded_pipe1_count);

	/* L2 (LTC) counters use unit index 2. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				2,
				"ecc_sec_count",
				&g->ecc.ltc.l2_sec_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				2,
				"ecc_ded_count",
				&g->ecc.ltc.l2_ded_count);

	if (error)
		dev_err(dev, "Failed to create sysfs attributes!\n");
}
540 | |||
/*
 * Remove every ECC sysfs node created by gr_gp10b_create_sysfs(), in the
 * same order.  A no-op if the nodes were never created (detected via the
 * same sentinel counter used at creation time).
 */
void gr_gp10b_remove_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	/* Nothing to remove if create_sysfs never ran (or already undone). */
	if (!g->ecc.gr.sm_lrf_single_err_count.counters)
		return;

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_lrf_single_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_lrf_double_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_sec_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_sed_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_ded_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_sec_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_ded_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_sec_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_ded_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_sec_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_ded_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_sec_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_ded_pipe1_count);

	/* L2 (LTC) counters use unit index 2, matching creation. */
	nvgpu_gr_ecc_stat_remove(dev,
			2,
			&g->ecc.ltc.l2_sec_count);

	nvgpu_gr_ecc_stat_remove(dev,
			2,
			&g->ecc.ltc.l2_ded_count);
}
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h new file mode 100644 index 00000000..6de90275 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h | |||
@@ -0,0 +1,23 @@ | |||
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _PLATFORM_GP10B_TEGRA_H_
#define _PLATFORM_GP10B_TEGRA_H_

/* Convenience header: pulls in the gp10b GR definitions and the ECC sysfs
 * helpers used by platform_gp10b_tegra.c and platform_gv11b_tegra.c. */
#include "gp10b/gr_gp10b.h"
#include "platform_ecc_sysfs.h"

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c new file mode 100644 index 00000000..d62e7932 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | |||
@@ -0,0 +1,588 @@ | |||
1 | /* | ||
2 | * GV11B Tegra Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/of_platform.h> | ||
20 | #include <linux/debugfs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | #include <linux/nvmap.h> | ||
23 | #include <linux/reset.h> | ||
24 | #include <linux/hashtable.h> | ||
25 | #include <linux/clk.h> | ||
26 | #include <linux/platform/tegra/emc_bwmgr.h> | ||
27 | |||
28 | #include <nvgpu/nvhost.h> | ||
29 | |||
30 | #include <uapi/linux/nvgpu.h> | ||
31 | |||
32 | #include <soc/tegra/tegra_bpmp.h> | ||
33 | #include <soc/tegra/tegra_powergate.h> | ||
34 | |||
35 | #include "gk20a/gk20a.h" | ||
36 | #include "platform_gk20a.h" | ||
37 | #include "clk.h" | ||
38 | #include "scale.h" | ||
39 | |||
40 | #include "platform_gp10b.h" | ||
41 | #include "platform_gp10b_tegra.h" | ||
42 | #include "platform_ecc_sysfs.h" | ||
43 | |||
44 | #include "os_linux.h" | ||
45 | #include "platform_gk20a_tegra.h" | ||
46 | #include "gv11b/gr_gv11b.h" | ||
47 | |||
48 | static void gv11b_tegra_scale_exit(struct device *dev) | ||
49 | { | ||
50 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
51 | struct gk20a_scale_profile *profile = platform->g->scale_profile; | ||
52 | |||
53 | if (profile) | ||
54 | tegra_bwmgr_unregister( | ||
55 | (struct tegra_bwmgr_client *)profile->private_data); | ||
56 | } | ||
57 | |||
58 | static int gv11b_tegra_probe(struct device *dev) | ||
59 | { | ||
60 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
61 | int err; | ||
62 | |||
63 | err = nvgpu_nvhost_syncpt_init(platform->g); | ||
64 | if (err) { | ||
65 | if (err != -ENOSYS) | ||
66 | return err; | ||
67 | } | ||
68 | |||
69 | err = gk20a_tegra_init_secure_alloc(platform); | ||
70 | if (err) | ||
71 | return err; | ||
72 | |||
73 | platform->disable_bigpage = !device_is_iommuable(dev); | ||
74 | |||
75 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
76 | = false; | ||
77 | platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close | ||
78 | = false; | ||
79 | |||
80 | platform->g->gr.ctx_vars.force_preemption_gfxp = false; | ||
81 | platform->g->gr.ctx_vars.force_preemption_cilp = false; | ||
82 | |||
83 | gp10b_tegra_get_clocks(dev); | ||
84 | nvgpu_linux_init_clk_support(platform->g); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
/* Late-probe hook: no gv11b-specific late initialization is required. */
static int gv11b_tegra_late_probe(struct device *dev)
{
	return 0;
}
93 | |||
94 | |||
95 | static int gv11b_tegra_remove(struct device *dev) | ||
96 | { | ||
97 | struct gk20a *g = get_gk20a(dev); | ||
98 | |||
99 | if (g->ops.gr.remove_gr_sysfs) | ||
100 | g->ops.gr.remove_gr_sysfs(g); | ||
101 | |||
102 | gv11b_tegra_scale_exit(dev); | ||
103 | |||
104 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
105 | nvgpu_free_nvhost_dev(get_gk20a(dev)); | ||
106 | #endif | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
/*
 * Report whether the gv11b power domain is gated.  The whole query is
 * compiled out — always reporting "not railgated" — unless the platform
 * headers define TEGRA194_POWER_DOMAIN_GPU; the BPMP must also be running
 * for the power state to be queryable.
 */
static bool gv11b_tegra_is_railgated(struct device *dev)
{
	bool ret = false;
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a *g = get_gk20a(dev);

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU);

		nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no");
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}
#endif
	return ret;
}
128 | |||
/*
 * Gate power to the gv11b domain.  Compiled out (always returns 0) unless
 * TEGRA194_POWER_DOMAIN_GPU is defined.  Order matters: drop the EMC
 * bandwidth floor, then disable the GPU clocks, then powergate the
 * partition.  A no-op when BPMP is not running or the domain is already
 * unpowered.
 */
static int gv11b_tegra_railgate(struct device *dev)
{
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a *g = get_gk20a(dev);
	int i;

	/* remove emc frequency floor */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			0, TEGRA_BWMGR_SET_EMC_FLOOR);

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) {
			nvgpu_log(g, gpu_dbg_info, "powergate is not powered");
			return 0;
		}
		nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare");
		/* Clocks must be off before the partition is powergated. */
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_disable_unprepare(platform->clk[i]);
		}
		nvgpu_log(g, gpu_dbg_info, "powergate_partition");
		tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU);
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}
#endif
	return 0;
}
162 | |||
/*
 * Ungate power to the gv11b domain.  Compiled out (always returns 0)
 * unless TEGRA194_POWER_DOMAIN_GPU is defined.  Unpowergates the
 * partition (bailing out with the error if that fails), re-enables the
 * GPU clocks, then raises the EMC frequency floor to the maximum as a
 * safe starting point (postscale lowers it later).
 */
static int gv11b_tegra_unrailgate(struct device *dev)
{
	int ret = 0;
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	int i;

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU);
		if (ret) {
			nvgpu_log(g, gpu_dbg_info,
				"unpowergate partition failed");
			return ret;
		}
		nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable");
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_prepare_enable(platform->clk[i]);
		}
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}

	/* to start with set emc frequency floor to max rate*/
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			tegra_bwmgr_get_max_emc_rate(),
			TEGRA_BWMGR_SET_EMC_FLOOR);
#endif
	return ret;
}
198 | |||
/* System-suspend hook: no gv11b-specific suspend work is needed. */
static int gv11b_tegra_suspend(struct device *dev)
{
	return 0;
}
203 | |||
/*
 * Platform integration data for the gv11b iGPU on Tegra194: power
 * management policy, clock-gating capabilities, scaling callbacks and
 * board constants consumed by the common gk20a platform layer.
 */
struct gk20a_platform gv11b_tegra_platform = {
	.has_syncpoints = true,

	/* ptimer src frequency in hz*/
	.ptimer_src_freq = 31250000,

	/* per-channel watchdog timeout */
	.ch_wdt_timeout_ms = 5000,

	.probe = gv11b_tegra_probe,
	.late_probe = gv11b_tegra_late_probe,
	.remove = gv11b_tegra_remove,
	.railgate_delay_init = 500,
	.can_railgate_init = true,

	/* clock-gating capabilities and their initial enable state */
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_elcg = true,
	.enable_perfmon = true,

	/* power management configuration */
	.enable_elpg = true,
	.can_elpg_init = true,
	.enable_aelpg = true,

	/* power management callbacks */
	.suspend = gv11b_tegra_suspend,
	.railgate = gv11b_tegra_railgate,
	.unrailgate = gv11b_tegra_unrailgate,
	.is_railgated = gv11b_tegra_is_railgated,

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

	/* gp10b clock helpers are reused on gv11b */
	.clk_round_rate = gp10b_round_clk_rate,
	.get_clk_freqs = gp10b_clk_get_freqs,

	/* frequency scaling configuration */
	.initscale = gp10b_tegra_scale_init,
	.prescale = gp10b_tegra_prescale,
	.postscale = gp10b_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",

	.qos_notify = gk20a_scale_qos_notify,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

	.soc_name = "tegra19x",

	.honors_aperture = true,
	.unified_memory = true,
	.dma_mask = DMA_BIT_MASK(36),

	.reset_assert = gp10b_tegra_reset_assert,
	.reset_deassert = gp10b_tegra_reset_deassert,

	.secure_buffer_size = 667648,
};
264 | |||
/*
 * Create gv11b-specific ECC error counter sysfs nodes.
 *
 * Builds on the common gp10b set and adds the SM, GCC, LTC, FECS/GPCCS,
 * MMU and PMU counters introduced on gv11b. Creation failures are OR-ed
 * into 'error' and reported once at the end; nodes that were created
 * successfully are left in place.
 */
void gr_gv11b_create_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);
	int error = 0;

	/* This stat creation function is called on GR init. GR can get
	   initialized multiple times but we only need to create the ECC
	   stats once. Therefore, add the following check to avoid
	   creating duplicate stat sysfs nodes. */
	if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
		return;

	/* Common gp10b counters first; the gv11b set extends them. */
	gr_gp10b_create_sysfs(g);

	/* Per-TPC SM counters (L1 tag, CBU, L1 data, icache). */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_l1_tag_ecc_corrected_err_count",
				&g->ecc.gr.sm_l1_tag_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_l1_tag_ecc_uncorrected_err_count",
				&g->ecc.gr.sm_l1_tag_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_cbu_ecc_corrected_err_count",
				&g->ecc.gr.sm_cbu_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_cbu_ecc_uncorrected_err_count",
				&g->ecc.gr.sm_cbu_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_l1_data_ecc_corrected_err_count",
				&g->ecc.gr.sm_l1_data_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_l1_data_ecc_uncorrected_err_count",
				&g->ecc.gr.sm_l1_data_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_icache_ecc_corrected_err_count",
				&g->ecc.gr.sm_icache_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_icache_ecc_uncorrected_err_count",
				&g->ecc.gr.sm_icache_uncorrected_err_count);

	/* GCC L1.5 cache counters. */
	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"gcc_l15_ecc_corrected_err_count",
				&g->ecc.gr.gcc_l15_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"gcc_l15_ecc_uncorrected_err_count",
				&g->ecc.gr.gcc_l15_uncorrected_err_count);

	/* L2 cache counters, one node per LTC. */
	error |= nvgpu_ecc_stat_create(dev,
				g->ltc_count,
				0,
				"ltc",
				NULL,
				"l2_cache_uncorrected_err_count",
				&g->ecc.ltc.l2_cache_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				g->ltc_count,
				0,
				"ltc",
				NULL,
				"l2_cache_corrected_err_count",
				&g->ecc.ltc.l2_cache_corrected_err_count);

	/* FECS falcon counters (single instance). */
	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"gpc",
				NULL,
				"fecs_ecc_uncorrected_err_count",
				&g->ecc.gr.fecs_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"gpc",
				NULL,
				"fecs_ecc_corrected_err_count",
				&g->ecc.gr.fecs_corrected_err_count);

	/* GPCCS falcon and GPC MMU L1 TLB counters, one per GPC. */
	error |= nvgpu_ecc_stat_create(dev,
				g->gr.gpc_count,
				0,
				"gpc",
				NULL,
				"gpccs_ecc_uncorrected_err_count",
				&g->ecc.gr.gpccs_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				g->gr.gpc_count,
				0,
				"gpc",
				NULL,
				"gpccs_ecc_corrected_err_count",
				&g->ecc.gr.gpccs_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				g->gr.gpc_count,
				0,
				"gpc",
				NULL,
				"mmu_l1tlb_ecc_uncorrected_err_count",
				&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				g->gr.gpc_count,
				0,
				"gpc",
				NULL,
				"mmu_l1tlb_ecc_corrected_err_count",
				&g->ecc.gr.mmu_l1tlb_corrected_err_count);

	/* Engine-level FB/MMU and PMU counters (single instance each). */
	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_l2tlb_ecc_uncorrected_err_count",
				&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_l2tlb_ecc_corrected_err_count",
				&g->ecc.fb.mmu_l2tlb_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_hubtlb_ecc_uncorrected_err_count",
				&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_hubtlb_ecc_corrected_err_count",
				&g->ecc.fb.mmu_hubtlb_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_fillunit_ecc_uncorrected_err_count",
				&g->ecc.fb.mmu_fillunit_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"mmu_fillunit_ecc_corrected_err_count",
				&g->ecc.fb.mmu_fillunit_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"pmu_ecc_uncorrected_err_count",
				&g->ecc.pmu.pmu_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
				1,
				0,
				"eng",
				NULL,
				"pmu_ecc_corrected_err_count",
				&g->ecc.pmu.pmu_corrected_err_count);

	if (error)
		dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
}
460 | |||
/*
 * Remove the gv11b ECC sysfs nodes created by gr_gv11b_create_sysfs().
 * Mirror image of creation: same counters, same per-unit counts.
 * Idempotent: bails out if the stats were never created.
 */
void gr_gv11b_remove_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	/* Nothing to remove if create_sysfs never ran (or already undone). */
	if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
		return;
	gr_gp10b_remove_sysfs(g);

	/* Per-TPC SM and GCC counters. */
	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_tag_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_tag_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_cbu_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_cbu_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_data_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_data_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_icache_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_icache_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.gcc_l15_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.gcc_l15_uncorrected_err_count);

	/* LTC, GPC and engine-level counters (counts match creation). */
	nvgpu_ecc_stat_remove(dev,
			g->ltc_count,
			0,
			&g->ecc.ltc.l2_cache_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->ltc_count,
			0,
			&g->ecc.ltc.l2_cache_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.gr.fecs_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.gr.fecs_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.gpccs_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.gpccs_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.mmu_l1tlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_l2tlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_hubtlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_fillunit_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_fillunit_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.pmu.pmu_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.pmu.pmu_corrected_err_count);
}
diff --git a/drivers/gpu/nvgpu/os/linux/rwsem.c b/drivers/gpu/nvgpu/os/linux/rwsem.c new file mode 100644 index 00000000..297ddf11 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/rwsem.c | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <nvgpu/rwsem.h> | ||
15 | |||
/* Initialize an nvgpu rw-semaphore by delegating to the Linux rwsem. */
void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem)
{
	init_rwsem(&rwsem->rwsem);
}
20 | |||
/* Release a reader hold taken with nvgpu_rwsem_down_read(). */
void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem)
{
	up_read(&rwsem->rwsem);
}
25 | |||
/* Acquire the semaphore for shared (read) access; may sleep. */
void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem)
{
	down_read(&rwsem->rwsem);
}
30 | |||
/* Release an exclusive hold taken with nvgpu_rwsem_down_write(). */
void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem)
{
	up_write(&rwsem->rwsem);
}
35 | |||
/* Acquire the semaphore for exclusive (write) access; may sleep. */
void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem)
{
	down_write(&rwsem->rwsem);
}
diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c new file mode 100644 index 00000000..84ac1cfd --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/scale.c | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/devfreq.h> | ||
20 | #include <linux/export.h> | ||
21 | #include <soc/tegra/chip-id.h> | ||
22 | #include <linux/pm_qos.h> | ||
23 | |||
24 | #include <governor.h> | ||
25 | |||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/log.h> | ||
28 | |||
29 | #include "gk20a/gk20a.h" | ||
30 | #include "platform_gk20a.h" | ||
31 | #include "scale.h" | ||
32 | #include "os_linux.h" | ||
33 | |||
/*
 * gk20a_scale_qos_notify()
 *
 * Called when the PM QoS min/max frequency requirement for the device
 * has changed. Two compile-time variants:
 *  - CONFIG_COMMON_CLK: cache the new QoS bounds (kHz -> Hz) in the
 *    scale profile and re-run devfreq under its lock.
 *  - otherwise: compute a target frequency from QoS/devfreq and hand it
 *    to the platform postscale callback.
 * Always returns NOTIFY_OK.
 */

#if defined(CONFIG_COMMON_CLK)
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p)
{
	struct gk20a_scale_profile *profile =
		container_of(nb, struct gk20a_scale_profile,
			qos_notify_block);
	struct gk20a *g = get_gk20a(profile->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct devfreq *devfreq = l->devfreq;

	/* Nothing to clamp until devfreq has been brought up. */
	if (!devfreq)
		return NOTIFY_OK;

	mutex_lock(&devfreq->lock);
	/* check for pm_qos min and max frequency requirement */
	/* QoS bounds are reported in kHz; profile stores Hz. */
	profile->qos_min_freq =
	  (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
	profile->qos_max_freq =
	  (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;

	if (profile->qos_min_freq > profile->qos_max_freq) {
		nvgpu_err(g,
			"QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
			profile->qos_min_freq, profile->qos_max_freq);
		/* Conflicting request: the cap wins over the floor. */
		profile->qos_min_freq = profile->qos_max_freq;
	}

	/* Re-evaluate the devfreq target under the new bounds. */
	update_devfreq(devfreq);
	mutex_unlock(&devfreq->lock);

	return NOTIFY_OK;
}
#else
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p)
{
	struct gk20a_scale_profile *profile =
		container_of(nb, struct gk20a_scale_profile,
			qos_notify_block);
	struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
	struct gk20a *g = get_gk20a(profile->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	unsigned long freq;

	if (!platform->postscale)
		return NOTIFY_OK;

	/* get the frequency requirement. if devfreq is enabled, check if it
	 * has higher demand than qos */
	freq = platform->clk_round_rate(profile->dev,
			(u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
	if (l->devfreq)
		freq = max(l->devfreq->previous_freq, freq);

	/* Update gpu load because we may scale the emc target
	 * if the gpu load changed. */
	nvgpu_pmu_load_update(g);
	platform->postscale(profile->dev, freq);

	return NOTIFY_OK;
}
#endif
104 | |||
105 | /* | ||
106 | * gk20a_scale_make_freq_table(profile) | ||
107 | * | ||
108 | * This function initialises the frequency table for the given device profile | ||
109 | */ | ||
110 | |||
111 | static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile) | ||
112 | { | ||
113 | struct gk20a_platform *platform = dev_get_drvdata(profile->dev); | ||
114 | int num_freqs, err; | ||
115 | unsigned long *freqs; | ||
116 | |||
117 | if (platform->get_clk_freqs) { | ||
118 | /* get gpu frequency table */ | ||
119 | err = platform->get_clk_freqs(profile->dev, &freqs, | ||
120 | &num_freqs); | ||
121 | if (err) | ||
122 | return -ENOSYS; | ||
123 | } else | ||
124 | return -ENOSYS; | ||
125 | |||
126 | profile->devfreq_profile.freq_table = (unsigned long *)freqs; | ||
127 | profile->devfreq_profile.max_state = num_freqs; | ||
128 | |||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * gk20a_scale_target(dev, *freq, flags) | ||
134 | * | ||
135 | * This function scales the clock | ||
136 | */ | ||
137 | |||
138 | static int gk20a_scale_target(struct device *dev, unsigned long *freq, | ||
139 | u32 flags) | ||
140 | { | ||
141 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
142 | struct gk20a *g = platform->g; | ||
143 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
144 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
145 | struct devfreq *devfreq = l->devfreq; | ||
146 | unsigned long local_freq = *freq; | ||
147 | unsigned long rounded_rate; | ||
148 | unsigned long min_freq = 0, max_freq = 0; | ||
149 | |||
150 | /* | ||
151 | * Calculate floor and cap frequency values | ||
152 | * | ||
153 | * Policy : | ||
154 | * We have two APIs to clip the frequency | ||
155 | * 1. devfreq | ||
156 | * 2. pm_qos | ||
157 | * | ||
158 | * To calculate floor (min) freq, we select MAX of floor frequencies | ||
159 | * requested from both APIs | ||
160 | * To get cap (max) freq, we select MIN of max frequencies | ||
161 | * | ||
162 | * In case we have conflict (min_freq > max_freq) after above | ||
163 | * steps, we ensure that max_freq wins over min_freq | ||
164 | */ | ||
165 | min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq); | ||
166 | max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq); | ||
167 | |||
168 | if (min_freq > max_freq) | ||
169 | min_freq = max_freq; | ||
170 | |||
171 | /* Clip requested frequency */ | ||
172 | if (local_freq < min_freq) | ||
173 | local_freq = min_freq; | ||
174 | |||
175 | if (local_freq > max_freq) | ||
176 | local_freq = max_freq; | ||
177 | |||
178 | /* set the final frequency */ | ||
179 | rounded_rate = platform->clk_round_rate(dev, local_freq); | ||
180 | |||
181 | /* Check for duplicate request */ | ||
182 | if (rounded_rate == g->last_freq) | ||
183 | return 0; | ||
184 | |||
185 | if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate) | ||
186 | *freq = rounded_rate; | ||
187 | else { | ||
188 | g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate); | ||
189 | *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK); | ||
190 | } | ||
191 | |||
192 | g->last_freq = *freq; | ||
193 | |||
194 | /* postscale will only scale emc (dram clock) if evaluating | ||
195 | * gk20a_tegra_get_emc_rate() produces a new or different emc | ||
196 | * target because the load or_and gpufreq has changed */ | ||
197 | if (platform->postscale) | ||
198 | platform->postscale(dev, rounded_rate); | ||
199 | |||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * update_load_estimate_gpmu(profile) | ||
205 | * | ||
206 | * Update load estimate using gpmu. The gpmu value is normalised | ||
207 | * based on the time it was asked last time. | ||
208 | */ | ||
209 | |||
210 | static void update_load_estimate_gpmu(struct device *dev) | ||
211 | { | ||
212 | struct gk20a *g = get_gk20a(dev); | ||
213 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
214 | unsigned long dt; | ||
215 | u32 busy_time; | ||
216 | ktime_t t; | ||
217 | |||
218 | t = ktime_get(); | ||
219 | dt = ktime_us_delta(t, profile->last_event_time); | ||
220 | |||
221 | profile->dev_stat.total_time = dt; | ||
222 | profile->last_event_time = t; | ||
223 | nvgpu_pmu_load_norm(g, &busy_time); | ||
224 | profile->dev_stat.busy_time = (busy_time * dt) / 1000; | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * gk20a_scale_suspend(dev) | ||
229 | * | ||
230 | * This function informs devfreq of suspend | ||
231 | */ | ||
232 | |||
233 | void gk20a_scale_suspend(struct device *dev) | ||
234 | { | ||
235 | struct gk20a *g = get_gk20a(dev); | ||
236 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
237 | struct devfreq *devfreq = l->devfreq; | ||
238 | |||
239 | if (!devfreq) | ||
240 | return; | ||
241 | |||
242 | devfreq_suspend_device(devfreq); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * gk20a_scale_resume(dev) | ||
247 | * | ||
248 | * This functions informs devfreq of resume | ||
249 | */ | ||
250 | |||
251 | void gk20a_scale_resume(struct device *dev) | ||
252 | { | ||
253 | struct gk20a *g = get_gk20a(dev); | ||
254 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
255 | struct devfreq *devfreq = l->devfreq; | ||
256 | |||
257 | if (!devfreq) | ||
258 | return; | ||
259 | |||
260 | g->last_freq = 0; | ||
261 | devfreq_resume_device(devfreq); | ||
262 | } | ||
263 | |||
/*
 * gk20a_scale_get_dev_status(dev, *stat)
 *
 * devfreq get_dev_status callback: fill *stat with the current GPCCLK
 * frequency and the busy/total time accumulated since the previous
 * query, then reset the local accumulators. Statement order matters:
 * the PMU shadow must be refreshed before the load estimate is taken.
 * Always returns 0.
 */
static int gk20a_scale_get_dev_status(struct device *dev,
					struct devfreq_dev_status *stat)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = g->scale_profile;
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	/* update the software shadow */
	nvgpu_pmu_load_update(g);

	/* inform edp about new constraint */
	if (platform->prescale)
		platform->prescale(dev);

	/* Make sure there are correct values for the current frequency */
	profile->dev_stat.current_frequency =
			g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);

	/* Update load estimate */
	update_load_estimate_gpmu(dev);

	/* Copy the contents of the current device status */
	*stat = profile->dev_stat;

	/* Finally, clear out the local values */
	profile->dev_stat.total_time = 0;
	profile->dev_stat.busy_time = 0;

	return 0;
}
300 | |||
/*
 * get_cur_freq(struct device *dev, unsigned long *freq)
 *
 * devfreq get_cur_freq callback: report the current GPCCLK rate in Hz.
 * Always returns 0.
 */
static int get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct gk20a *g = get_gk20a(dev);
	*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
	return 0;
}
313 | |||
314 | |||
315 | /* | ||
316 | * gk20a_scale_init(dev) | ||
317 | */ | ||
318 | |||
319 | void gk20a_scale_init(struct device *dev) | ||
320 | { | ||
321 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
322 | struct gk20a *g = platform->g; | ||
323 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
324 | struct gk20a_scale_profile *profile; | ||
325 | int err; | ||
326 | |||
327 | if (g->scale_profile) | ||
328 | return; | ||
329 | |||
330 | if (!platform->devfreq_governor && !platform->qos_notify) | ||
331 | return; | ||
332 | |||
333 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
334 | |||
335 | profile->dev = dev; | ||
336 | profile->dev_stat.busy = false; | ||
337 | |||
338 | /* Create frequency table */ | ||
339 | err = gk20a_scale_make_freq_table(profile); | ||
340 | if (err || !profile->devfreq_profile.max_state) | ||
341 | goto err_get_freqs; | ||
342 | |||
343 | profile->qos_min_freq = 0; | ||
344 | profile->qos_max_freq = UINT_MAX; | ||
345 | |||
346 | /* Store device profile so we can access it if devfreq governor | ||
347 | * init needs that */ | ||
348 | g->scale_profile = profile; | ||
349 | |||
350 | if (platform->devfreq_governor) { | ||
351 | struct devfreq *devfreq; | ||
352 | |||
353 | profile->devfreq_profile.initial_freq = | ||
354 | profile->devfreq_profile.freq_table[0]; | ||
355 | profile->devfreq_profile.target = gk20a_scale_target; | ||
356 | profile->devfreq_profile.get_dev_status = | ||
357 | gk20a_scale_get_dev_status; | ||
358 | profile->devfreq_profile.get_cur_freq = get_cur_freq; | ||
359 | profile->devfreq_profile.polling_ms = 25; | ||
360 | |||
361 | devfreq = devfreq_add_device(dev, | ||
362 | &profile->devfreq_profile, | ||
363 | platform->devfreq_governor, NULL); | ||
364 | |||
365 | if (IS_ERR(devfreq)) | ||
366 | devfreq = NULL; | ||
367 | |||
368 | l->devfreq = devfreq; | ||
369 | } | ||
370 | |||
371 | /* Should we register QoS callback for this device? */ | ||
372 | if (platform->qos_notify) { | ||
373 | profile->qos_notify_block.notifier_call = | ||
374 | platform->qos_notify; | ||
375 | |||
376 | pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
377 | &profile->qos_notify_block); | ||
378 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
379 | &profile->qos_notify_block); | ||
380 | } | ||
381 | |||
382 | return; | ||
383 | |||
384 | err_get_freqs: | ||
385 | nvgpu_kfree(g, profile); | ||
386 | } | ||
387 | |||
388 | void gk20a_scale_exit(struct device *dev) | ||
389 | { | ||
390 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
391 | struct gk20a *g = platform->g; | ||
392 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
393 | int err; | ||
394 | |||
395 | if (platform->qos_notify) { | ||
396 | pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
397 | &g->scale_profile->qos_notify_block); | ||
398 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
399 | &g->scale_profile->qos_notify_block); | ||
400 | } | ||
401 | |||
402 | if (platform->devfreq_governor) { | ||
403 | err = devfreq_remove_device(l->devfreq); | ||
404 | l->devfreq = NULL; | ||
405 | } | ||
406 | |||
407 | nvgpu_kfree(g, g->scale_profile); | ||
408 | g->scale_profile = NULL; | ||
409 | } | ||
410 | |||
/*
 * gk20a_scale_hw_init(dev)
 *
 * Initialize hardware portion of the device: reset the load-sampling
 * window so the first devfreq status query after power-up starts from
 * a clean timestamp.
 */
void gk20a_scale_hw_init(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	/* make sure that scaling has been initialised */
	if (!profile)
		return;

	profile->dev_stat.total_time = 0;
	profile->last_event_time = ktime_get();
}
diff --git a/drivers/gpu/nvgpu/os/linux/scale.h b/drivers/gpu/nvgpu/os/linux/scale.h new file mode 100644 index 00000000..c1e6fe86 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/scale.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * gk20a clock scaling profile | ||
3 | * | ||
4 | * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef GK20A_SCALE_H | ||
20 | #define GK20A_SCALE_H | ||
21 | |||
22 | #include <linux/devfreq.h> | ||
23 | |||
struct clk;

/*
 * Per-device clock scaling state: the devfreq profile/status pair plus
 * the PM QoS notifier block and its cached frequency bounds.
 */
struct gk20a_scale_profile {
	struct device *dev;
	ktime_t last_event_time;		/* start of current load window */
	struct devfreq_dev_profile devfreq_profile;
	struct devfreq_dev_status dev_stat;
	struct notifier_block qos_notify_block;
	unsigned long qos_min_freq;		/* PM QoS floor, Hz */
	unsigned long qos_max_freq;		/* PM QoS cap, Hz */
	void *private_data;			/* platform-owned (e.g. bwmgr client) */
};

/* Initialization and de-initialization for module */
void gk20a_scale_init(struct device *);
void gk20a_scale_exit(struct device *);
void gk20a_scale_hw_init(struct device *dev);

#if defined(CONFIG_GK20A_DEVFREQ)
/*
 * call when performing submit to notify scaling mechanism that the module is
 * in use
 */
void gk20a_scale_notify_busy(struct device *);
void gk20a_scale_notify_idle(struct device *);

void gk20a_scale_suspend(struct device *);
void gk20a_scale_resume(struct device *);
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p);
#else
/* Scaling disabled at build time: no-op stubs so callers need no #ifdefs. */
static inline void gk20a_scale_notify_busy(struct device *dev) {}
static inline void gk20a_scale_notify_idle(struct device *dev) {}
static inline void gk20a_scale_suspend(struct device *dev) {}
static inline void gk20a_scale_resume(struct device *dev) {}
static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
					unsigned long n, void *p)
{
	return -ENOSYS;
}
#endif
65 | |||
66 | #endif | ||
diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c new file mode 100644 index 00000000..2ad5aabf --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.c | |||
@@ -0,0 +1,676 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | #include <asm/barrier.h> | ||
17 | #include <linux/wait.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/poll.h> | ||
20 | #include <uapi/linux/nvgpu.h> | ||
21 | |||
22 | #include <nvgpu/kmem.h> | ||
23 | #include <nvgpu/log.h> | ||
24 | #include <nvgpu/bug.h> | ||
25 | #include <nvgpu/barrier.h> | ||
26 | |||
27 | #include "gk20a/gk20a.h" | ||
28 | #include "gk20a/gr_gk20a.h" | ||
29 | #include "sched.h" | ||
30 | #include "os_linux.h" | ||
31 | #include "ioctl_tsg.h" | ||
32 | |||
33 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
34 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
35 | |||
/*
 * Blocking read of the scheduler event status word.
 *
 * Waits (unless O_NONBLOCK is set) until sched->status becomes non-zero,
 * copies it to userspace as a struct nvgpu_sched_event_arg, then clears
 * it so the next read blocks again. Returns the number of bytes copied,
 * or a negative errno (-EINVAL for a too-small buffer, -EAGAIN, -EFAULT,
 * or the interrupted-wait error).
 */
ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
	size_t size, loff_t *off)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	struct nvgpu_sched_event_arg event = { 0 };
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	/* caller's buffer must hold one full event record */
	if (size < sizeof(event))
		return -EINVAL;
	size = sizeof(event);

	nvgpu_mutex_acquire(&sched->status_lock);
	while (!sched->status) {
		/* drop the lock before blocking or bailing out */
		nvgpu_mutex_release(&sched->status_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq,
			sched->status, 0);
		if (err)
			return err;
		/* re-acquire and re-check: the wakeup may be spurious */
		nvgpu_mutex_acquire(&sched->status_lock);
	}

	event.reserved = 0;
	event.status = sched->status;

	if (copy_to_user(buf, &event, size)) {
		nvgpu_mutex_release(&sched->status_lock);
		return -EFAULT;
	}

	/* event consumed: clear pending status under the lock */
	sched->status = 0;

	nvgpu_mutex_release(&sched->status_lock);

	return size;
}
77 | |||
78 | unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait) | ||
79 | { | ||
80 | struct gk20a_sched_ctrl *sched = filp->private_data; | ||
81 | struct gk20a *g = sched->g; | ||
82 | unsigned int mask = 0; | ||
83 | |||
84 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " "); | ||
85 | |||
86 | nvgpu_mutex_acquire(&sched->status_lock); | ||
87 | poll_wait(filp, &sched->readout_wq.wq, wait); | ||
88 | if (sched->status) | ||
89 | mask |= POLLIN | POLLRDNORM; | ||
90 | nvgpu_mutex_release(&sched->status_lock); | ||
91 | |||
92 | return mask; | ||
93 | } | ||
94 | |||
95 | static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, | ||
96 | struct nvgpu_sched_get_tsgs_args *arg) | ||
97 | { | ||
98 | struct gk20a *g = sched->g; | ||
99 | |||
100 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
101 | arg->size, arg->buffer); | ||
102 | |||
103 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
104 | arg->size = sched->bitmap_size; | ||
105 | return -ENOSPC; | ||
106 | } | ||
107 | |||
108 | nvgpu_mutex_acquire(&sched->status_lock); | ||
109 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
110 | sched->active_tsg_bitmap, sched->bitmap_size)) { | ||
111 | nvgpu_mutex_release(&sched->status_lock); | ||
112 | return -EFAULT; | ||
113 | } | ||
114 | nvgpu_mutex_release(&sched->status_lock); | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched, | ||
120 | struct nvgpu_sched_get_tsgs_args *arg) | ||
121 | { | ||
122 | struct gk20a *g = sched->g; | ||
123 | |||
124 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx", | ||
125 | arg->size, arg->buffer); | ||
126 | |||
127 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
128 | arg->size = sched->bitmap_size; | ||
129 | return -ENOSPC; | ||
130 | } | ||
131 | |||
132 | nvgpu_mutex_acquire(&sched->status_lock); | ||
133 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
134 | sched->recent_tsg_bitmap, sched->bitmap_size)) { | ||
135 | nvgpu_mutex_release(&sched->status_lock); | ||
136 | return -EFAULT; | ||
137 | } | ||
138 | |||
139 | memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); | ||
140 | nvgpu_mutex_release(&sched->status_lock); | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, | ||
146 | struct nvgpu_sched_get_tsgs_by_pid_args *arg) | ||
147 | { | ||
148 | struct gk20a *g = sched->g; | ||
149 | struct fifo_gk20a *f = &g->fifo; | ||
150 | struct tsg_gk20a *tsg; | ||
151 | u64 *bitmap; | ||
152 | unsigned int tsgid; | ||
153 | /* pid at user level corresponds to kernel tgid */ | ||
154 | pid_t tgid = (pid_t)arg->pid; | ||
155 | int err = 0; | ||
156 | |||
157 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx", | ||
158 | (pid_t)arg->pid, arg->size, arg->buffer); | ||
159 | |||
160 | if ((arg->size < sched->bitmap_size) || (!arg->buffer)) { | ||
161 | arg->size = sched->bitmap_size; | ||
162 | return -ENOSPC; | ||
163 | } | ||
164 | |||
165 | bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size); | ||
166 | if (!bitmap) | ||
167 | return -ENOMEM; | ||
168 | |||
169 | nvgpu_mutex_acquire(&sched->status_lock); | ||
170 | for (tsgid = 0; tsgid < f->num_channels; tsgid++) { | ||
171 | if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { | ||
172 | tsg = &f->tsg[tsgid]; | ||
173 | if (tsg->tgid == tgid) | ||
174 | NVGPU_SCHED_SET(tsgid, bitmap); | ||
175 | } | ||
176 | } | ||
177 | nvgpu_mutex_release(&sched->status_lock); | ||
178 | |||
179 | if (copy_to_user((void __user *)(uintptr_t)arg->buffer, | ||
180 | bitmap, sched->bitmap_size)) | ||
181 | err = -EFAULT; | ||
182 | |||
183 | nvgpu_kfree(sched->g, bitmap); | ||
184 | |||
185 | return err; | ||
186 | } | ||
187 | |||
/*
 * NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: report scheduling parameters of a
 * TSG (owner pid, runlist interleave level, timeslice, graphics/compute
 * preemption modes). Returns -EINVAL for an out-of-range tsgid and
 * -ENXIO when the TSG is no longer alive.
 */
static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_tsg_get_params_args *arg)
{
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	u32 tsgid = arg->tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);

	if (tsgid >= f->num_channels)
		return -EINVAL;

	/* tsgid is user-controlled: fence off speculative OOB indexing */
	nvgpu_speculation_barrier();

	tsg = &f->tsg[tsgid];
	/* temporary reference so the TSG cannot be torn down under us */
	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
		return -ENXIO;

	arg->pid = tsg->tgid;	/* kernel tgid corresponds to user pid */
	arg->runlist_interleave = tsg->interleave_level;
	arg->timeslice = tsg->timeslice_us;

	arg->graphics_preempt_mode =
		tsg->gr_ctx.graphics_preempt_mode;
	arg->compute_preempt_mode =
		tsg->gr_ctx.compute_preempt_mode;

	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);

	return 0;
}
220 | |||
/*
 * NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: set a TSG's timeslice while
 * holding a power reference and a TSG reference. Returns -EINVAL for an
 * out-of-range tsgid, -ENXIO when the TSG is dead, or the error from
 * gk20a_busy()/gk20a_tsg_set_timeslice().
 */
static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
	struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_tsg_timeslice_args *arg)
{
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	u32 tsgid = arg->tsgid;
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);

	if (tsgid >= f->num_channels)
		return -EINVAL;

	/* tsgid is user-controlled: fence off speculative OOB indexing */
	nvgpu_speculation_barrier();

	tsg = &f->tsg[tsgid];
	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
		return -ENXIO;

	/* GPU must be powered to reprogram scheduling parameters */
	err = gk20a_busy(g);
	if (err)
		goto done;

	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);

	gk20a_idle(g);

done:
	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);

	return err;
}
255 | |||
256 | static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( | ||
257 | struct gk20a_sched_ctrl *sched, | ||
258 | struct nvgpu_sched_tsg_runlist_interleave_args *arg) | ||
259 | { | ||
260 | struct gk20a *g = sched->g; | ||
261 | struct fifo_gk20a *f = &g->fifo; | ||
262 | struct tsg_gk20a *tsg; | ||
263 | u32 tsgid = arg->tsgid; | ||
264 | int err; | ||
265 | |||
266 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); | ||
267 | |||
268 | if (tsgid >= f->num_channels) | ||
269 | return -EINVAL; | ||
270 | |||
271 | nvgpu_speculation_barrier(); | ||
272 | |||
273 | tsg = &f->tsg[tsgid]; | ||
274 | if (!nvgpu_ref_get_unless_zero(&tsg->refcount)) | ||
275 | return -ENXIO; | ||
276 | |||
277 | err = gk20a_busy(g); | ||
278 | if (err) | ||
279 | goto done; | ||
280 | |||
281 | err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); | ||
282 | |||
283 | gk20a_idle(g); | ||
284 | |||
285 | done: | ||
286 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | ||
287 | |||
288 | return err; | ||
289 | } | ||
290 | |||
/*
 * NVGPU_SCHED_IOCTL_LOCK_CONTROL: mark scheduling control as held by
 * this client. The flag is cleared by UNLOCK_CONTROL or on device close.
 */
static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
{
	struct gk20a *g = sched->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");

	nvgpu_mutex_acquire(&sched->control_lock);
	sched->control_locked = true;
	nvgpu_mutex_release(&sched->control_lock);
	return 0;
}
302 | |||
/*
 * NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: release scheduling control taken by
 * LOCK_CONTROL.
 */
static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
{
	struct gk20a *g = sched->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");

	nvgpu_mutex_acquire(&sched->control_lock);
	sched->control_locked = false;
	nvgpu_mutex_release(&sched->control_lock);
	return 0;
}
314 | |||
/* NVGPU_SCHED_IOCTL_GET_API_VERSION: report the sched ioctl API version. */
static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_api_version_args *args)
{
	struct gk20a *g = sched->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");

	args->version = NVGPU_SCHED_API_VERSION;
	return 0;
}
325 | |||
/*
 * NVGPU_SCHED_IOCTL_GET_TSG: take a long-lived reference on a TSG on
 * behalf of the client. The reference is tracked in ref_tsg_bitmap and
 * dropped via NVGPU_SCHED_IOCTL_PUT_TSG or on device close. At most one
 * tracked reference per TSG is allowed.
 */
static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_tsg_refcount_args *arg)
{
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	u32 tsgid = arg->tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);

	if (tsgid >= f->num_channels)
		return -EINVAL;

	/* tsgid is user-controlled: fence off speculative OOB indexing */
	nvgpu_speculation_barrier();

	tsg = &f->tsg[tsgid];
	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
		return -ENXIO;

	nvgpu_mutex_acquire(&sched->status_lock);
	if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
		nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
		/* unlock status_lock as nvgpu_ioctl_tsg_release locks it */
		nvgpu_mutex_release(&sched->status_lock);
		nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
		return -ENXIO;
	}

	/* keep reference on TSG, will be released on
	 * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close
	 */
	NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
	nvgpu_mutex_release(&sched->status_lock);

	return 0;
}
362 | |||
/*
 * NVGPU_SCHED_IOCTL_PUT_TSG: drop a TSG reference previously taken via
 * GET_TSG. Returns -ENXIO if the TSG was not referenced by this client.
 */
static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_tsg_refcount_args *arg)
{
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	u32 tsgid = arg->tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);

	if (tsgid >= f->num_channels)
		return -EINVAL;

	/* tsgid is user-controlled: fence off speculative OOB indexing */
	nvgpu_speculation_barrier();

	/* clear the tracking bit under the lock before dropping the ref */
	nvgpu_mutex_acquire(&sched->status_lock);
	if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
		nvgpu_mutex_release(&sched->status_lock);
		nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid);
		return -ENXIO;
	}
	NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
	nvgpu_mutex_release(&sched->status_lock);

	tsg = &f->tsg[tsgid];
	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);

	return 0;
}
392 | |||
/*
 * Open the scheduler control device node.
 *
 * Exclusive-open: only one client at a time (busy_lock is tryacquired
 * here and held until release). Holds a gk20a reference for the
 * lifetime of the open file. The recent-TSG bitmap is primed with all
 * currently active TSGs so the client also learns about TSGs created
 * before it opened the device.
 */
int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l = container_of(inode->i_cdev,
				struct nvgpu_os_linux, sched.cdev);
	struct gk20a *g;
	struct gk20a_sched_ctrl *sched;
	int err = 0;

	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;
	sched = &l->sched_ctrl;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);

	/* finish deferred sw setup by powering the GPU once, if needed */
	if (!sched->sw_ready) {
		err = gk20a_busy(g);
		if (err)
			goto free_ref;

		gk20a_idle(g);
	}

	if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		err = -EBUSY;
		goto free_ref;
	}

	memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
		sched->bitmap_size);
	memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size);

	filp->private_data = sched;
	nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched);

free_ref:
	if (err)
		gk20a_put(g);
	return err;
}
433 | |||
434 | long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, | ||
435 | unsigned long arg) | ||
436 | { | ||
437 | struct gk20a_sched_ctrl *sched = filp->private_data; | ||
438 | struct gk20a *g = sched->g; | ||
439 | u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE]; | ||
440 | int err = 0; | ||
441 | |||
442 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd)); | ||
443 | |||
444 | if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) || | ||
445 | (_IOC_NR(cmd) == 0) || | ||
446 | (_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) || | ||
447 | (_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE)) | ||
448 | return -EINVAL; | ||
449 | |||
450 | memset(buf, 0, sizeof(buf)); | ||
451 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
452 | if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) | ||
453 | return -EFAULT; | ||
454 | } | ||
455 | |||
456 | switch (cmd) { | ||
457 | case NVGPU_SCHED_IOCTL_GET_TSGS: | ||
458 | err = gk20a_sched_dev_ioctl_get_tsgs(sched, | ||
459 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
460 | break; | ||
461 | case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS: | ||
462 | err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched, | ||
463 | (struct nvgpu_sched_get_tsgs_args *)buf); | ||
464 | break; | ||
465 | case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID: | ||
466 | err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched, | ||
467 | (struct nvgpu_sched_get_tsgs_by_pid_args *)buf); | ||
468 | break; | ||
469 | case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS: | ||
470 | err = gk20a_sched_dev_ioctl_get_params(sched, | ||
471 | (struct nvgpu_sched_tsg_get_params_args *)buf); | ||
472 | break; | ||
473 | case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE: | ||
474 | err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched, | ||
475 | (struct nvgpu_sched_tsg_timeslice_args *)buf); | ||
476 | break; | ||
477 | case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE: | ||
478 | err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched, | ||
479 | (struct nvgpu_sched_tsg_runlist_interleave_args *)buf); | ||
480 | break; | ||
481 | case NVGPU_SCHED_IOCTL_LOCK_CONTROL: | ||
482 | err = gk20a_sched_dev_ioctl_lock_control(sched); | ||
483 | break; | ||
484 | case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: | ||
485 | err = gk20a_sched_dev_ioctl_unlock_control(sched); | ||
486 | break; | ||
487 | case NVGPU_SCHED_IOCTL_GET_API_VERSION: | ||
488 | err = gk20a_sched_dev_ioctl_get_api_version(sched, | ||
489 | (struct nvgpu_sched_api_version_args *)buf); | ||
490 | break; | ||
491 | case NVGPU_SCHED_IOCTL_GET_TSG: | ||
492 | err = gk20a_sched_dev_ioctl_get_tsg(sched, | ||
493 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
494 | break; | ||
495 | case NVGPU_SCHED_IOCTL_PUT_TSG: | ||
496 | err = gk20a_sched_dev_ioctl_put_tsg(sched, | ||
497 | (struct nvgpu_sched_tsg_refcount_args *)buf); | ||
498 | break; | ||
499 | default: | ||
500 | nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd); | ||
501 | err = -ENOTTY; | ||
502 | } | ||
503 | |||
504 | /* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on | ||
505 | * purpose with NULL buffer and/or zero size to discover TSG bitmap | ||
506 | * size. We need to update user arguments in this case too, even | ||
507 | * if we return an error. | ||
508 | */ | ||
509 | if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) { | ||
510 | if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd))) | ||
511 | err = -EFAULT; | ||
512 | } | ||
513 | |||
514 | return err; | ||
515 | } | ||
516 | |||
/*
 * Close the scheduler control device: drop every TSG reference taken via
 * GET_TSG, clear the control lock, release the exclusive busy_lock taken
 * in open, and drop the gk20a reference.
 */
int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	unsigned int tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);

	/* release any reference to TSGs */
	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
		if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
			tsg = &f->tsg[tsgid];
			nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
		}
	}

	/* unlock control */
	nvgpu_mutex_acquire(&sched->control_lock);
	sched->control_locked = false;
	nvgpu_mutex_release(&sched->control_lock);

	/* allow the next client to open the device */
	nvgpu_mutex_release(&sched->busy_lock);
	gk20a_put(g);
	return 0;
}
544 | |||
/*
 * Hook called when a TSG is created: mark it in the active and recent
 * bitmaps, raise the TSG_OPEN status bit and wake any client blocked in
 * gk20a_sched_dev_read()/poll().
 */
void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	/* finish deferred sw setup by powering the GPU once, if needed */
	if (!sched->sw_ready) {
		err = gk20a_busy(g);
		if (err) {
			WARN_ON(err);
			return;
		}

		gk20a_idle(g);
	}

	nvgpu_mutex_acquire(&sched->status_lock);
	NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
	NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
	sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
	nvgpu_mutex_release(&sched->status_lock);
	nvgpu_cond_signal_interruptible(&sched->readout_wq);
}
570 | |||
/*
 * Hook called when a TSG is released: remove it from the active and
 * recent bitmaps. Deliberately does not raise a status event (clients
 * are only notified about TSG creation).
 */
void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	nvgpu_mutex_acquire(&sched->status_lock);
	NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);

	/* clear recent_tsg_bitmap as well: if app manager did not
	 * notice that TSG was previously added, no need to notify it
	 * if the TSG has been released in the meantime. If the
	 * TSG gets reallocated, app manager will be notified as usual.
	 */
	NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);

	/* do not set event_pending, we only want to notify app manager
	 * when TSGs are added, so that it can apply sched params
	 */
	nvgpu_mutex_release(&sched->status_lock);
}
593 | |||
/*
 * One-time setup of scheduler-control state: allocates the three TSG
 * bitmaps (active/recent/ref), initializes the readout condition and the
 * status/control/busy locks. Idempotent — returns 0 immediately once
 * sw_ready is set. On failure, unwinds in reverse order via goto chain.
 */
int gk20a_sched_ctrl_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	struct fifo_gk20a *f = &g->fifo;
	int err;

	if (sched->sw_ready)
		return 0;

	sched->g = g;
	/* one bit per channel, rounded up to whole u64 words, in bytes */
	sched->bitmap_size = roundup(f->num_channels, 64) / 8;
	sched->status = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
		g, sched, sched->bitmap_size);

	sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->active_tsg_bitmap)
		return -ENOMEM;

	sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->recent_tsg_bitmap) {
		err = -ENOMEM;
		goto free_active;
	}

	sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->ref_tsg_bitmap) {
		err = -ENOMEM;
		goto free_recent;
	}

	/* NOTE(review): nvgpu_cond_init() return value is ignored here */
	nvgpu_cond_init(&sched->readout_wq);

	err = nvgpu_mutex_init(&sched->status_lock);
	if (err)
		goto free_ref;

	err = nvgpu_mutex_init(&sched->control_lock);
	if (err)
		goto free_status_lock;

	err = nvgpu_mutex_init(&sched->busy_lock);
	if (err)
		goto free_control_lock;

	sched->sw_ready = true;

	return 0;

	/* error unwind: destroy/free in reverse order of acquisition */
free_control_lock:
	nvgpu_mutex_destroy(&sched->control_lock);
free_status_lock:
	nvgpu_mutex_destroy(&sched->status_lock);
free_ref:
	nvgpu_kfree(g, sched->ref_tsg_bitmap);
free_recent:
	nvgpu_kfree(g, sched->recent_tsg_bitmap);
free_active:
	nvgpu_kfree(g, sched->active_tsg_bitmap);

	return err;
}
658 | |||
659 | void gk20a_sched_ctrl_cleanup(struct gk20a *g) | ||
660 | { | ||
661 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
662 | struct gk20a_sched_ctrl *sched = &l->sched_ctrl; | ||
663 | |||
664 | nvgpu_kfree(g, sched->active_tsg_bitmap); | ||
665 | nvgpu_kfree(g, sched->recent_tsg_bitmap); | ||
666 | nvgpu_kfree(g, sched->ref_tsg_bitmap); | ||
667 | sched->active_tsg_bitmap = NULL; | ||
668 | sched->recent_tsg_bitmap = NULL; | ||
669 | sched->ref_tsg_bitmap = NULL; | ||
670 | |||
671 | nvgpu_mutex_destroy(&sched->status_lock); | ||
672 | nvgpu_mutex_destroy(&sched->control_lock); | ||
673 | nvgpu_mutex_destroy(&sched->busy_lock); | ||
674 | |||
675 | sched->sw_ready = false; | ||
676 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/sched.h b/drivers/gpu/nvgpu/os/linux/sched.h new file mode 100644 index 00000000..a699bbea --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sched.h | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
#ifndef __NVGPU_SCHED_H
#define __NVGPU_SCHED_H

struct gk20a;
struct gpu_ops;
struct tsg_gk20a;
struct poll_table_struct;

/*
 * Per-GPU scheduler-control state backing the sched device node.
 * Locking:
 *   busy_lock    - open exclusivity; held from open() until release()
 *   status_lock  - guards status and the three TSG bitmaps
 *   control_lock - guards control_locked
 */
struct gk20a_sched_ctrl {
	struct gk20a *g;	/* owning GPU instance */

	struct nvgpu_mutex control_lock;
	bool control_locked;	/* client holds scheduling control */
	bool sw_ready;		/* one-time init completed */
	struct nvgpu_mutex status_lock;
	struct nvgpu_mutex busy_lock;

	u64 status;		/* pending event bits for read()/poll() */

	size_t bitmap_size;	/* size of each bitmap below, in bytes */
	u64 *active_tsg_bitmap;	/* TSGs currently alive */
	u64 *recent_tsg_bitmap;	/* TSGs added since last GET_RECENT_TSGS */
	u64 *ref_tsg_bitmap;	/* TSGs referenced via the GET_TSG ioctl */

	struct nvgpu_cond readout_wq;	/* wakes blocked readers/pollers */
};

int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);

void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
int gk20a_sched_ctrl_init(struct gk20a *);

void gk20a_sched_ctrl_cleanup(struct gk20a *g);

#endif /* __NVGPU_SCHED_H */
diff --git a/drivers/gpu/nvgpu/os/linux/sim.c b/drivers/gpu/nvgpu/os/linux/sim.c new file mode 100644 index 00000000..8e964f39 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/io.h> | ||
18 | #include <linux/highmem.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/linux/vm.h> | ||
23 | #include <nvgpu/bitops.h> | ||
24 | #include <nvgpu/nvgpu_mem.h> | ||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/soc.h> | ||
27 | #include <nvgpu/hw_sim.h> | ||
28 | #include <nvgpu/sim.h> | ||
29 | #include "gk20a/gk20a.h" | ||
30 | #include "platform_gk20a.h" | ||
31 | #include "os_linux.h" | ||
32 | #include "module.h" | ||
33 | |||
34 | void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v) | ||
35 | { | ||
36 | struct sim_nvgpu_linux *sim_linux = | ||
37 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
38 | |||
39 | writel(v, sim_linux->regs + r); | ||
40 | } | ||
41 | |||
42 | u32 sim_readl(struct sim_nvgpu *sim, u32 r) | ||
43 | { | ||
44 | struct sim_nvgpu_linux *sim_linux = | ||
45 | container_of(sim, struct sim_nvgpu_linux, sim); | ||
46 | |||
47 | return readl(sim_linux->regs + r); | ||
48 | } | ||
49 | |||
/*
 * Tear down Linux simulator support: switch the sim interface into
 * disabled mode, unmap its registers and free the per-OS sim state.
 * Safe to call when sim support was never set up (g->sim == NULL).
 */
void nvgpu_remove_sim_support_linux(struct gk20a *g)
{
	struct sim_nvgpu_linux *sim_linux;

	if (!g->sim)
		return;

	sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
	/* non-NULL regs means the aperture was successfully mapped */
	if (sim_linux->regs) {
		sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
		iounmap(sim_linux->regs);
		sim_linux->regs = NULL;
	}
	nvgpu_kfree(g, sim_linux);
	g->sim = NULL;
}
66 | |||
67 | int nvgpu_init_sim_support_linux(struct gk20a *g, | ||
68 | struct platform_device *dev) | ||
69 | { | ||
70 | struct sim_nvgpu_linux *sim_linux; | ||
71 | int err = -ENOMEM; | ||
72 | |||
73 | if (!nvgpu_platform_is_simulation(g)) | ||
74 | return 0; | ||
75 | |||
76 | sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux)); | ||
77 | if (!sim_linux) | ||
78 | return err; | ||
79 | g->sim = &sim_linux->sim; | ||
80 | g->sim->g = g; | ||
81 | sim_linux->regs = nvgpu_ioremap_resource(dev, | ||
82 | GK20A_SIM_IORESOURCE_MEM, | ||
83 | &sim_linux->reg_mem); | ||
84 | if (IS_ERR(sim_linux->regs)) { | ||
85 | nvgpu_err(g, "failed to remap gk20a sim regs"); | ||
86 | err = PTR_ERR(sim_linux->regs); | ||
87 | goto fail; | ||
88 | } | ||
89 | sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux; | ||
90 | return 0; | ||
91 | |||
92 | fail: | ||
93 | nvgpu_remove_sim_support_linux(g); | ||
94 | return err; | ||
95 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/sim_pci.c b/drivers/gpu/nvgpu/os/linux/sim_pci.c new file mode 100644 index 00000000..d37767b7 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sim_pci.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/io.h> | ||
18 | #include <linux/highmem.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/linux/vm.h> | ||
23 | #include <nvgpu/bitops.h> | ||
24 | #include <nvgpu/nvgpu_mem.h> | ||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/hw_sim_pci.h> | ||
27 | #include <nvgpu/sim.h> | ||
28 | #include "gk20a/gk20a.h" | ||
29 | #include "os_linux.h" | ||
30 | #include "module.h" | ||
31 | |||
32 | static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base) | ||
33 | { | ||
34 | u32 cfg; | ||
35 | bool is_simulation = false; | ||
36 | |||
37 | cfg = nvgpu_readl(g, sim_base + sim_config_r()); | ||
38 | if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v()) | ||
39 | is_simulation = true; | ||
40 | |||
41 | return is_simulation; | ||
42 | } | ||
43 | |||
/*
 * Tear down PCI simulator support. No-op on non-simulation platforms or
 * when sim state was never allocated. The register pointer aliases the
 * main register mapping set up at init, so it is only cleared here, not
 * unmapped.
 */
void nvgpu_remove_sim_support_linux_pci(struct gk20a *g)
{
	struct sim_nvgpu_linux *sim_linux;
	bool is_simulation;

	is_simulation = _nvgpu_pci_is_simulation(g, sim_r());

	if (!is_simulation) {
		return;
	}

	if (!g->sim) {
		nvgpu_warn(g, "sim_gk20a not allocated");
		return;
	}
	sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);

	if (sim_linux->regs) {
		/* quiesce the sim interface before freeing state */
		sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
		sim_linux->regs = NULL;
	}
	nvgpu_kfree(g, sim_linux);
	g->sim = NULL;
}
68 | |||
/*
 * Set up simulator support for PCI GPUs. Probes the sim config register
 * to detect simulation, records the result in the NVGPU_IS_FMODEL flag,
 * and on simulation points ->regs at the sim window inside the existing
 * register mapping (no separate ioremap). Returns 0 on success or when
 * not running in simulation; -ENOMEM on allocation failure.
 */
int nvgpu_init_sim_support_linux_pci(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct sim_nvgpu_linux *sim_linux;
	int err = -ENOMEM;
	bool is_simulation;

	is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
	__nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation);

	if (!is_simulation)
		return 0;

	sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
	if (!sim_linux)
		return err;
	g->sim = &sim_linux->sim;
	g->sim->g = g;
	/* sim registers live inside the already-mapped aperture */
	sim_linux->regs = l->regs + sim_r();
	sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci;

	return 0;
}
diff --git a/drivers/gpu/nvgpu/os/linux/soc.c b/drivers/gpu/nvgpu/os/linux/soc.c new file mode 100644 index 00000000..1b27d6f1 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/soc.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | */ | ||
13 | |||
14 | #include <soc/tegra/chip-id.h> | ||
15 | #include <soc/tegra/fuse.h> | ||
16 | #include <soc/tegra/tegra_bpmp.h> | ||
17 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
18 | #include <soc/tegra/virt/syscalls.h> | ||
19 | #endif | ||
20 | |||
21 | #include <nvgpu/soc.h> | ||
22 | #include "os_linux.h" | ||
23 | #include "platform_gk20a.h" | ||
24 | |||
/* Report whether the underlying Tegra platform identifies as real silicon. */
bool nvgpu_platform_is_silicon(struct gk20a *g)
{
	return tegra_platform_is_silicon();
}
29 | |||
/* "Simulation" here means the Tegra VDK platform model. */
bool nvgpu_platform_is_simulation(struct gk20a *g)
{
	return tegra_platform_is_vdk();
}
34 | |||
/* Report whether the underlying Tegra platform identifies as an FPGA. */
bool nvgpu_platform_is_fpga(struct gk20a *g)
{
	return tegra_platform_is_fpga();
}
39 | |||
/* Report whether the system is running under the Tegra hypervisor. */
bool nvgpu_is_hypervisor_mode(struct gk20a *g)
{
	return is_tegra_hypervisor_mode();
}
44 | |||
/* Report whether the BPMP (boot/power-management processor) is running. */
bool nvgpu_is_bpmp_running(struct gk20a *g)
{
	return tegra_bpmp_running();
}
49 | |||
50 | bool nvgpu_is_soc_t194_a01(struct gk20a *g) | ||
51 | { | ||
52 | return ((tegra_get_chip_id() == TEGRA194 && | ||
53 | tegra_chip_get_revision() == TEGRA194_REVISION_A01) ? | ||
54 | true : false); | ||
55 | } | ||
56 | |||
57 | #ifdef CONFIG_TEGRA_HV_MANAGER | ||
58 | /* When nvlink is enabled on dGPU, we need to use physical memory addresses. | ||
59 | * There is no SMMU translation. However, the device initially enumerates as a | ||
 * PCIe device. As such, when allocating memory for this PCIe device, the DMA
61 | * framework ends up allocating memory using SMMU (if enabled in device tree). | ||
62 | * As a result, when we switch to nvlink, we need to use underlying physical | ||
63 | * addresses, even if memory mappings exist in SMMU. | ||
64 | * In addition, when stage-2 SMMU translation is enabled (for instance when HV | ||
65 | * is enabled), the addresses we get from dma_alloc are IPAs. We need to | ||
66 | * convert them to PA. | ||
67 | */ | ||
/*
 * Translate an intermediate physical address (IPA) to a true physical
 * address (PA) via the hypervisor. Returns the PA, or 0 when the lookup
 * fails and the IPA is not inside the syncpoint aperture.
 */
static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct hyp_ipa_pa_info info;
	int err;
	u64 pa = 0ULL;

	err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa);
	if (err < 0) {
		/* WAR for bug 2096877
		 * hyp_read_ipa_pa_info only looks up RAM mappings.
		 * assume one to one IPA:PA mapping for syncpt aperture
		 */
		u64 start = g->syncpt_unit_base;
		u64 end = g->syncpt_unit_base + g->syncpt_unit_size;
		if ((ipa >= start) && (ipa < end)) {
			/* Identity mapping inside the syncpoint range. */
			pa = ipa;
			nvgpu_log(g, gpu_dbg_map_v,
				"ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n",
				ipa, platform->vmid, pa);
		} else {
			nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d",
				ipa, platform->vmid, err);
		}
	} else {
		/* Hypervisor lookup succeeded; compose the final PA. */
		pa = info.base + info.offset;
		nvgpu_log(g, gpu_dbg_map_v,
			"ipa=%llx vmid=%d -> pa=%llx "
			"base=%llx offset=%llx size=%llx\n",
			ipa, platform->vmid, pa, info.base,
			info.offset, info.size);
	}
	return pa;
}
103 | #endif | ||
104 | |||
/*
 * Initialize SoC-dependent state. On hypervisor configurations this caches
 * the VM id and installs the IPA->PA translation hook; otherwise it does
 * nothing. Returns 0 on success or a negative error code.
 */
int nvgpu_init_soc_vars(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_HV_MANAGER
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	int err;

	if (!nvgpu_is_hypervisor_mode(g))
		return 0;

	err = hyp_read_gid(&platform->vmid);
	if (err != 0) {
		nvgpu_err(g, "failed to read vmid");
		return err;
	}

	platform->phys_addr = nvgpu_tegra_hv_ipa_pa;
#endif
	return 0;
}
diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.c b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c new file mode 100644 index 00000000..4dd10e6e --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c | |||
@@ -0,0 +1,419 @@ | |||
1 | /* | ||
2 | * Semaphore Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/file.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/hrtimer.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <nvgpu/lock.h> | ||
24 | |||
25 | #include <nvgpu/kmem.h> | ||
26 | #include <nvgpu/semaphore.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/kref.h> | ||
29 | #include "../linux/channel.h" | ||
30 | |||
31 | #include "../drivers/staging/android/sync.h" | ||
32 | |||
33 | #include "sync_sema_android.h" | ||
34 | |||
35 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
36 | |||
/* A sync timeline whose points are backed by nvgpu semaphores. */
struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;	/* most recently issued threshold */
	u32 min;	/* most recently signaled threshold */
};
42 | |||
43 | /** | ||
44 | * The sync framework dups pts when merging fences. We share a single | ||
45 | * refcounted gk20a_sync_pt for each duped pt. | ||
46 | */ | ||
47 | struct gk20a_sync_pt { | ||
48 | struct gk20a *g; | ||
49 | struct nvgpu_ref refcount; | ||
50 | u32 thresh; | ||
51 | struct nvgpu_semaphore *sema; | ||
52 | struct gk20a_sync_timeline *obj; | ||
53 | |||
54 | /* | ||
55 | * Use a spin lock here since it will have better performance | ||
56 | * than a mutex - there should be very little contention on this | ||
57 | * lock. | ||
58 | */ | ||
59 | struct nvgpu_spinlock lock; | ||
60 | }; | ||
61 | |||
/* Per-instance wrapper; dups of a pt share one refcounted gk20a_sync_pt. */
struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};
66 | |||
67 | /** | ||
68 | * Compares sync pt values a and b, both of which will trigger either before | ||
69 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
70 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
71 | * | ||
72 | * Returns -1 if a < b (a triggers before b) | ||
73 | * 0 if a = b (a and b trigger at the same time) | ||
74 | * 1 if a > b (b triggers before a) | ||
75 | */ | ||
76 | static int __gk20a_sync_pt_compare_ref( | ||
77 | u32 ref, | ||
78 | u32 a, | ||
79 | u32 b) | ||
80 | { | ||
81 | /* | ||
82 | * We normalize both a and b by subtracting ref from them. | ||
83 | * Denote the normalized values by a_n and b_n. Note that because | ||
84 | * of wrapping, a_n and/or b_n may be negative. | ||
85 | * | ||
86 | * The normalized values a_n and b_n satisfy: | ||
87 | * - a positive value triggers before a negative value | ||
88 | * - a smaller positive value triggers before a greater positive value | ||
89 | * - a smaller negative value (greater in absolute value) triggers | ||
90 | * before a greater negative value (smaller in absolute value). | ||
91 | * | ||
92 | * Thus we can just stick to unsigned arithmetic and compare | ||
93 | * (u32)a_n to (u32)b_n. | ||
94 | * | ||
95 | * Just to reiterate the possible cases: | ||
96 | * | ||
97 | * 1A) ...ref..a....b.... | ||
98 | * 1B) ...ref..b....a.... | ||
99 | * 2A) ...b....ref..a.... b_n < 0 | ||
100 | * 2B) ...a....ref..b.... a_n > 0 | ||
101 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
102 | * 3A) ...b....a....ref.. a_n < 0, b_n < 0 | ||
103 | */ | ||
104 | u32 a_n = a - ref; | ||
105 | u32 b_n = b - ref; | ||
106 | if (a_n < b_n) | ||
107 | return -1; | ||
108 | else if (a_n > b_n) | ||
109 | return 1; | ||
110 | else | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt) | ||
115 | { | ||
116 | struct gk20a_sync_pt_inst *pti = | ||
117 | container_of(pt, struct gk20a_sync_pt_inst, pt); | ||
118 | return pti->shared; | ||
119 | } | ||
120 | static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj) | ||
121 | { | ||
122 | if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops)) | ||
123 | return NULL; | ||
124 | return (struct gk20a_sync_timeline *)obj; | ||
125 | } | ||
126 | |||
127 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
128 | { | ||
129 | struct gk20a_sync_pt *pt = | ||
130 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
131 | struct gk20a *g = pt->g; | ||
132 | |||
133 | if (pt->sema) | ||
134 | nvgpu_semaphore_put(pt->sema); | ||
135 | nvgpu_kfree(g, pt); | ||
136 | } | ||
137 | |||
138 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
139 | struct gk20a *g, | ||
140 | struct gk20a_sync_timeline *obj, | ||
141 | struct nvgpu_semaphore *sema) | ||
142 | { | ||
143 | struct gk20a_sync_pt *shared; | ||
144 | |||
145 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
146 | if (!shared) | ||
147 | return NULL; | ||
148 | |||
149 | nvgpu_ref_init(&shared->refcount); | ||
150 | shared->g = g; | ||
151 | shared->obj = obj; | ||
152 | shared->sema = sema; | ||
153 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
154 | |||
155 | nvgpu_spinlock_init(&shared->lock); | ||
156 | |||
157 | nvgpu_semaphore_get(sema); | ||
158 | |||
159 | return shared; | ||
160 | } | ||
161 | |||
162 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
163 | struct gk20a *g, | ||
164 | struct gk20a_sync_timeline *obj, | ||
165 | struct nvgpu_semaphore *sema) | ||
166 | { | ||
167 | struct gk20a_sync_pt_inst *pti; | ||
168 | |||
169 | pti = (struct gk20a_sync_pt_inst *) | ||
170 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
171 | if (!pti) | ||
172 | return NULL; | ||
173 | |||
174 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
175 | if (!pti->shared) { | ||
176 | sync_pt_free(&pti->pt); | ||
177 | return NULL; | ||
178 | } | ||
179 | return &pti->pt; | ||
180 | } | ||
181 | |||
182 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
183 | { | ||
184 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
185 | if (pt) | ||
186 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
187 | } | ||
188 | |||
189 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
190 | { | ||
191 | struct gk20a_sync_pt_inst *pti; | ||
192 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
193 | |||
194 | pti = (struct gk20a_sync_pt_inst *) | ||
195 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
196 | if (!pti) | ||
197 | return NULL; | ||
198 | pti->shared = pt; | ||
199 | nvgpu_ref_get(&pt->refcount); | ||
200 | return &pti->pt; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * This function must be able to run on the same sync_pt concurrently. This | ||
205 | * requires a lock to protect access to the sync_pt's internal data structures | ||
206 | * which are modified as a side effect of calling this function. | ||
207 | */ | ||
208 | static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | ||
209 | { | ||
210 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
211 | struct gk20a_sync_timeline *obj = pt->obj; | ||
212 | bool signaled = true; | ||
213 | |||
214 | nvgpu_spinlock_acquire(&pt->lock); | ||
215 | if (!pt->sema) | ||
216 | goto done; | ||
217 | |||
218 | /* Acquired == not realeased yet == active == not signaled. */ | ||
219 | signaled = !nvgpu_semaphore_is_acquired(pt->sema); | ||
220 | |||
221 | if (signaled) { | ||
222 | /* Update min if necessary. */ | ||
223 | if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh, | ||
224 | obj->min) == 1) | ||
225 | obj->min = pt->thresh; | ||
226 | |||
227 | /* Release the semaphore to the pool. */ | ||
228 | nvgpu_semaphore_put(pt->sema); | ||
229 | pt->sema = NULL; | ||
230 | } | ||
231 | done: | ||
232 | nvgpu_spinlock_release(&pt->lock); | ||
233 | |||
234 | return signaled; | ||
235 | } | ||
236 | |||
/*
 * Order two sync pts on the same timeline. Returns -1 when a triggers
 * before b, 1 when b triggers first, and 0 when they trigger together
 * (or when the pts belong to different timelines, which trips a WARN).
 * Note: calling gk20a_sync_pt_has_signaled() may update the timeline min.
 */
static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}
268 | |||
/*
 * The timeline's current value is its min, i.e. the most recently
 * signaled threshold (advanced by gk20a_sync_pt_has_signaled()).
 */
static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}
273 | |||
/* Render the timeline's current (min) value into str for debugfs dumps. */
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;

	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}
281 | |||
282 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
283 | char *str, int size) | ||
284 | { | ||
285 | struct nvgpu_semaphore *s = pt->sema; | ||
286 | |||
287 | snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", | ||
288 | s->location.pool->page_idx, | ||
289 | nvgpu_semaphore_get_value(s), | ||
290 | nvgpu_semaphore_read(s)); | ||
291 | } | ||
292 | |||
293 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
294 | int size) | ||
295 | { | ||
296 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
297 | |||
298 | if (pt->sema) { | ||
299 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | snprintf(str, size, "%d", pt->thresh); | ||
304 | } | ||
305 | |||
/* Timeline hooks registered with the Android sync framework. */
static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};
315 | |||
316 | /* Public API */ | ||
317 | |||
318 | struct sync_fence *gk20a_sync_fence_fdget(int fd) | ||
319 | { | ||
320 | struct sync_fence *fence = sync_fence_fdget(fd); | ||
321 | int i; | ||
322 | |||
323 | if (!fence) | ||
324 | return NULL; | ||
325 | |||
326 | for (i = 0; i < fence->num_fences; i++) { | ||
327 | struct fence *pt = fence->cbs[i].sync_pt; | ||
328 | struct sync_pt *spt = sync_pt_from_fence(pt); | ||
329 | struct sync_timeline *t; | ||
330 | |||
331 | if (spt == NULL) { | ||
332 | sync_fence_put(fence); | ||
333 | return NULL; | ||
334 | } | ||
335 | |||
336 | t = sync_pt_parent(spt); | ||
337 | if (t->ops != &gk20a_sync_timeline_ops) { | ||
338 | sync_fence_put(fence); | ||
339 | return NULL; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | return fence; | ||
344 | } | ||
345 | |||
346 | struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt) | ||
347 | { | ||
348 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt); | ||
349 | struct nvgpu_semaphore *sema; | ||
350 | |||
351 | nvgpu_spinlock_acquire(&pt->lock); | ||
352 | sema = pt->sema; | ||
353 | if (sema) | ||
354 | nvgpu_semaphore_get(sema); | ||
355 | nvgpu_spinlock_release(&pt->lock); | ||
356 | |||
357 | return sema; | ||
358 | } | ||
359 | |||
/* Forward a signal event for this timeline to the sync framework. */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}
364 | |||
/* Destroy a timeline previously created by gk20a_sync_timeline_create(). */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
369 | |||
370 | struct sync_timeline *gk20a_sync_timeline_create( | ||
371 | const char *name) | ||
372 | { | ||
373 | struct gk20a_sync_timeline *obj; | ||
374 | |||
375 | obj = (struct gk20a_sync_timeline *) | ||
376 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
377 | sizeof(struct gk20a_sync_timeline), | ||
378 | name); | ||
379 | if (!obj) | ||
380 | return NULL; | ||
381 | obj->max = 0; | ||
382 | obj->min = 0; | ||
383 | return &obj->obj; | ||
384 | } | ||
385 | |||
/*
 * Build a named sync fence containing a single point backed by sema on
 * channel c's timeline. The name is formatted printf-style into a 30-byte
 * buffer (longer names are truncated). Returns NULL on allocation failure.
 */
struct sync_fence *gk20a_sync_fence_create(
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema,
	const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a *g = c->g;

	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
	struct nvgpu_os_fence_framework *fence_framework = NULL;
	struct gk20a_sync_timeline *timeline = NULL;

	fence_framework = &os_channel_priv->fence_framework;

	timeline = to_gk20a_timeline(fence_framework->timeline);

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		/* The pt owns a semaphore ref; freeing it releases that too. */
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.h b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h new file mode 100644 index 00000000..4fca7bed --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.h | |||
@@ -0,0 +1,51 @@ | |||
1 | /* | ||
2 | * Semaphore Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
#ifndef _GK20A_SYNC_H_
#define _GK20A_SYNC_H_

struct sync_timeline;
struct sync_fence;
struct sync_pt;
struct nvgpu_semaphore;
struct fence;

#ifdef CONFIG_SYNC
/* Create/destroy a semaphore-backed Android sync timeline. */
struct sync_timeline *gk20a_sync_timeline_create(const char *name);
void gk20a_sync_timeline_destroy(struct sync_timeline *);
/* Forward a signal event for the timeline to the sync framework. */
void gk20a_sync_timeline_signal(struct sync_timeline *);
/* Build a named fence wrapping a semaphore on channel c's timeline. */
struct sync_fence *gk20a_sync_fence_create(
	struct channel_gk20a *c,
	struct nvgpu_semaphore *,
	const char *fmt, ...);
/* Look up a fence by fd; NULL unless every point is nvgpu-owned. */
struct sync_fence *gk20a_sync_fence_fdget(int fd);
/* Get (with a reference) the semaphore backing a sync point. */
struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
#else
/* CONFIG_SYNC disabled: no-op stubs so callers need no ifdefs. */
static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return NULL;
}
static inline struct sync_timeline *gk20a_sync_timeline_create(
		const char *name) {
	return NULL;
}
#endif

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c new file mode 100644 index 00000000..e5995bb8 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c | |||
@@ -0,0 +1,1205 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <linux/pm_runtime.h> | ||
19 | #include <linux/fb.h> | ||
20 | |||
21 | #include <nvgpu/kmem.h> | ||
22 | #include <nvgpu/nvhost.h> | ||
23 | |||
24 | #include "sysfs.h" | ||
25 | #include "platform_gk20a.h" | ||
26 | #include "gk20a/pmu_gk20a.h" | ||
27 | #include "gk20a/gr_gk20a.h" | ||
28 | #include "gv11b/gr_gv11b.h" | ||
29 | |||
30 | #define PTIMER_FP_FACTOR 1000000 | ||
31 | |||
32 | #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) | ||
33 | |||
34 | static ssize_t elcg_enable_store(struct device *dev, | ||
35 | struct device_attribute *attr, const char *buf, size_t count) | ||
36 | { | ||
37 | struct gk20a *g = get_gk20a(dev); | ||
38 | unsigned long val = 0; | ||
39 | int err; | ||
40 | |||
41 | if (kstrtoul(buf, 10, &val) < 0) | ||
42 | return -EINVAL; | ||
43 | |||
44 | err = gk20a_busy(g); | ||
45 | if (err) | ||
46 | return err; | ||
47 | |||
48 | if (val) { | ||
49 | g->elcg_enabled = true; | ||
50 | gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_AUTO); | ||
51 | } else { | ||
52 | g->elcg_enabled = false; | ||
53 | gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN); | ||
54 | } | ||
55 | |||
56 | gk20a_idle(g); | ||
57 | |||
58 | nvgpu_info(g, "ELCG is %s.", g->elcg_enabled ? "enabled" : | ||
59 | "disabled"); | ||
60 | |||
61 | return count; | ||
62 | } | ||
63 | |||
64 | static ssize_t elcg_enable_read(struct device *dev, | ||
65 | struct device_attribute *attr, char *buf) | ||
66 | { | ||
67 | struct gk20a *g = get_gk20a(dev); | ||
68 | |||
69 | return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0); | ||
70 | } | ||
71 | |||
72 | static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store); | ||
73 | |||
74 | static ssize_t blcg_enable_store(struct device *dev, | ||
75 | struct device_attribute *attr, const char *buf, size_t count) | ||
76 | { | ||
77 | struct gk20a *g = get_gk20a(dev); | ||
78 | unsigned long val = 0; | ||
79 | int err; | ||
80 | |||
81 | if (kstrtoul(buf, 10, &val) < 0) | ||
82 | return -EINVAL; | ||
83 | |||
84 | if (val) | ||
85 | g->blcg_enabled = true; | ||
86 | else | ||
87 | g->blcg_enabled = false; | ||
88 | |||
89 | err = gk20a_busy(g); | ||
90 | if (err) | ||
91 | return err; | ||
92 | |||
93 | if (g->ops.clock_gating.blcg_bus_load_gating_prod) | ||
94 | g->ops.clock_gating.blcg_bus_load_gating_prod(g, | ||
95 | g->blcg_enabled); | ||
96 | if (g->ops.clock_gating.blcg_ce_load_gating_prod) | ||
97 | g->ops.clock_gating.blcg_ce_load_gating_prod(g, | ||
98 | g->blcg_enabled); | ||
99 | if (g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod) | ||
100 | g->ops.clock_gating.blcg_ctxsw_firmware_load_gating_prod(g, | ||
101 | g->blcg_enabled); | ||
102 | if (g->ops.clock_gating.blcg_fb_load_gating_prod) | ||
103 | g->ops.clock_gating.blcg_fb_load_gating_prod(g, | ||
104 | g->blcg_enabled); | ||
105 | if (g->ops.clock_gating.blcg_fifo_load_gating_prod) | ||
106 | g->ops.clock_gating.blcg_fifo_load_gating_prod(g, | ||
107 | g->blcg_enabled); | ||
108 | if (g->ops.clock_gating.blcg_gr_load_gating_prod) | ||
109 | g->ops.clock_gating.blcg_gr_load_gating_prod(g, | ||
110 | g->blcg_enabled); | ||
111 | if (g->ops.clock_gating.blcg_ltc_load_gating_prod) | ||
112 | g->ops.clock_gating.blcg_ltc_load_gating_prod(g, | ||
113 | g->blcg_enabled); | ||
114 | if (g->ops.clock_gating.blcg_pmu_load_gating_prod) | ||
115 | g->ops.clock_gating.blcg_pmu_load_gating_prod(g, | ||
116 | g->blcg_enabled); | ||
117 | if (g->ops.clock_gating.blcg_xbar_load_gating_prod) | ||
118 | g->ops.clock_gating.blcg_xbar_load_gating_prod(g, | ||
119 | g->blcg_enabled); | ||
120 | gk20a_idle(g); | ||
121 | |||
122 | nvgpu_info(g, "BLCG is %s.", g->blcg_enabled ? "enabled" : | ||
123 | "disabled"); | ||
124 | |||
125 | return count; | ||
126 | } | ||
127 | |||
128 | static ssize_t blcg_enable_read(struct device *dev, | ||
129 | struct device_attribute *attr, char *buf) | ||
130 | { | ||
131 | struct gk20a *g = get_gk20a(dev); | ||
132 | |||
133 | return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0); | ||
134 | } | ||
135 | |||
136 | |||
137 | static DEVICE_ATTR(blcg_enable, ROOTRW, blcg_enable_read, blcg_enable_store); | ||
138 | |||
139 | static ssize_t slcg_enable_store(struct device *dev, | ||
140 | struct device_attribute *attr, const char *buf, size_t count) | ||
141 | { | ||
142 | struct gk20a *g = get_gk20a(dev); | ||
143 | unsigned long val = 0; | ||
144 | int err; | ||
145 | |||
146 | if (kstrtoul(buf, 10, &val) < 0) | ||
147 | return -EINVAL; | ||
148 | |||
149 | if (val) | ||
150 | g->slcg_enabled = true; | ||
151 | else | ||
152 | g->slcg_enabled = false; | ||
153 | |||
154 | /* | ||
155 | * TODO: slcg_therm_load_gating is not enabled anywhere during | ||
156 | * init. Therefore, it would be incongruous to add it here. Once | ||
157 | * it is added to init, we should add it here too. | ||
158 | */ | ||
159 | err = gk20a_busy(g); | ||
160 | if (err) | ||
161 | return err; | ||
162 | |||
163 | if (g->ops.clock_gating.slcg_bus_load_gating_prod) | ||
164 | g->ops.clock_gating.slcg_bus_load_gating_prod(g, | ||
165 | g->slcg_enabled); | ||
166 | if (g->ops.clock_gating.slcg_ce2_load_gating_prod) | ||
167 | g->ops.clock_gating.slcg_ce2_load_gating_prod(g, | ||
168 | g->slcg_enabled); | ||
169 | if (g->ops.clock_gating.slcg_chiplet_load_gating_prod) | ||
170 | g->ops.clock_gating.slcg_chiplet_load_gating_prod(g, | ||
171 | g->slcg_enabled); | ||
172 | if (g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod) | ||
173 | g->ops.clock_gating.slcg_ctxsw_firmware_load_gating_prod(g, | ||
174 | g->slcg_enabled); | ||
175 | if (g->ops.clock_gating.slcg_fb_load_gating_prod) | ||
176 | g->ops.clock_gating.slcg_fb_load_gating_prod(g, | ||
177 | g->slcg_enabled); | ||
178 | if (g->ops.clock_gating.slcg_fifo_load_gating_prod) | ||
179 | g->ops.clock_gating.slcg_fifo_load_gating_prod(g, | ||
180 | g->slcg_enabled); | ||
181 | if (g->ops.clock_gating.slcg_gr_load_gating_prod) | ||
182 | g->ops.clock_gating.slcg_gr_load_gating_prod(g, | ||
183 | g->slcg_enabled); | ||
184 | if (g->ops.clock_gating.slcg_ltc_load_gating_prod) | ||
185 | g->ops.clock_gating.slcg_ltc_load_gating_prod(g, | ||
186 | g->slcg_enabled); | ||
187 | if (g->ops.clock_gating.slcg_perf_load_gating_prod) | ||
188 | g->ops.clock_gating.slcg_perf_load_gating_prod(g, | ||
189 | g->slcg_enabled); | ||
190 | if (g->ops.clock_gating.slcg_priring_load_gating_prod) | ||
191 | g->ops.clock_gating.slcg_priring_load_gating_prod(g, | ||
192 | g->slcg_enabled); | ||
193 | if (g->ops.clock_gating.slcg_pmu_load_gating_prod) | ||
194 | g->ops.clock_gating.slcg_pmu_load_gating_prod(g, | ||
195 | g->slcg_enabled); | ||
196 | if (g->ops.clock_gating.slcg_xbar_load_gating_prod) | ||
197 | g->ops.clock_gating.slcg_xbar_load_gating_prod(g, | ||
198 | g->slcg_enabled); | ||
199 | gk20a_idle(g); | ||
200 | |||
201 | nvgpu_info(g, "SLCG is %s.", g->slcg_enabled ? "enabled" : | ||
202 | "disabled"); | ||
203 | |||
204 | return count; | ||
205 | } | ||
206 | |||
207 | static ssize_t slcg_enable_read(struct device *dev, | ||
208 | struct device_attribute *attr, char *buf) | ||
209 | { | ||
210 | struct gk20a *g = get_gk20a(dev); | ||
211 | |||
212 | return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0); | ||
213 | } | ||
214 | |||
215 | static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store); | ||
216 | |||
217 | static ssize_t ptimer_scale_factor_show(struct device *dev, | ||
218 | struct device_attribute *attr, | ||
219 | char *buf) | ||
220 | { | ||
221 | struct gk20a *g = get_gk20a(dev); | ||
222 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
223 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
224 | u32 scaling_factor_fp; | ||
225 | ssize_t res; | ||
226 | |||
227 | if (!src_freq_hz) { | ||
228 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
229 | return -EINVAL; | ||
230 | } | ||
231 | |||
232 | scaling_factor_fp = (u32)(PTIMER_REF_FREQ_HZ) / | ||
233 | ((u32)(src_freq_hz) / | ||
234 | (u32)(PTIMER_FP_FACTOR)); | ||
235 | res = snprintf(buf, | ||
236 | PAGE_SIZE, | ||
237 | "%u.%u\n", | ||
238 | scaling_factor_fp / PTIMER_FP_FACTOR, | ||
239 | scaling_factor_fp % PTIMER_FP_FACTOR); | ||
240 | |||
241 | return res; | ||
242 | |||
243 | } | ||
244 | |||
245 | static DEVICE_ATTR(ptimer_scale_factor, | ||
246 | S_IRUGO, | ||
247 | ptimer_scale_factor_show, | ||
248 | NULL); | ||
249 | |||
250 | static ssize_t ptimer_ref_freq_show(struct device *dev, | ||
251 | struct device_attribute *attr, | ||
252 | char *buf) | ||
253 | { | ||
254 | struct gk20a *g = get_gk20a(dev); | ||
255 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
256 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
257 | ssize_t res; | ||
258 | |||
259 | if (!src_freq_hz) { | ||
260 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
261 | return -EINVAL; | ||
262 | } | ||
263 | |||
264 | res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ); | ||
265 | |||
266 | return res; | ||
267 | |||
268 | } | ||
269 | |||
270 | static DEVICE_ATTR(ptimer_ref_freq, | ||
271 | S_IRUGO, | ||
272 | ptimer_ref_freq_show, | ||
273 | NULL); | ||
274 | |||
275 | static ssize_t ptimer_src_freq_show(struct device *dev, | ||
276 | struct device_attribute *attr, | ||
277 | char *buf) | ||
278 | { | ||
279 | struct gk20a *g = get_gk20a(dev); | ||
280 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
281 | u32 src_freq_hz = platform->ptimer_src_freq; | ||
282 | ssize_t res; | ||
283 | |||
284 | if (!src_freq_hz) { | ||
285 | nvgpu_err(g, "reference clk_m rate is not set correctly"); | ||
286 | return -EINVAL; | ||
287 | } | ||
288 | |||
289 | res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz); | ||
290 | |||
291 | return res; | ||
292 | |||
293 | } | ||
294 | |||
295 | static DEVICE_ATTR(ptimer_src_freq, | ||
296 | S_IRUGO, | ||
297 | ptimer_src_freq_show, | ||
298 | NULL); | ||
299 | |||
300 | |||
#if defined(CONFIG_PM)
/*
 * Enable/disable GPU rail-gating from sysfs ("1" enables, "0" disables).
 *
 * Rail-gating is driven through runtime PM autosuspend: enabling sets the
 * autosuspend delay to g->railgate_delay, disabling sets it to -1 (never
 * autosuspend). A busy/idle round-trip afterwards forces runtime PM to
 * re-evaluate the new setting immediately.
 */
static ssize_t railgate_enable_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long railgate_enable = 0;
	/* dev is guaranteed to be valid here. Ok to de-reference */
	struct gk20a *g = get_gk20a(dev);
	int err;

	if (kstrtoul(buf, 10, &railgate_enable) < 0)
		return -EINVAL;

	/* Only act on actual state transitions; repeated writes of the
	 * current value are no-ops. */
	if (railgate_enable && !g->can_railgate) {
		g->can_railgate = true;
		pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
	} else if (railgate_enable == 0 && g->can_railgate) {
		g->can_railgate = false;
		/* -1: disable autosuspend entirely */
		pm_runtime_set_autosuspend_delay(dev, -1);
	}
	/* wake-up system to make rail-gating setting effective */
	err = gk20a_busy(g);
	if (err)
		return err;
	gk20a_idle(g);

	nvgpu_info(g, "railgate is %s.", g->can_railgate ?
		"enabled" : "disabled");

	return count;
}

/* Show current rail-gating capability as 0/1. */
static ssize_t railgate_enable_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->can_railgate ? 1 : 0);
}

/* Root read-write node toggling rail-gating; only built with CONFIG_PM. */
static DEVICE_ATTR(railgate_enable, ROOTRW, railgate_enable_read,
	railgate_enable_store);
#endif
343 | |||
344 | static ssize_t railgate_delay_store(struct device *dev, | ||
345 | struct device_attribute *attr, | ||
346 | const char *buf, size_t count) | ||
347 | { | ||
348 | int railgate_delay = 0, ret = 0; | ||
349 | struct gk20a *g = get_gk20a(dev); | ||
350 | int err; | ||
351 | |||
352 | if (!g->can_railgate) { | ||
353 | nvgpu_info(g, "does not support power-gating"); | ||
354 | return count; | ||
355 | } | ||
356 | |||
357 | ret = sscanf(buf, "%d", &railgate_delay); | ||
358 | if (ret == 1 && railgate_delay >= 0) { | ||
359 | g->railgate_delay = railgate_delay; | ||
360 | pm_runtime_set_autosuspend_delay(dev, g->railgate_delay); | ||
361 | } else | ||
362 | nvgpu_err(g, "Invalid powergate delay"); | ||
363 | |||
364 | /* wake-up system to make rail-gating delay effective immediately */ | ||
365 | err = gk20a_busy(g); | ||
366 | if (err) | ||
367 | return err; | ||
368 | gk20a_idle(g); | ||
369 | |||
370 | return count; | ||
371 | } | ||
372 | static ssize_t railgate_delay_show(struct device *dev, | ||
373 | struct device_attribute *attr, char *buf) | ||
374 | { | ||
375 | struct gk20a *g = get_gk20a(dev); | ||
376 | |||
377 | return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay); | ||
378 | } | ||
379 | static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show, | ||
380 | railgate_delay_store); | ||
381 | |||
382 | static ssize_t is_railgated_show(struct device *dev, | ||
383 | struct device_attribute *attr, char *buf) | ||
384 | { | ||
385 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
386 | bool is_railgated = 0; | ||
387 | |||
388 | if (platform->is_railgated) | ||
389 | is_railgated = platform->is_railgated(dev); | ||
390 | |||
391 | return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no"); | ||
392 | } | ||
393 | static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL); | ||
394 | |||
395 | static ssize_t counters_show(struct device *dev, | ||
396 | struct device_attribute *attr, char *buf) | ||
397 | { | ||
398 | struct gk20a *g = get_gk20a(dev); | ||
399 | u32 busy_cycles, total_cycles; | ||
400 | ssize_t res; | ||
401 | |||
402 | nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles); | ||
403 | |||
404 | res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles); | ||
405 | |||
406 | return res; | ||
407 | } | ||
408 | static DEVICE_ATTR(counters, S_IRUGO, counters_show, NULL); | ||
409 | |||
410 | static ssize_t counters_show_reset(struct device *dev, | ||
411 | struct device_attribute *attr, char *buf) | ||
412 | { | ||
413 | ssize_t res = counters_show(dev, attr, buf); | ||
414 | struct gk20a *g = get_gk20a(dev); | ||
415 | |||
416 | nvgpu_pmu_reset_load_counters(g); | ||
417 | |||
418 | return res; | ||
419 | } | ||
420 | static DEVICE_ATTR(counters_reset, S_IRUGO, counters_show_reset, NULL); | ||
421 | |||
422 | static ssize_t gk20a_load_show(struct device *dev, | ||
423 | struct device_attribute *attr, | ||
424 | char *buf) | ||
425 | { | ||
426 | struct gk20a *g = get_gk20a(dev); | ||
427 | u32 busy_time; | ||
428 | ssize_t res; | ||
429 | int err; | ||
430 | |||
431 | if (!g->power_on) { | ||
432 | busy_time = 0; | ||
433 | } else { | ||
434 | err = gk20a_busy(g); | ||
435 | if (err) | ||
436 | return err; | ||
437 | |||
438 | nvgpu_pmu_load_update(g); | ||
439 | nvgpu_pmu_load_norm(g, &busy_time); | ||
440 | gk20a_idle(g); | ||
441 | } | ||
442 | |||
443 | res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time); | ||
444 | |||
445 | return res; | ||
446 | } | ||
447 | static DEVICE_ATTR(load, S_IRUGO, gk20a_load_show, NULL); | ||
448 | |||
/*
 * Enable/disable engine-level power gating (ELPG) from sysfs.
 *
 * When the GPU is powered off, only the software flag is updated; the
 * hardware state is applied on the next power-on. When powered on, the
 * GPU is held busy while the PMU power-gating state is changed.
 */
static ssize_t elpg_enable_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	unsigned long val = 0;
	int err;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	if (!g->power_on) {
		g->elpg_enabled = val ? true : false;
	} else {
		err = gk20a_busy(g);
		if (err)
			return -EAGAIN;
		/*
		 * Since elpg is refcounted, we should not unnecessarily call
		 * enable/disable if it is already so.
		 */
		if (val && !g->elpg_enabled) {
			g->elpg_enabled = true;
			nvgpu_pmu_pg_global_enable(g, true);

		} else if (!val && g->elpg_enabled) {
			/*
			 * NOTE(review): the two disable branches differ only
			 * in whether the flag is cleared before or after the
			 * PMU call, keyed on the engine's feature list not
			 * being plain power-gating — presumably the ordering
			 * matters to the PMU path; confirm before merging
			 * the branches.
			 */
			if (g->ops.pmu.pmu_pg_engines_feature_list &&
				g->ops.pmu.pmu_pg_engines_feature_list(g,
					PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
				NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
				nvgpu_pmu_pg_global_enable(g, false);
				g->elpg_enabled = false;
			} else {
				g->elpg_enabled = false;
				nvgpu_pmu_pg_global_enable(g, false);
			}
		}
		gk20a_idle(g);
	}
	nvgpu_info(g, "ELPG is %s.", g->elpg_enabled ? "enabled" :
			"disabled");

	return count;
}

/* Show the current ELPG software state as 0/1. */
static ssize_t elpg_enable_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->elpg_enabled ? 1 : 0);
}

static DEVICE_ATTR(elpg_enable, ROOTRW, elpg_enable_read, elpg_enable_store);
502 | |||
/*
 * Set the LDIV slowdown factor used for ELPG graphics power-gating.
 *
 * Values must be below SLOWDOWN_FACTOR_FPDIV_BYMAX. Writing the current
 * value is a no-op. When the GPU is powered on, the PMU PG parameters
 * for the graphics engine are re-initialized so the new factor takes
 * effect immediately; when powered off, only the cached value changes.
 */
static ssize_t ldiv_slowdown_factor_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	unsigned long val = 0;
	int err;

	if (kstrtoul(buf, 10, &val) < 0) {
		nvgpu_err(g, "parse error for input SLOWDOWN factor\n");
		return -EINVAL;
	}

	if (val >= SLOWDOWN_FACTOR_FPDIV_BYMAX) {
		nvgpu_err(g, "Invalid SLOWDOWN factor\n");
		return -EINVAL;
	}

	/* unchanged value: nothing to push to the PMU */
	if (val == g->ldiv_slowdown_factor)
		return count;

	if (!g->power_on) {
		g->ldiv_slowdown_factor = val;
	} else {
		err = gk20a_busy(g);
		if (err)
			return -EAGAIN;

		g->ldiv_slowdown_factor = val;

		/* optional HAL hook; re-program PG params for graphics */
		if (g->ops.pmu.pmu_pg_init_param)
			g->ops.pmu.pmu_pg_init_param(g,
				PMU_PG_ELPG_ENGINE_ID_GRAPHICS);

		gk20a_idle(g);
	}

	nvgpu_info(g, "ldiv_slowdown_factor is %x\n", g->ldiv_slowdown_factor);

	return count;
}

/* Show the cached LDIV slowdown factor. */
static ssize_t ldiv_slowdown_factor_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor);
}

static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW,
	ldiv_slowdown_factor_read, ldiv_slowdown_factor_store);
554 | |||
/*
 * Enable/disable memory system clock gating (MSCG) from sysfs.
 *
 * When the GPU is powered off only the software flag changes. When
 * powered on, pmu->mscg_stat is updated with WRITE_ONCE followed by an
 * smp_mb() so that other readers observe the new status before any
 * dependent PMU traffic.
 */
static ssize_t mscg_enable_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_pmu *pmu = &g->pmu;
	unsigned long val = 0;
	int err;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	if (!g->power_on) {
		g->mscg_enabled = val ? true : false;
	} else {
		err = gk20a_busy(g);
		if (err)
			return -EAGAIN;
		/*
		 * Since elpg is refcounted, we should not unnecessarily call
		 * enable/disable if it is already so.
		 */
		if (val && !g->mscg_enabled) {
			g->mscg_enabled = true;
			if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
				PMU_PG_LPWR_FEATURE_MSCG)) {
				/* only flip the status if nothing else set it */
				if (!ACCESS_ONCE(pmu->mscg_stat)) {
					WRITE_ONCE(pmu->mscg_stat,
						PMU_MSCG_ENABLED);
					/* make status visible */
					smp_mb();
				}
			}

		} else if (!val && g->mscg_enabled) {
			if (g->ops.pmu.pmu_is_lpwr_feature_supported(g,
				PMU_PG_LPWR_FEATURE_MSCG)) {
				/* drop global PG, publish disabled status,
				 * then restore PG if ELPG alone remains on */
				nvgpu_pmu_pg_global_enable(g, false);
				WRITE_ONCE(pmu->mscg_stat, PMU_MSCG_DISABLED);
				/* make status visible */
				smp_mb();
				g->mscg_enabled = false;
				if (g->elpg_enabled)
					nvgpu_pmu_pg_global_enable(g, true);
			}
			g->mscg_enabled = false;
		}
		gk20a_idle(g);
	}
	nvgpu_info(g, "MSCG is %s.", g->mscg_enabled ? "enabled" :
			"disabled");

	return count;
}

/* Show the current MSCG software state as 0/1. */
static ssize_t mscg_enable_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0);
}

static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store);
618 | |||
619 | static ssize_t aelpg_param_store(struct device *dev, | ||
620 | struct device_attribute *attr, const char *buf, size_t count) | ||
621 | { | ||
622 | struct gk20a *g = get_gk20a(dev); | ||
623 | int status = 0; | ||
624 | union pmu_ap_cmd ap_cmd; | ||
625 | int *paramlist = (int *)g->pmu.aelpg_param; | ||
626 | u32 defaultparam[5] = { | ||
627 | APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US, | ||
628 | APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US, | ||
629 | APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US, | ||
630 | APCTRL_POWER_BREAKEVEN_DEFAULT_US, | ||
631 | APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT | ||
632 | }; | ||
633 | |||
634 | /* Get each parameter value from input string*/ | ||
635 | sscanf(buf, "%d %d %d %d %d", ¶mlist[0], ¶mlist[1], | ||
636 | ¶mlist[2], ¶mlist[3], ¶mlist[4]); | ||
637 | |||
638 | /* If parameter value is 0 then reset to SW default values*/ | ||
639 | if ((paramlist[0] | paramlist[1] | paramlist[2] | ||
640 | | paramlist[3] | paramlist[4]) == 0x00) { | ||
641 | memcpy(paramlist, defaultparam, sizeof(defaultparam)); | ||
642 | } | ||
643 | |||
644 | /* If aelpg is enabled & pmu is ready then post values to | ||
645 | * PMU else store then post later | ||
646 | */ | ||
647 | if (g->aelpg_enabled && g->pmu.pmu_ready) { | ||
648 | /* Disable AELPG */ | ||
649 | ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL; | ||
650 | ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS; | ||
651 | status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false); | ||
652 | |||
653 | /* Enable AELPG */ | ||
654 | nvgpu_aelpg_init(g); | ||
655 | nvgpu_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); | ||
656 | } | ||
657 | |||
658 | return count; | ||
659 | } | ||
660 | |||
661 | static ssize_t aelpg_param_read(struct device *dev, | ||
662 | struct device_attribute *attr, char *buf) | ||
663 | { | ||
664 | struct gk20a *g = get_gk20a(dev); | ||
665 | |||
666 | return snprintf(buf, PAGE_SIZE, | ||
667 | "%d %d %d %d %d\n", g->pmu.aelpg_param[0], | ||
668 | g->pmu.aelpg_param[1], g->pmu.aelpg_param[2], | ||
669 | g->pmu.aelpg_param[3], g->pmu.aelpg_param[4]); | ||
670 | } | ||
671 | |||
672 | static DEVICE_ATTR(aelpg_param, ROOTRW, | ||
673 | aelpg_param_read, aelpg_param_store); | ||
674 | |||
/*
 * Enable/disable adaptive ELPG (AELPG) from sysfs.
 *
 * Requires the PMU to be ready; otherwise the request is logged and
 * dropped (the write still reports success). State transitions are sent
 * to the PMU as ENABLE/DISABLE_CTRL commands for the graphics controller.
 */
static ssize_t aelpg_enable_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	unsigned long val = 0;
	int status = 0;
	union pmu_ap_cmd ap_cmd;
	int err;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = gk20a_busy(g);
	if (err)
		return err;

	if (g->pmu.pmu_ready) {
		/* only act on actual transitions */
		if (val && !g->aelpg_enabled) {
			g->aelpg_enabled = true;
			/* Enable AELPG */
			ap_cmd.enable_ctrl.cmd_id = PMU_AP_CMD_ID_ENABLE_CTRL;
			ap_cmd.enable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
			status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
		} else if (!val && g->aelpg_enabled) {
			g->aelpg_enabled = false;
			/* Disable AELPG */
			ap_cmd.disable_ctrl.cmd_id = PMU_AP_CMD_ID_DISABLE_CTRL;
			ap_cmd.disable_ctrl.ctrl_id = PMU_AP_CTRL_ID_GRAPHICS;
			status = nvgpu_pmu_ap_send_command(g, &ap_cmd, false);
		}
	} else {
		nvgpu_info(g, "PMU is not ready, AELPG request failed");
	}
	gk20a_idle(g);

	nvgpu_info(g, "AELPG is %s.", g->aelpg_enabled ? "enabled" :
			"disabled");

	return count;
}

/* Show the current AELPG software state as 0/1. */
static ssize_t aelpg_enable_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0);
}

static DEVICE_ATTR(aelpg_enable, ROOTRW,
	aelpg_enable_read, aelpg_enable_store);
726 | |||
727 | |||
728 | static ssize_t allow_all_enable_read(struct device *dev, | ||
729 | struct device_attribute *attr, char *buf) | ||
730 | { | ||
731 | struct gk20a *g = get_gk20a(dev); | ||
732 | |||
733 | return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0); | ||
734 | } | ||
735 | |||
736 | static ssize_t allow_all_enable_store(struct device *dev, | ||
737 | struct device_attribute *attr, const char *buf, size_t count) | ||
738 | { | ||
739 | struct gk20a *g = get_gk20a(dev); | ||
740 | unsigned long val = 0; | ||
741 | int err; | ||
742 | |||
743 | if (kstrtoul(buf, 10, &val) < 0) | ||
744 | return -EINVAL; | ||
745 | |||
746 | err = gk20a_busy(g); | ||
747 | g->allow_all = (val ? true : false); | ||
748 | gk20a_idle(g); | ||
749 | |||
750 | return count; | ||
751 | } | ||
752 | |||
753 | static DEVICE_ATTR(allow_all, ROOTRW, | ||
754 | allow_all_enable_read, allow_all_enable_store); | ||
755 | |||
756 | static ssize_t emc3d_ratio_store(struct device *dev, | ||
757 | struct device_attribute *attr, const char *buf, size_t count) | ||
758 | { | ||
759 | struct gk20a *g = get_gk20a(dev); | ||
760 | unsigned long val = 0; | ||
761 | |||
762 | if (kstrtoul(buf, 10, &val) < 0) | ||
763 | return -EINVAL; | ||
764 | |||
765 | g->emc3d_ratio = val; | ||
766 | |||
767 | return count; | ||
768 | } | ||
769 | |||
770 | static ssize_t emc3d_ratio_read(struct device *dev, | ||
771 | struct device_attribute *attr, char *buf) | ||
772 | { | ||
773 | struct gk20a *g = get_gk20a(dev); | ||
774 | |||
775 | return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio); | ||
776 | } | ||
777 | |||
778 | static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); | ||
779 | |||
780 | static ssize_t fmax_at_vmin_safe_read(struct device *dev, | ||
781 | struct device_attribute *attr, char *buf) | ||
782 | { | ||
783 | struct gk20a *g = get_gk20a(dev); | ||
784 | unsigned long gpu_fmax_at_vmin_hz = 0; | ||
785 | |||
786 | if (g->ops.clk.get_fmax_at_vmin_safe) | ||
787 | gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g); | ||
788 | |||
789 | return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz)); | ||
790 | } | ||
791 | |||
792 | static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL); | ||
793 | |||
#ifdef CONFIG_PM
/*
 * Force the GPU into (or out of) the idle state from sysfs.
 *
 * Writing non-zero calls __gk20a_do_idle() and latches g->forced_idle;
 * writing zero calls __gk20a_do_unidle(). Redundant writes (already in
 * the requested state) are accepted as no-ops. The idle/unidle calls
 * must stay strictly paired, which is why the flag is only updated when
 * the transition succeeds.
 */
static ssize_t force_idle_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	unsigned long val = 0;
	int err = 0;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	if (val) {
		if (g->forced_idle)
			return count; /* do nothing */
		else {
			err = __gk20a_do_idle(g, false);
			if (!err) {
				g->forced_idle = 1;
				nvgpu_info(g, "gpu is idle : %d",
					g->forced_idle);
			}
		}
	} else {
		if (!g->forced_idle)
			return count; /* do nothing */
		else {
			err = __gk20a_do_unidle(g);
			if (!err) {
				g->forced_idle = 0;
				nvgpu_info(g, "gpu is idle : %d",
					g->forced_idle);
			}
		}
	}

	return count;
}

/* Show whether the GPU is currently forced idle as 0/1. */
static ssize_t force_idle_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0);
}

static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
#endif
842 | |||
/*
 * Override the GPC0 TPC floorsweeping mask from sysfs.
 *
 * A new non-zero mask is pushed to the HAL and invalidates the cached
 * golden context image so it is rebuilt (and gr re-initialized) on the
 * next power-on. Writing zero or the current mask is a no-op.
 */
static ssize_t tpc_fs_mask_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	unsigned long val = 0;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* gr not yet initialized far enough to have a mask array */
	if (!g->gr.gpc_tpc_mask)
		return -ENODEV;

	if (val && val != g->gr.gpc_tpc_mask[0] && g->ops.gr.set_gpc_tpc_mask) {
		g->gr.gpc_tpc_mask[0] = val;
		g->tpc_fs_mask_user = val;

		g->ops.gr.set_gpc_tpc_mask(g, 0);

		/* the golden image was built for the old mask: drop it */
		nvgpu_vfree(g, g->gr.ctx_vars.local_golden_image);
		g->gr.ctx_vars.local_golden_image = NULL;
		g->gr.ctx_vars.golden_image_initialized = false;
		g->gr.ctx_vars.golden_image_size = 0;
		/* Cause next poweron to reinit just gr */
		g->gr.sw_ready = false;
	}

	return count;
}

/*
 * Show the combined TPC floorsweeping mask across all GPCs, with each
 * GPC's mask shifted by max_tpc_per_gpc_count * gpc_index.
 */
static ssize_t tpc_fs_mask_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index;
	u32 tpc_fs_mask = 0;
	int err = 0;

	err = gk20a_busy(g);
	if (err)
		return err;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		if (g->ops.gr.get_gpc_tpc_mask)
			tpc_fs_mask |=
				g->ops.gr.get_gpc_tpc_mask(g, gpc_index) <<
				(gr->max_tpc_per_gpc_count * gpc_index);
	}

	gk20a_idle(g);

	return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask);
}

static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
898 | |||
899 | static ssize_t min_timeslice_us_read(struct device *dev, | ||
900 | struct device_attribute *attr, char *buf) | ||
901 | { | ||
902 | struct gk20a *g = get_gk20a(dev); | ||
903 | |||
904 | return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us); | ||
905 | } | ||
906 | |||
907 | static ssize_t min_timeslice_us_store(struct device *dev, | ||
908 | struct device_attribute *attr, const char *buf, size_t count) | ||
909 | { | ||
910 | struct gk20a *g = get_gk20a(dev); | ||
911 | unsigned long val; | ||
912 | |||
913 | if (kstrtoul(buf, 10, &val) < 0) | ||
914 | return -EINVAL; | ||
915 | |||
916 | if (val > g->max_timeslice_us) | ||
917 | return -EINVAL; | ||
918 | |||
919 | g->min_timeslice_us = val; | ||
920 | |||
921 | return count; | ||
922 | } | ||
923 | |||
924 | static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read, | ||
925 | min_timeslice_us_store); | ||
926 | |||
927 | static ssize_t max_timeslice_us_read(struct device *dev, | ||
928 | struct device_attribute *attr, char *buf) | ||
929 | { | ||
930 | struct gk20a *g = get_gk20a(dev); | ||
931 | |||
932 | return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us); | ||
933 | } | ||
934 | |||
935 | static ssize_t max_timeslice_us_store(struct device *dev, | ||
936 | struct device_attribute *attr, const char *buf, size_t count) | ||
937 | { | ||
938 | struct gk20a *g = get_gk20a(dev); | ||
939 | unsigned long val; | ||
940 | |||
941 | if (kstrtoul(buf, 10, &val) < 0) | ||
942 | return -EINVAL; | ||
943 | |||
944 | if (val < g->min_timeslice_us) | ||
945 | return -EINVAL; | ||
946 | |||
947 | g->max_timeslice_us = val; | ||
948 | |||
949 | return count; | ||
950 | } | ||
951 | |||
952 | static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read, | ||
953 | max_timeslice_us_store); | ||
954 | |||
955 | static ssize_t czf_bypass_store(struct device *dev, | ||
956 | struct device_attribute *attr, const char *buf, size_t count) | ||
957 | { | ||
958 | struct gk20a *g = get_gk20a(dev); | ||
959 | unsigned long val; | ||
960 | |||
961 | if (kstrtoul(buf, 10, &val) < 0) | ||
962 | return -EINVAL; | ||
963 | |||
964 | if (val >= 4) | ||
965 | return -EINVAL; | ||
966 | |||
967 | g->gr.czf_bypass = val; | ||
968 | |||
969 | return count; | ||
970 | } | ||
971 | |||
972 | static ssize_t czf_bypass_read(struct device *dev, | ||
973 | struct device_attribute *attr, char *buf) | ||
974 | { | ||
975 | struct gk20a *g = get_gk20a(dev); | ||
976 | |||
977 | return sprintf(buf, "%d\n", g->gr.czf_bypass); | ||
978 | } | ||
979 | |||
980 | static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store); | ||
981 | |||
982 | static ssize_t pd_max_batches_store(struct device *dev, | ||
983 | struct device_attribute *attr, const char *buf, size_t count) | ||
984 | { | ||
985 | struct gk20a *g = get_gk20a(dev); | ||
986 | unsigned long val; | ||
987 | |||
988 | if (kstrtoul(buf, 10, &val) < 0) | ||
989 | return -EINVAL; | ||
990 | |||
991 | if (val > 64) | ||
992 | return -EINVAL; | ||
993 | |||
994 | g->gr.pd_max_batches = val; | ||
995 | |||
996 | return count; | ||
997 | } | ||
998 | |||
999 | static ssize_t pd_max_batches_read(struct device *dev, | ||
1000 | struct device_attribute *attr, char *buf) | ||
1001 | { | ||
1002 | struct gk20a *g = get_gk20a(dev); | ||
1003 | |||
1004 | return sprintf(buf, "%d\n", g->gr.pd_max_batches); | ||
1005 | } | ||
1006 | |||
1007 | static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store); | ||
1008 | |||
/*
 * Set the GFXP WFI timeout count.
 *
 * The value is range-checked against the optional HAL maximum. If the
 * GPU is powered on, preemption state is re-initialized (under ELPG
 * protection) so the new timeout takes effect immediately.
 */
static ssize_t gfxp_wfi_timeout_count_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	struct gr_gk20a *gr = &g->gr;
	unsigned long val = 0;
	int err = -1;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* HAL-provided upper bound, when available */
	if (g->ops.gr.get_max_gfxp_wfi_timeout_count) {
		if (val >= g->ops.gr.get_max_gfxp_wfi_timeout_count(g))
			return -EINVAL;
	}

	gr->gfxp_wfi_timeout_count = val;

	if (g->ops.gr.init_preemption_state && g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		/* reprogram preemption with ELPG held off */
		err = gr_gk20a_elpg_protected_call(g,
			g->ops.gr.init_preemption_state(g));

		gk20a_idle(g);

		if (err)
			return err;
	}
	return count;
}
1042 | |||
/*
 * Set the GFXP WFI timeout unit: any input starting with 's' selects
 * sysclk, anything else selects microseconds. If the GPU is powered on,
 * preemption state is re-initialized (under ELPG protection) so the new
 * unit takes effect immediately.
 */
static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	struct gr_gk20a *gr = &g->gr;
	int err = -1;

	if (count > 0 && buf[0] == 's')
		/* sysclk */
		gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_SYSCLK;
	else
		/* usec */
		gr->gfxp_wfi_timeout_unit = GFXP_WFI_TIMEOUT_UNIT_USEC;

	if (g->ops.gr.init_preemption_state && g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		/* reprogram preemption with ELPG held off */
		err = gr_gk20a_elpg_protected_call(g,
			g->ops.gr.init_preemption_state(g));

		gk20a_idle(g);

		if (err)
			return err;
	}

	return count;
}
1073 | |||
1074 | static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, | ||
1075 | struct device_attribute *attr, char *buf) | ||
1076 | { | ||
1077 | struct gk20a *g = get_gk20a(dev); | ||
1078 | struct gr_gk20a *gr = &g->gr; | ||
1079 | u32 val = gr->gfxp_wfi_timeout_count; | ||
1080 | |||
1081 | return snprintf(buf, PAGE_SIZE, "%d\n", val); | ||
1082 | } | ||
1083 | |||
1084 | static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, | ||
1085 | struct device_attribute *attr, char *buf) | ||
1086 | { | ||
1087 | struct gk20a *g = get_gk20a(dev); | ||
1088 | struct gr_gk20a *gr = &g->gr; | ||
1089 | |||
1090 | if (gr->gfxp_wfi_timeout_unit == GFXP_WFI_TIMEOUT_UNIT_USEC) | ||
1091 | return snprintf(buf, PAGE_SIZE, "usec\n"); | ||
1092 | else | ||
1093 | return snprintf(buf, PAGE_SIZE, "sysclk\n"); | ||
1094 | } | ||
1095 | |||
/*
 * NOTE(review): S_IRWXU includes the execute bit, which is unusual for a
 * sysfs attribute; the other writable nodes here use ROOTRW. Presumably
 * rw-r--r-- (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) was intended — confirm
 * before changing, as userspace may depend on the current mode.
 */
static DEVICE_ATTR(gfxp_wfi_timeout_count, (S_IRWXU|S_IRGRP|S_IROTH),
		gfxp_wfi_timeout_count_read, gfxp_wfi_timeout_count_store);

static DEVICE_ATTR(gfxp_wfi_timeout_unit, (S_IRWXU|S_IRGRP|S_IROTH),
		gfxp_wfi_timeout_unit_read, gfxp_wfi_timeout_unit_store);
1101 | |||
/*
 * Remove every sysfs attribute created by nvgpu_create_sysfs(), plus the
 * "gpu.0" compatibility symlink for non-primary device names. Keep this
 * list in sync with the create path below.
 */
void nvgpu_remove_sysfs(struct device *dev)
{
	device_remove_file(dev, &dev_attr_elcg_enable);
	device_remove_file(dev, &dev_attr_blcg_enable);
	device_remove_file(dev, &dev_attr_slcg_enable);
	device_remove_file(dev, &dev_attr_ptimer_scale_factor);
	device_remove_file(dev, &dev_attr_ptimer_ref_freq);
	device_remove_file(dev, &dev_attr_ptimer_src_freq);
	device_remove_file(dev, &dev_attr_elpg_enable);
	device_remove_file(dev, &dev_attr_mscg_enable);
	device_remove_file(dev, &dev_attr_emc3d_ratio);
	device_remove_file(dev, &dev_attr_ldiv_slowdown_factor);

	device_remove_file(dev, &dev_attr_fmax_at_vmin_safe);

	device_remove_file(dev, &dev_attr_counters);
	device_remove_file(dev, &dev_attr_counters_reset);
	device_remove_file(dev, &dev_attr_load);
	device_remove_file(dev, &dev_attr_railgate_delay);
	device_remove_file(dev, &dev_attr_is_railgated);
#ifdef CONFIG_PM
	device_remove_file(dev, &dev_attr_force_idle);
	device_remove_file(dev, &dev_attr_railgate_enable);
#endif
	device_remove_file(dev, &dev_attr_aelpg_param);
	device_remove_file(dev, &dev_attr_aelpg_enable);
	device_remove_file(dev, &dev_attr_allow_all);
	device_remove_file(dev, &dev_attr_tpc_fs_mask);
	device_remove_file(dev, &dev_attr_min_timeslice_us);
	device_remove_file(dev, &dev_attr_max_timeslice_us);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	nvgpu_nvhost_remove_symlink(get_gk20a(dev));
#endif

	device_remove_file(dev, &dev_attr_czf_bypass);
	device_remove_file(dev, &dev_attr_pd_max_batches);
	device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_count);
	device_remove_file(dev, &dev_attr_gfxp_wfi_timeout_unit);

	/* devices not literally named "gpu.0" also got a gpu.0 symlink
	 * in their parent directory; drop it here */
	if (strcmp(dev_name(dev), "gpu.0")) {
		struct kobject *kobj = &dev->kobj;
		struct device *parent = container_of((kobj->parent),
					struct device, kobj);
		sysfs_remove_link(&parent->kobj, "gpu.0");
	}
}
1149 | |||
/*
 * Create all nvgpu sysfs attribute files under the device.
 *
 * Individual device_create_file() errors are OR-ed together rather than
 * aborting, so as many nodes as possible are created; a single combined
 * error is logged and returned at the end. Keep this list in sync with
 * nvgpu_remove_sysfs() above.
 *
 * Returns 0 on success, non-zero if any attribute failed to create.
 */
int nvgpu_create_sysfs(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	int error = 0;

	error |= device_create_file(dev, &dev_attr_elcg_enable);
	error |= device_create_file(dev, &dev_attr_blcg_enable);
	error |= device_create_file(dev, &dev_attr_slcg_enable);
	error |= device_create_file(dev, &dev_attr_ptimer_scale_factor);
	error |= device_create_file(dev, &dev_attr_ptimer_ref_freq);
	error |= device_create_file(dev, &dev_attr_ptimer_src_freq);
	error |= device_create_file(dev, &dev_attr_elpg_enable);
	error |= device_create_file(dev, &dev_attr_mscg_enable);
	error |= device_create_file(dev, &dev_attr_emc3d_ratio);
	error |= device_create_file(dev, &dev_attr_ldiv_slowdown_factor);

	error |= device_create_file(dev, &dev_attr_fmax_at_vmin_safe);

	error |= device_create_file(dev, &dev_attr_counters);
	error |= device_create_file(dev, &dev_attr_counters_reset);
	error |= device_create_file(dev, &dev_attr_load);
	error |= device_create_file(dev, &dev_attr_railgate_delay);
	error |= device_create_file(dev, &dev_attr_is_railgated);
#ifdef CONFIG_PM
	error |= device_create_file(dev, &dev_attr_force_idle);
	error |= device_create_file(dev, &dev_attr_railgate_enable);
#endif
	error |= device_create_file(dev, &dev_attr_aelpg_param);
	error |= device_create_file(dev, &dev_attr_aelpg_enable);
	error |= device_create_file(dev, &dev_attr_allow_all);
	error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
	error |= device_create_file(dev, &dev_attr_min_timeslice_us);
	error |= device_create_file(dev, &dev_attr_max_timeslice_us);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	error |= nvgpu_nvhost_create_symlink(g);
#endif

	error |= device_create_file(dev, &dev_attr_czf_bypass);
	error |= device_create_file(dev, &dev_attr_pd_max_batches);
	error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_count);
	error |= device_create_file(dev, &dev_attr_gfxp_wfi_timeout_unit);

	/* for devices not named "gpu.0", keep a legacy "gpu.0" symlink in
	 * the parent directory — presumably for tools that expect the
	 * primary-GPU path; confirm against userspace before changing */
	if (strcmp(dev_name(dev), "gpu.0")) {
		struct kobject *kobj = &dev->kobj;
		struct device *parent = container_of((kobj->parent),
					struct device, kobj);
		error |= sysfs_create_link(&parent->kobj,
				   &dev->kobj, "gpu.0");
	}

	if (error)
		nvgpu_err(g, "Failed to create sysfs attributes!\n");

	return error;
}
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.h b/drivers/gpu/nvgpu/os/linux/sysfs.h new file mode 100644 index 00000000..80925844 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sysfs.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
#ifndef NVGPU_SYSFS_H
#define NVGPU_SYSFS_H

struct device;

/* Create the nvgpu sysfs attribute files under @dev; 0 on success. */
int nvgpu_create_sysfs(struct device *dev);
/* Remove the attributes created by nvgpu_create_sysfs(). */
void nvgpu_remove_sysfs(struct device *dev);

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c new file mode 100644 index 00000000..92c556f2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/thread.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kthread.h> | ||
18 | |||
19 | #include <nvgpu/thread.h> | ||
20 | |||
21 | int nvgpu_thread_proxy(void *threaddata) | ||
22 | { | ||
23 | struct nvgpu_thread *thread = threaddata; | ||
24 | int ret = thread->fn(thread->data); | ||
25 | |||
26 | thread->running = false; | ||
27 | return ret; | ||
28 | } | ||
29 | |||
30 | int nvgpu_thread_create(struct nvgpu_thread *thread, | ||
31 | void *data, | ||
32 | int (*threadfn)(void *data), const char *name) | ||
33 | { | ||
34 | struct task_struct *task = kthread_create(nvgpu_thread_proxy, | ||
35 | thread, name); | ||
36 | if (IS_ERR(task)) | ||
37 | return PTR_ERR(task); | ||
38 | |||
39 | thread->task = task; | ||
40 | thread->fn = threadfn; | ||
41 | thread->data = data; | ||
42 | thread->running = true; | ||
43 | wake_up_process(task); | ||
44 | return 0; | ||
45 | }; | ||
46 | |||
47 | void nvgpu_thread_stop(struct nvgpu_thread *thread) | ||
48 | { | ||
49 | if (thread->task) { | ||
50 | kthread_stop(thread->task); | ||
51 | thread->task = NULL; | ||
52 | } | ||
53 | }; | ||
54 | |||
55 | bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) | ||
56 | { | ||
57 | return kthread_should_stop(); | ||
58 | }; | ||
59 | |||
60 | bool nvgpu_thread_is_running(struct nvgpu_thread *thread) | ||
61 | { | ||
62 | return ACCESS_ONCE(thread->running); | ||
63 | }; | ||
diff --git a/drivers/gpu/nvgpu/os/linux/timers.c b/drivers/gpu/nvgpu/os/linux/timers.c new file mode 100644 index 00000000..d1aa641f --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/timers.c | |||
@@ -0,0 +1,270 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/ktime.h> | ||
18 | #include <linux/delay.h> | ||
19 | |||
20 | #include <nvgpu/timers.h> | ||
21 | #include <nvgpu/soc.h> | ||
22 | |||
23 | #include "gk20a/gk20a.h" | ||
24 | |||
25 | #include "platform_gk20a.h" | ||
26 | |||
27 | /* | ||
28 | * Returns 1 if the platform is pre-Si and should ignore the timeout checking. | ||
29 | * Setting %NVGPU_TIMER_NO_PRE_SI will make this always return 0 (i.e do the | ||
30 | * timeout check regardless of platform). | ||
31 | */ | ||
32 | static int nvgpu_timeout_is_pre_silicon(struct nvgpu_timeout *timeout) | ||
33 | { | ||
34 | if (timeout->flags & NVGPU_TIMER_NO_PRE_SI) | ||
35 | return 0; | ||
36 | |||
37 | return !nvgpu_platform_is_silicon(timeout->g); | ||
38 | } | ||
39 | |||
40 | /** | ||
41 | * nvgpu_timeout_init - Init timer. | ||
42 | * | ||
43 | * @g - nvgpu device. | ||
44 | * @timeout - The timer. | ||
45 | * @duration - Timeout in milliseconds or number of retries. | ||
46 | * @flags - Flags for timer. | ||
47 | * | ||
48 | * This configures the timeout to start the timeout duration now, i.e: when this | ||
49 | * function is called. Available flags to pass to @flags: | ||
50 | * | ||
51 | * %NVGPU_TIMER_CPU_TIMER | ||
52 | * %NVGPU_TIMER_RETRY_TIMER | ||
53 | * %NVGPU_TIMER_NO_PRE_SI | ||
54 | * %NVGPU_TIMER_SILENT_TIMEOUT | ||
55 | * | ||
56 | * If neither %NVGPU_TIMER_CPU_TIMER or %NVGPU_TIMER_RETRY_TIMER is passed then | ||
57 | * a CPU timer is used by default. | ||
58 | */ | ||
59 | int nvgpu_timeout_init(struct gk20a *g, struct nvgpu_timeout *timeout, | ||
60 | u32 duration, unsigned long flags) | ||
61 | { | ||
62 | if (flags & ~NVGPU_TIMER_FLAG_MASK) | ||
63 | return -EINVAL; | ||
64 | |||
65 | memset(timeout, 0, sizeof(*timeout)); | ||
66 | |||
67 | timeout->g = g; | ||
68 | timeout->flags = flags; | ||
69 | |||
70 | if (flags & NVGPU_TIMER_RETRY_TIMER) | ||
71 | timeout->retries.max = duration; | ||
72 | else | ||
73 | timeout->time = ktime_to_ns(ktime_add_ns(ktime_get(), | ||
74 | (s64)NSEC_PER_MSEC * duration)); | ||
75 | |||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int __nvgpu_timeout_expired_msg_cpu(struct nvgpu_timeout *timeout, | ||
80 | void *caller, | ||
81 | const char *fmt, va_list args) | ||
82 | { | ||
83 | struct gk20a *g = timeout->g; | ||
84 | ktime_t now = ktime_get(); | ||
85 | |||
86 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
87 | return 0; | ||
88 | |||
89 | if (ktime_after(now, ns_to_ktime(timeout->time))) { | ||
90 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
91 | char buf[128]; | ||
92 | |||
93 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
94 | |||
95 | nvgpu_err(g, "Timeout detected @ %pF %s", caller, buf); | ||
96 | } | ||
97 | |||
98 | return -ETIMEDOUT; | ||
99 | } | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static int __nvgpu_timeout_expired_msg_retry(struct nvgpu_timeout *timeout, | ||
105 | void *caller, | ||
106 | const char *fmt, va_list args) | ||
107 | { | ||
108 | struct gk20a *g = timeout->g; | ||
109 | |||
110 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
111 | return 0; | ||
112 | |||
113 | if (timeout->retries.attempted >= timeout->retries.max) { | ||
114 | if (!(timeout->flags & NVGPU_TIMER_SILENT_TIMEOUT)) { | ||
115 | char buf[128]; | ||
116 | |||
117 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
118 | |||
119 | nvgpu_err(g, "No more retries @ %pF %s", caller, buf); | ||
120 | } | ||
121 | |||
122 | return -ETIMEDOUT; | ||
123 | } | ||
124 | |||
125 | timeout->retries.attempted++; | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * __nvgpu_timeout_expired_msg - Check if a timeout has expired. | ||
132 | * | ||
133 | * @timeout - The timeout to check. | ||
134 | * @caller - Address of the caller of this function. | ||
135 | * @fmt - The fmt string. | ||
136 | * | ||
137 | * Returns -ETIMEDOUT if the timeout has expired, 0 otherwise. | ||
138 | * | ||
139 | * If a timeout occurs and %NVGPU_TIMER_SILENT_TIMEOUT is not set in the timeout | ||
140 | * then a message is printed based on %fmt. | ||
141 | */ | ||
142 | int __nvgpu_timeout_expired_msg(struct nvgpu_timeout *timeout, | ||
143 | void *caller, const char *fmt, ...) | ||
144 | { | ||
145 | int ret; | ||
146 | va_list args; | ||
147 | |||
148 | va_start(args, fmt); | ||
149 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
150 | ret = __nvgpu_timeout_expired_msg_retry(timeout, caller, fmt, | ||
151 | args); | ||
152 | else | ||
153 | ret = __nvgpu_timeout_expired_msg_cpu(timeout, caller, fmt, | ||
154 | args); | ||
155 | va_end(args); | ||
156 | |||
157 | return ret; | ||
158 | } | ||
159 | |||
160 | /** | ||
161 | * nvgpu_timeout_peek_expired - Check the status of a timeout. | ||
162 | * | ||
163 | * @timeout - The timeout to check. | ||
164 | * | ||
165 | * Returns non-zero if the timeout is expired, zero otherwise. In the case of | ||
166 | * retry timers this will not increment the underlying retry count. Also if the | ||
167 | * timer has expired no messages will be printed. | ||
168 | * | ||
169 | * This function honors the pre-Si check as well. | ||
170 | */ | ||
171 | int nvgpu_timeout_peek_expired(struct nvgpu_timeout *timeout) | ||
172 | { | ||
173 | if (nvgpu_timeout_is_pre_silicon(timeout)) | ||
174 | return 0; | ||
175 | |||
176 | if (timeout->flags & NVGPU_TIMER_RETRY_TIMER) | ||
177 | return timeout->retries.attempted >= timeout->retries.max; | ||
178 | else | ||
179 | return ktime_after(ktime_get(), ns_to_ktime(timeout->time)); | ||
180 | } | ||
181 | |||
/**
 * nvgpu_udelay - Delay for some number of microseconds.
 *
 * @usecs - Microseconds to wait for.
 *
 * Wait for at least @usecs microseconds. This is not guaranteed to be perfectly
 * accurate. This is normally backed by a busy-loop so this means waits should
 * be kept short, below 100us. If longer delays are necessary then
 * nvgpu_msleep() should be preferred.
 *
 * Alternatively, on some platforms, nvgpu_usleep_range() is usable. This
 * function will attempt to not use a busy-loop.
 *
 * Busy-waits via udelay(), so it is usable from atomic context.
 */
void nvgpu_udelay(unsigned int usecs)
{
	udelay(usecs);
}
199 | |||
/**
 * nvgpu_usleep_range - Sleep for a range of microseconds.
 *
 * @min_us - Minimum wait time.
 * @max_us - Maximum wait time.
 *
 * Wait for some number of microseconds between @min_us and @max_us. This,
 * unlike nvgpu_udelay(), will attempt to sleep for the passed number of
 * microseconds instead of busy looping. Not all platforms support this,
 * and in that case this reduces to nvgpu_udelay(min_us).
 *
 * Linux note: this is not safe to use in atomic context. If you are in
 * atomic context you must use nvgpu_udelay().
 */
void nvgpu_usleep_range(unsigned int min_us, unsigned int max_us)
{
	usleep_range(min_us, max_us);
}
218 | |||
/**
 * nvgpu_msleep - Sleep for some milliseconds.
 *
 * @msecs - Sleep for at least this many milliseconds.
 *
 * Sleep for at least @msecs of milliseconds. For small @msecs (less than 20 ms
 * or so) the sleep will be significantly longer due to scheduling overhead and
 * mechanics. Not usable from atomic context.
 */
void nvgpu_msleep(unsigned int msecs)
{
	msleep(msecs);
}
232 | |||
/**
 * nvgpu_current_time_ms - Time in milliseconds from a monotonic clock.
 *
 * Return a clock in millisecond units. The start time of the clock is
 * unspecified; the time returned can be compared with older ones to measure
 * durations. The source clock does not jump when the system clock is adjusted.
 */
s64 nvgpu_current_time_ms(void)
{
	return ktime_to_ms(ktime_get());
}
244 | |||
/**
 * nvgpu_current_time_ns - Time in nanoseconds from a monotonic clock.
 *
 * Return a clock in nanosecond units. The start time of the clock is
 * unspecified; the time returned can be compared with older ones to measure
 * durations. The source clock does not jump when the system clock is adjusted.
 */
s64 nvgpu_current_time_ns(void)
{
	return ktime_to_ns(ktime_get());
}
256 | |||
/**
 * nvgpu_hr_timestamp - Opaque 'high resolution' time stamp.
 *
 * Return a "high resolution" time stamp. It does not really matter exactly what
 * it is, so long as it generally returns unique values and monotonically
 * increases - wrap around _is_ possible though in a system running for long
 * enough.
 *
 * Note: what high resolution means is system dependent. On Linux this is
 * backed by get_cycles(), i.e. the CPU cycle counter where available.
 */
u64 nvgpu_hr_timestamp(void)
{
	return get_cycles();
}
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c new file mode 100644 index 00000000..9f6017d3 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.c | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Clock Interface | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <nvgpu/vgpu/vgpu.h> | ||
20 | |||
21 | #include "gk20a/gk20a.h" | ||
22 | #include "clk_vgpu.h" | ||
23 | #include "ctrl/ctrlclk.h" | ||
24 | #include "os/linux/platform_gk20a.h" | ||
25 | |||
/* Frequency table (in Hz) filled in by vgpu_clk_get_freqs(); sized for
 * the maximum number of entries the vgpu server may report. */
static unsigned long
vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE];
28 | |||
29 | static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) | ||
30 | { | ||
31 | struct tegra_vgpu_cmd_msg msg = {}; | ||
32 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
33 | int err; | ||
34 | unsigned long ret = 0; | ||
35 | |||
36 | nvgpu_log_fn(g, " "); | ||
37 | |||
38 | switch (api_domain) { | ||
39 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
40 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_CLK_RATE; | ||
41 | msg.handle = vgpu_get_handle(g); | ||
42 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
43 | err = err ? err : msg.ret; | ||
44 | if (err) | ||
45 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
46 | else | ||
47 | /* return frequency in Hz */ | ||
48 | ret = p->rate * 1000; | ||
49 | break; | ||
50 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
51 | nvgpu_err(g, "unsupported clock: %u", api_domain); | ||
52 | break; | ||
53 | default: | ||
54 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
55 | break; | ||
56 | } | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static int vgpu_clk_set_rate(struct gk20a *g, | ||
62 | u32 api_domain, unsigned long rate) | ||
63 | { | ||
64 | struct tegra_vgpu_cmd_msg msg = {}; | ||
65 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
66 | int err = -EINVAL; | ||
67 | |||
68 | nvgpu_log_fn(g, " "); | ||
69 | |||
70 | switch (api_domain) { | ||
71 | case CTRL_CLK_DOMAIN_GPCCLK: | ||
72 | msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE; | ||
73 | msg.handle = vgpu_get_handle(g); | ||
74 | |||
75 | /* server dvfs framework requires frequency in kHz */ | ||
76 | p->rate = (u32)(rate / 1000); | ||
77 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
78 | err = err ? err : msg.ret; | ||
79 | if (err) | ||
80 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
81 | break; | ||
82 | case CTRL_CLK_DOMAIN_PWRCLK: | ||
83 | nvgpu_err(g, "unsupported clock: %u", api_domain); | ||
84 | break; | ||
85 | default: | ||
86 | nvgpu_err(g, "unknown clock: %u", api_domain); | ||
87 | break; | ||
88 | } | ||
89 | |||
90 | return err; | ||
91 | } | ||
92 | |||
93 | static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) | ||
94 | { | ||
95 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | ||
96 | |||
97 | return priv->constants.max_freq; | ||
98 | } | ||
99 | |||
/* Install the vgpu clock HAL callbacks into the gk20a ops table. */
void vgpu_init_clk_support(struct gk20a *g)
{
	g->ops.clk.get_rate = vgpu_clk_get_rate;
	g->ops.clk.set_rate = vgpu_clk_set_rate;
	g->ops.clk.get_maxrate = vgpu_clk_get_maxrate;
}
106 | |||
/*
 * Round @rate to a supported frequency. The vgpu server performs the
 * real rounding when the rate is set, so the request is returned
 * unchanged here.
 */
long vgpu_clk_round_rate(struct device *dev, unsigned long rate)
{
	/* server will handle frequency rounding */
	return rate;
}
112 | |||
113 | int vgpu_clk_get_freqs(struct device *dev, | ||
114 | unsigned long **freqs, int *num_freqs) | ||
115 | { | ||
116 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
117 | struct gk20a *g = platform->g; | ||
118 | struct tegra_vgpu_cmd_msg msg = {}; | ||
119 | struct tegra_vgpu_get_gpu_freq_table_params *p = | ||
120 | &msg.params.get_gpu_freq_table; | ||
121 | unsigned int i; | ||
122 | int err; | ||
123 | |||
124 | nvgpu_log_fn(g, " "); | ||
125 | |||
126 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; | ||
127 | msg.handle = vgpu_get_handle(g); | ||
128 | |||
129 | p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; | ||
130 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
131 | err = err ? err : msg.ret; | ||
132 | if (err) { | ||
133 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
134 | return err; | ||
135 | } | ||
136 | |||
137 | /* return frequency in Hz */ | ||
138 | for (i = 0; i < p->num_freqs; i++) | ||
139 | vgpu_freq_table[i] = p->freqs[i] * 1000; | ||
140 | |||
141 | *freqs = vgpu_freq_table; | ||
142 | *num_freqs = p->num_freqs; | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | int vgpu_clk_cap_rate(struct device *dev, unsigned long rate) | ||
148 | { | ||
149 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
150 | struct gk20a *g = platform->g; | ||
151 | struct tegra_vgpu_cmd_msg msg = {}; | ||
152 | struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate; | ||
153 | int err = 0; | ||
154 | |||
155 | nvgpu_log_fn(g, " "); | ||
156 | |||
157 | msg.cmd = TEGRA_VGPU_CMD_CAP_GPU_CLK_RATE; | ||
158 | msg.handle = vgpu_get_handle(g); | ||
159 | p->rate = (u32)rate; | ||
160 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
161 | err = err ? err : msg.ret; | ||
162 | if (err) { | ||
163 | nvgpu_err(g, "%s failed - %d", __func__, err); | ||
164 | return err; | ||
165 | } | ||
166 | |||
167 | return 0; | ||
168 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h new file mode 100644 index 00000000..8d477643 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/clk_vgpu.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Clock Interface | ||
3 | * | ||
4 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
/*
 * Fix: the old guard _CLK_VIRT_H_ (leading underscore + uppercase) is
 * an identifier reserved for the implementation by the C standard;
 * renamed to match the NVGPU_*_H convention used elsewhere (sysfs.h).
 * Forward declarations added so the header is self-contained.
 */
#ifndef NVGPU_CLK_VGPU_H
#define NVGPU_CLK_VGPU_H

struct gk20a;
struct device;

/* Install the vgpu clock HAL callbacks into g->ops.clk. */
void vgpu_init_clk_support(struct gk20a *g);
/* Round @rate to a supported frequency (delegated to the server). */
long vgpu_clk_round_rate(struct device *dev, unsigned long rate);
/* Retrieve the GPU frequency table in Hz; 0 on success. */
int vgpu_clk_get_freqs(struct device *dev,
		unsigned long **freqs, int *num_freqs);
/* Cap the GPU clock at @rate; 0 on success. */
int vgpu_clk_cap_rate(struct device *dev, unsigned long rate);
#endif /* NVGPU_CLK_VGPU_H */
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c new file mode 100644 index 00000000..6339aef9 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/fecs_trace_vgpu.c | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <uapi/linux/nvgpu.h> | ||
18 | |||
19 | #include <nvgpu/kmem.h> | ||
20 | #include <nvgpu/bug.h> | ||
21 | #include <nvgpu/enabled.h> | ||
22 | #include <nvgpu/ctxsw_trace.h> | ||
23 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
24 | #include <nvgpu/vgpu/tegra_vgpu.h> | ||
25 | #include <nvgpu/vgpu/vgpu.h> | ||
26 | |||
27 | #include "gk20a/gk20a.h" | ||
28 | #include "os/linux/os_linux.h" | ||
29 | #include "vgpu/fecs_trace_vgpu.h" | ||
30 | |||
/*
 * Per-GPU state for virtualized FECS context-switch tracing. The
 * trace ring lives in an IVM mempool shared with the vgpu server and
 * is mapped into the kernel at ->buf.
 */
struct vgpu_fecs_trace {
	struct tegra_hv_ivm_cookie *cookie;	/* reserved mempool handle */
	struct nvgpu_ctxsw_ring_header *header;	/* ring header at buf[0] */
	struct nvgpu_ctxsw_trace_entry *entries; /* entries follow header */
	int num_entries;	/* copied from header->num_ents at init */
	bool enabled;		/* set by enable(), cleared by disable() */
	void *buf;		/* ioremapped view of the whole mempool */
};
39 | |||
40 | int vgpu_fecs_trace_init(struct gk20a *g) | ||
41 | { | ||
42 | struct device *dev = dev_from_gk20a(g); | ||
43 | struct device_node *np = dev->of_node; | ||
44 | struct of_phandle_args args; | ||
45 | struct vgpu_fecs_trace *vcst; | ||
46 | u32 mempool; | ||
47 | int err; | ||
48 | |||
49 | nvgpu_log_fn(g, " "); | ||
50 | |||
51 | vcst = nvgpu_kzalloc(g, sizeof(*vcst)); | ||
52 | if (!vcst) | ||
53 | return -ENOMEM; | ||
54 | |||
55 | err = of_parse_phandle_with_fixed_args(np, | ||
56 | "mempool-fecs-trace", 1, 0, &args); | ||
57 | if (err) { | ||
58 | nvgpu_info(g, "does not support fecs trace"); | ||
59 | goto fail; | ||
60 | } | ||
61 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true); | ||
62 | |||
63 | mempool = args.args[0]; | ||
64 | vcst->cookie = vgpu_ivm_mempool_reserve(mempool); | ||
65 | if (IS_ERR(vcst->cookie)) { | ||
66 | nvgpu_info(g, | ||
67 | "mempool %u reserve failed", mempool); | ||
68 | vcst->cookie = NULL; | ||
69 | err = -EINVAL; | ||
70 | goto fail; | ||
71 | } | ||
72 | |||
73 | vcst->buf = ioremap_cache(vgpu_ivm_get_ipa(vcst->cookie), | ||
74 | vgpu_ivm_get_size(vcst->cookie)); | ||
75 | if (!vcst->buf) { | ||
76 | nvgpu_info(g, "ioremap_cache failed"); | ||
77 | err = -EINVAL; | ||
78 | goto fail; | ||
79 | } | ||
80 | vcst->header = vcst->buf; | ||
81 | vcst->num_entries = vcst->header->num_ents; | ||
82 | if (unlikely(vcst->header->ent_size != sizeof(*vcst->entries))) { | ||
83 | nvgpu_err(g, "entry size mismatch"); | ||
84 | goto fail; | ||
85 | } | ||
86 | vcst->entries = vcst->buf + sizeof(*vcst->header); | ||
87 | g->fecs_trace = (struct gk20a_fecs_trace *)vcst; | ||
88 | |||
89 | return 0; | ||
90 | fail: | ||
91 | iounmap(vcst->buf); | ||
92 | if (vcst->cookie) | ||
93 | vgpu_ivm_mempool_unreserve(vcst->cookie); | ||
94 | nvgpu_kfree(g, vcst); | ||
95 | return err; | ||
96 | } | ||
97 | |||
/*
 * Tear down what vgpu_fecs_trace_init() set up: unmap the trace
 * buffer, release the IVM mempool, then free the state. Order
 * matters: the mapping must go away before the pool is unreserved.
 */
int vgpu_fecs_trace_deinit(struct gk20a *g)
{
	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;

	iounmap(vcst->buf);
	vgpu_ivm_mempool_unreserve(vcst->cookie);
	nvgpu_kfree(g, vcst);
	return 0;
}
107 | |||
108 | int vgpu_fecs_trace_enable(struct gk20a *g) | ||
109 | { | ||
110 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
111 | struct tegra_vgpu_cmd_msg msg = { | ||
112 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_ENABLE, | ||
113 | .handle = vgpu_get_handle(g), | ||
114 | }; | ||
115 | int err; | ||
116 | |||
117 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
118 | err = err ? err : msg.ret; | ||
119 | WARN_ON(err); | ||
120 | vcst->enabled = !err; | ||
121 | return err; | ||
122 | } | ||
123 | |||
124 | int vgpu_fecs_trace_disable(struct gk20a *g) | ||
125 | { | ||
126 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
127 | struct tegra_vgpu_cmd_msg msg = { | ||
128 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_DISABLE, | ||
129 | .handle = vgpu_get_handle(g), | ||
130 | }; | ||
131 | int err; | ||
132 | |||
133 | vcst->enabled = false; | ||
134 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
135 | err = err ? err : msg.ret; | ||
136 | WARN_ON(err); | ||
137 | return err; | ||
138 | } | ||
139 | |||
140 | bool vgpu_fecs_trace_is_enabled(struct gk20a *g) | ||
141 | { | ||
142 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
143 | |||
144 | return (vcst && vcst->enabled); | ||
145 | } | ||
146 | |||
147 | int vgpu_fecs_trace_poll(struct gk20a *g) | ||
148 | { | ||
149 | struct tegra_vgpu_cmd_msg msg = { | ||
150 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_POLL, | ||
151 | .handle = vgpu_get_handle(g), | ||
152 | }; | ||
153 | int err; | ||
154 | |||
155 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
156 | err = err ? err : msg.ret; | ||
157 | WARN_ON(err); | ||
158 | return err; | ||
159 | } | ||
160 | |||
/*
 * "Allocate" the user-visible trace buffer: the shared IVM mapping
 * already exists, so just hand back its address and size.
 */
int vgpu_alloc_user_buffer(struct gk20a *g, void **buf, size_t *size)
{
	struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace;

	*buf = vcst->buf;
	*size = vgpu_ivm_get_size(vcst->cookie);
	return 0;
}
169 | |||
/* Nothing to free: the buffer is owned by the shared IVM mempool. */
int vgpu_free_user_buffer(struct gk20a *g)
{
	return 0;
}
174 | |||
175 | int vgpu_mmap_user_buffer(struct gk20a *g, struct vm_area_struct *vma) | ||
176 | { | ||
177 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
178 | unsigned long size = vgpu_ivm_get_size(vcst->cookie); | ||
179 | unsigned long vsize = vma->vm_end - vma->vm_start; | ||
180 | |||
181 | size = min(size, vsize); | ||
182 | size = round_up(size, PAGE_SIZE); | ||
183 | |||
184 | return remap_pfn_range(vma, vma->vm_start, | ||
185 | vgpu_ivm_get_ipa(vcst->cookie) >> PAGE_SHIFT, | ||
186 | size, | ||
187 | vma->vm_page_prot); | ||
188 | } | ||
189 | |||
190 | #ifdef CONFIG_GK20A_CTXSW_TRACE | ||
191 | int vgpu_fecs_trace_max_entries(struct gk20a *g, | ||
192 | struct nvgpu_ctxsw_trace_filter *filter) | ||
193 | { | ||
194 | struct vgpu_fecs_trace *vcst = (struct vgpu_fecs_trace *)g->fecs_trace; | ||
195 | |||
196 | return vcst->header->num_ents; | ||
197 | } | ||
198 | |||
199 | #if NVGPU_CTXSW_FILTER_SIZE != TEGRA_VGPU_FECS_TRACE_FILTER_SIZE | ||
200 | #error "FECS trace filter size mismatch!" | ||
201 | #endif | ||
202 | |||
203 | int vgpu_fecs_trace_set_filter(struct gk20a *g, | ||
204 | struct nvgpu_ctxsw_trace_filter *filter) | ||
205 | { | ||
206 | struct tegra_vgpu_cmd_msg msg = { | ||
207 | .cmd = TEGRA_VGPU_CMD_FECS_TRACE_SET_FILTER, | ||
208 | .handle = vgpu_get_handle(g), | ||
209 | }; | ||
210 | struct tegra_vgpu_fecs_trace_filter *p = &msg.params.fecs_trace_filter; | ||
211 | int err; | ||
212 | |||
213 | memcpy(&p->tag_bits, &filter->tag_bits, sizeof(p->tag_bits)); | ||
214 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
215 | err = err ? err : msg.ret; | ||
216 | WARN_ON(err); | ||
217 | return err; | ||
218 | } | ||
219 | |||
/* New trace data arrived from the server: wake the VM-0 trace reader. */
void vgpu_fecs_trace_data_update(struct gk20a *g)
{
	gk20a_ctxsw_trace_wake_up(g, 0);
}
224 | #endif /* CONFIG_GK20A_CTXSW_TRACE */ | ||
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c new file mode 100644 index 00000000..66911626 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/platform_device.h> | ||
18 | |||
19 | #include <nvgpu/nvhost.h> | ||
20 | |||
21 | #include "gk20a/gk20a.h" | ||
22 | #include "os/linux/vgpu/clk_vgpu.h" | ||
23 | #include "os/linux/platform_gk20a.h" | ||
24 | #include "os/linux/os_linux.h" | ||
25 | |||
26 | static int gv11b_vgpu_probe(struct device *dev) | ||
27 | { | ||
28 | struct platform_device *pdev = to_platform_device(dev); | ||
29 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
30 | struct resource *r; | ||
31 | void __iomem *regs; | ||
32 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(platform->g); | ||
33 | struct gk20a *g = platform->g; | ||
34 | int ret; | ||
35 | |||
36 | r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "usermode"); | ||
37 | if (!r) { | ||
38 | nvgpu_err(g, "failed to get usermode regs"); | ||
39 | return -ENXIO; | ||
40 | } | ||
41 | regs = devm_ioremap_resource(dev, r); | ||
42 | if (IS_ERR(regs)) { | ||
43 | nvgpu_err(g, "failed to map usermode regs"); | ||
44 | return PTR_ERR(regs); | ||
45 | } | ||
46 | l->usermode_regs = regs; | ||
47 | |||
48 | #ifdef CONFIG_TEGRA_GK20A_NVHOST | ||
49 | ret = nvgpu_get_nvhost_dev(g); | ||
50 | if (ret) { | ||
51 | l->usermode_regs = NULL; | ||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | ret = nvgpu_nvhost_syncpt_unit_interface_get_aperture(g->nvhost_dev, | ||
56 | &g->syncpt_unit_base, | ||
57 | &g->syncpt_unit_size); | ||
58 | if (ret) { | ||
59 | nvgpu_err(g, "Failed to get syncpt interface"); | ||
60 | return -ENOSYS; | ||
61 | } | ||
62 | g->syncpt_size = nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1); | ||
63 | nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n", | ||
64 | g->syncpt_unit_base, g->syncpt_unit_size, g->syncpt_size); | ||
65 | #endif | ||
66 | vgpu_init_clk_support(platform->g); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
/* Platform data for the virtualized gv11b on Tegra. */
struct gk20a_platform gv11b_vgpu_tegra_platform = {
	.has_syncpoints = true,

	/* power management configuration */
	/* railgating and clock/power gating are disabled in the guest;
	 * the vgpu server owns power management */
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_elcg = false,
	.enable_elpg = false,
	.enable_aelpg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.ch_wdt_timeout_ms = 5000,

	.probe = gv11b_vgpu_probe,

	/* clock rounding and the frequency table come from the server */
	.clk_round_rate = vgpu_clk_round_rate,
	.get_clk_freqs = vgpu_clk_get_freqs,

	/* frequency scaling configuration */
	.devfreq_governor = "userspace",

	.virtual_dev = true,
};
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c new file mode 100644 index 00000000..e4819e7d --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Tegra Virtualized GPU Platform Interface | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <nvgpu/nvhost.h> | ||
20 | |||
21 | #include "gk20a/gk20a.h" | ||
22 | #include "os/linux/platform_gk20a.h" | ||
23 | #include "clk_vgpu.h" | ||
24 | |||
/*
 * Platform probe hook for the para-virtualized Tegra GPU.
 *
 * With nvhost support compiled in, acquire the nvhost device and bring up
 * the virtual clock interface; otherwise nothing platform-specific is
 * needed and the probe trivially succeeds.
 */
static int gk20a_tegra_probe(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int err = nvgpu_get_nvhost_dev(platform->g);

	if (err != 0)
		return err;

	vgpu_init_clk_support(platform->g);
#endif
	return 0;
}
41 | |||
/* Platform data for the generic para-virtualized Tegra GPU. */
struct gk20a_platform vgpu_tegra_platform = {
	.has_syncpoints = true,
	/* batch sync-point frees once this many are pending */
	.aggressive_sync_destroy_thresh = 64,

	/* power management configuration */
	/* All gating features are off: power state is owned by the server. */
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_elcg = false,
	.enable_elpg = false,
	.enable_aelpg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	/* channel watchdog timeout, in milliseconds */
	.ch_wdt_timeout_ms = 5000,

	.probe = gk20a_tegra_probe,

	/* clock requests are forwarded to the vgpu server */
	.clk_round_rate = vgpu_clk_round_rate,
	.get_clk_freqs = vgpu_clk_get_freqs,

	/* frequency scaling configuration */
	.devfreq_governor = "userspace",

	.virtual_dev = true,
};
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c new file mode 100644 index 00000000..57aad4b4 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/device.h> | ||
18 | #include <nvgpu/vgpu/vgpu.h> | ||
19 | |||
20 | #include "os/linux/platform_gk20a.h" | ||
21 | |||
22 | static ssize_t vgpu_load_show(struct device *dev, | ||
23 | struct device_attribute *attr, | ||
24 | char *buf) | ||
25 | { | ||
26 | struct gk20a *g = get_gk20a(dev); | ||
27 | struct tegra_vgpu_cmd_msg msg = {0}; | ||
28 | struct tegra_vgpu_gpu_load_params *p = &msg.params.gpu_load; | ||
29 | int err; | ||
30 | |||
31 | msg.cmd = TEGRA_VGPU_CMD_GET_GPU_LOAD; | ||
32 | msg.handle = vgpu_get_handle(g); | ||
33 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
34 | if (err) | ||
35 | return err; | ||
36 | |||
37 | return snprintf(buf, PAGE_SIZE, "%u\n", p->load); | ||
38 | } | ||
39 | static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); | ||
40 | |||
41 | void vgpu_create_sysfs(struct device *dev) | ||
42 | { | ||
43 | if (device_create_file(dev, &dev_attr_load)) | ||
44 | dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); | ||
45 | } | ||
46 | |||
/* Remove the "load" sysfs attribute created by vgpu_create_sysfs(). */
void vgpu_remove_sysfs(struct device *dev)
{
	device_remove_file(dev, &dev_attr_load);
}
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c new file mode 100644 index 00000000..950f0d49 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivc.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/types.h> | ||
18 | #include <linux/tegra_gr_comm.h> | ||
19 | |||
20 | #include "os/linux/os_linux.h" | ||
21 | |||
22 | int vgpu_ivc_init(struct gk20a *g, u32 elems, | ||
23 | const size_t *queue_sizes, u32 queue_start, u32 num_queues) | ||
24 | { | ||
25 | struct platform_device *pdev = to_platform_device(dev_from_gk20a(g)); | ||
26 | |||
27 | return tegra_gr_comm_init(pdev, elems, queue_sizes, queue_start, | ||
28 | num_queues); | ||
29 | } | ||
30 | |||
/* Tear down the IVC queues set up by vgpu_ivc_init(). */
void vgpu_ivc_deinit(u32 queue_start, u32 num_queues)
{
	tegra_gr_comm_deinit(queue_start, num_queues);
}
35 | |||
/* Release a message handle obtained from vgpu_ivc_recv()/sendrecv(). */
void vgpu_ivc_release(void *handle)
{
	tegra_gr_comm_release(handle);
}
40 | |||
/* Return the VM id of the vgpu server peer. */
u32 vgpu_ivc_get_server_vmid(void)
{
	return tegra_gr_comm_get_server_vmid();
}
45 | |||
/* Blocking receive on IVC queue @index; delegates to tegra_gr_comm_recv(). */
int vgpu_ivc_recv(u32 index, void **handle, void **data,
		size_t *size, u32 *sender)
{
	return tegra_gr_comm_recv(index, handle, data, size, sender);
}
51 | |||
/* Send @size bytes to @peer on IVC queue @index. */
int vgpu_ivc_send(u32 peer, u32 index, void *data, size_t size)
{
	return tegra_gr_comm_send(peer, index, data, size);
}
56 | |||
/* Round-trip send + receive on IVC queue @index; reply returned via
 * @handle/@data/@size. */
int vgpu_ivc_sendrecv(u32 peer, u32 index, void **handle,
		void **data, size_t *size)
{
	return tegra_gr_comm_sendrecv(peer, index, handle, data, size);
}
62 | |||
/* Peer id identifying the local endpoint. */
u32 vgpu_ivc_get_peer_self(void)
{
	return TEGRA_GR_COMM_ID_SELF;
}
67 | |||
/* Map the out-of-band data area for (@peer, @index); release with
 * vgpu_ivc_oob_put_ptr() using the returned handle. */
void *vgpu_ivc_oob_get_ptr(u32 peer, u32 index, void **ptr,
			   size_t *size)
{
	return tegra_gr_comm_oob_get_ptr(peer, index, ptr, size);
}
73 | |||
/* Release an OOB area handle obtained from vgpu_ivc_oob_get_ptr(). */
void vgpu_ivc_oob_put_ptr(void *handle)
{
	tegra_gr_comm_oob_put_ptr(handle);
}
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c new file mode 100644 index 00000000..bbd444da --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_ivm.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/vgpu/vgpu_ivm.h> | ||
18 | |||
19 | #include <linux/tegra-ivc.h> | ||
20 | |||
21 | #include "os/linux/os_linux.h" | ||
22 | |||
/* Reserve hypervisor IVM mempool @id; returns a cookie describing it. */
struct tegra_hv_ivm_cookie *vgpu_ivm_mempool_reserve(unsigned int id)
{
	return tegra_hv_mempool_reserve(id);
}
27 | |||
/* Release a mempool reservation made by vgpu_ivm_mempool_reserve(). */
int vgpu_ivm_mempool_unreserve(struct tegra_hv_ivm_cookie *cookie)
{
	return tegra_hv_mempool_unreserve(cookie);
}
32 | |||
/* Intermediate physical address of the reserved mempool. */
u64 vgpu_ivm_get_ipa(struct tegra_hv_ivm_cookie *cookie)
{
	return cookie->ipa;
}
37 | |||
/* Size in bytes of the reserved mempool. */
u64 vgpu_ivm_get_size(struct tegra_hv_ivm_cookie *cookie)
{
	return cookie->size;
}
42 | |||
/* Map the whole mempool into kernel space (cacheable); unmap with
 * vgpu_ivm_mempool_unmap(). Returns NULL on mapping failure. */
void *vgpu_ivm_mempool_map(struct tegra_hv_ivm_cookie *cookie)
{
	return ioremap_cache(vgpu_ivm_get_ipa(cookie),
			vgpu_ivm_get_size(cookie));
}
48 | |||
/* Undo vgpu_ivm_mempool_map(); @cookie is unused but kept for API symmetry
 * with the map call. */
void vgpu_ivm_mempool_unmap(struct tegra_hv_ivm_cookie *cookie,
		void *addr)
{
	iounmap(addr);
}
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c new file mode 100644 index 00000000..a7612e54 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c | |||
@@ -0,0 +1,475 @@ | |||
1 | /* | ||
2 | * Virtualized GPU for Linux | ||
3 | * | ||
4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/mm.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/pm_runtime.h> | ||
22 | #include <linux/pm_qos.h> | ||
23 | #include <linux/platform_device.h> | ||
24 | #include <soc/tegra/chip-id.h> | ||
25 | |||
26 | #include <nvgpu/kmem.h> | ||
27 | #include <nvgpu/bug.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/debug.h> | ||
30 | #include <nvgpu/soc.h> | ||
31 | #include <nvgpu/ctxsw_trace.h> | ||
32 | #include <nvgpu/defaults.h> | ||
33 | #include <nvgpu/ltc.h> | ||
34 | |||
35 | #include "vgpu_linux.h" | ||
36 | #include "vgpu/fecs_trace_vgpu.h" | ||
37 | #include "clk_vgpu.h" | ||
38 | #include "gk20a/tsg_gk20a.h" | ||
39 | #include "gk20a/channel_gk20a.h" | ||
40 | #include "gk20a/regops_gk20a.h" | ||
41 | #include "gm20b/hal_gm20b.h" | ||
42 | |||
43 | #include "os/linux/module.h" | ||
44 | #include "os/linux/os_linux.h" | ||
45 | #include "os/linux/ioctl.h" | ||
46 | #include "os/linux/scale.h" | ||
47 | #include "os/linux/driver_common.h" | ||
48 | #include "os/linux/platform_gk20a.h" | ||
49 | |||
50 | #include <nvgpu/hw/gk20a/hw_mc_gk20a.h> | ||
51 | |||
52 | struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g) | ||
53 | { | ||
54 | struct gk20a_platform *plat = gk20a_get_platform(dev_from_gk20a(g)); | ||
55 | |||
56 | return (struct vgpu_priv_data *)plat->vgpu_priv; | ||
57 | } | ||
58 | |||
59 | static void vgpu_remove_support(struct gk20a *g) | ||
60 | { | ||
61 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
62 | |||
63 | vgpu_remove_support_common(g); | ||
64 | |||
65 | /* free mappings to registers, etc*/ | ||
66 | |||
67 | if (l->bar1) { | ||
68 | iounmap(l->bar1); | ||
69 | l->bar1 = NULL; | ||
70 | } | ||
71 | } | ||
72 | |||
/*
 * Seed runtime state on @g from the static platform description and
 * snapshot the current register mappings for later restore.
 */
static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	nvgpu_mutex_init(&g->poweron_lock);
	nvgpu_mutex_init(&g->poweroff_lock);
	nvgpu_mutex_init(&g->ctxsw_disable_lock);
	/* remember mappings so they can be restored after railgate cycles */
	l->regs_saved = l->regs;
	l->bar1_saved = l->bar1;

	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
	g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
	g->has_syncpoints = platform->has_syncpoints;
	g->ptimer_src_freq = platform->ptimer_src_freq;
	g->can_railgate = platform->can_railgate_init;
	g->railgate_delay = platform->railgate_delay_init;

	__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
			platform->unify_address_spaces);
}
93 | |||
94 | static int vgpu_init_support(struct platform_device *pdev) | ||
95 | { | ||
96 | struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
97 | struct gk20a *g = get_gk20a(&pdev->dev); | ||
98 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
99 | void __iomem *regs; | ||
100 | int err = 0; | ||
101 | |||
102 | if (!r) { | ||
103 | nvgpu_err(g, "failed to get gk20a bar1"); | ||
104 | err = -ENXIO; | ||
105 | goto fail; | ||
106 | } | ||
107 | |||
108 | if (r->name && !strcmp(r->name, "/vgpu")) { | ||
109 | regs = devm_ioremap_resource(&pdev->dev, r); | ||
110 | if (IS_ERR(regs)) { | ||
111 | nvgpu_err(g, "failed to remap gk20a bar1"); | ||
112 | err = PTR_ERR(regs); | ||
113 | goto fail; | ||
114 | } | ||
115 | l->bar1 = regs; | ||
116 | l->bar1_mem = r; | ||
117 | } | ||
118 | |||
119 | nvgpu_mutex_init(&g->dbg_sessions_lock); | ||
120 | nvgpu_mutex_init(&g->client_lock); | ||
121 | |||
122 | nvgpu_init_list_node(&g->profiler_objects); | ||
123 | |||
124 | g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); | ||
125 | if (!g->dbg_regops_tmp_buf) { | ||
126 | nvgpu_err(g, "couldn't allocate regops tmp buf"); | ||
127 | return -ENOMEM; | ||
128 | } | ||
129 | g->dbg_regops_tmp_buf_ops = | ||
130 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
131 | |||
132 | g->remove_support = vgpu_remove_support; | ||
133 | return 0; | ||
134 | |||
135 | fail: | ||
136 | vgpu_remove_support(g); | ||
137 | return err; | ||
138 | } | ||
139 | |||
140 | int vgpu_pm_prepare_poweroff(struct device *dev) | ||
141 | { | ||
142 | struct gk20a *g = get_gk20a(dev); | ||
143 | int ret = 0; | ||
144 | |||
145 | nvgpu_log_fn(g, " "); | ||
146 | |||
147 | if (!g->power_on) | ||
148 | return 0; | ||
149 | |||
150 | ret = gk20a_channel_suspend(g); | ||
151 | if (ret) | ||
152 | return ret; | ||
153 | |||
154 | g->power_on = false; | ||
155 | |||
156 | return ret; | ||
157 | } | ||
158 | |||
/*
 * Power-on finalization for the virtual GPU: detect the chip, install the
 * HAL, then bring up LTC, MM, FIFO and GR in that order, finishing with
 * the Linux-specific pieces and channel resume. Already-on GPUs are a
 * successful no-op. Returns 0 or the first sub-init's negative errno.
 *
 * NOTE(review): the init sequence is order-dependent; on error the
 * function returns with g->power_on already true — presumably the caller
 * handles teardown. TODO confirm.
 */
int vgpu_pm_finalize_poweron(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int err;

	nvgpu_log_fn(g, " ");

	if (g->power_on)
		return 0;

	g->power_on = true;

	vgpu_detect_chip(g);
	err = vgpu_init_hal(g);
	if (err)
		goto done;

	if (g->ops.ltc.init_fs_state)
		g->ops.ltc.init_fs_state(g);

	err = nvgpu_init_ltc_support(g);
	if (err) {
		nvgpu_err(g, "failed to init ltc");
		goto done;
	}

	err = vgpu_init_mm_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a mm");
		goto done;
	}

	err = vgpu_init_fifo_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a fifo");
		goto done;
	}

	err = vgpu_init_gr_support(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gr");
		goto done;
	}

	err = g->ops.chip_init_gpu_characteristics(g);
	if (err) {
		nvgpu_err(g, "failed to init gk20a gpu characteristics");
		goto done;
	}

	err = nvgpu_finalize_poweron_linux(l);
	if (err)
		goto done;

#ifdef CONFIG_GK20A_CTXSW_TRACE
	gk20a_ctxsw_trace_init(g);
#endif
	gk20a_sched_ctrl_init(g);
	gk20a_channel_resume(g);

	g->sw_ready = true;

done:
	return err;
}
225 | |||
226 | static int vgpu_qos_notify(struct notifier_block *nb, | ||
227 | unsigned long n, void *data) | ||
228 | { | ||
229 | struct gk20a_scale_profile *profile = | ||
230 | container_of(nb, struct gk20a_scale_profile, | ||
231 | qos_notify_block); | ||
232 | struct gk20a *g = get_gk20a(profile->dev); | ||
233 | u32 max_freq; | ||
234 | int err; | ||
235 | |||
236 | nvgpu_log_fn(g, " "); | ||
237 | |||
238 | max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS); | ||
239 | err = vgpu_clk_cap_rate(profile->dev, max_freq); | ||
240 | if (err) | ||
241 | nvgpu_err(g, "%s failed, err=%d", __func__, err); | ||
242 | |||
243 | return NOTIFY_OK; /* need notify call further */ | ||
244 | } | ||
245 | |||
246 | static int vgpu_pm_qos_init(struct device *dev) | ||
247 | { | ||
248 | struct gk20a *g = get_gk20a(dev); | ||
249 | struct gk20a_scale_profile *profile = g->scale_profile; | ||
250 | |||
251 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) { | ||
252 | if (!profile) | ||
253 | return -EINVAL; | ||
254 | } else { | ||
255 | profile = nvgpu_kzalloc(g, sizeof(*profile)); | ||
256 | if (!profile) | ||
257 | return -ENOMEM; | ||
258 | g->scale_profile = profile; | ||
259 | } | ||
260 | |||
261 | profile->dev = dev; | ||
262 | profile->qos_notify_block.notifier_call = vgpu_qos_notify; | ||
263 | pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
264 | &profile->qos_notify_block); | ||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | static void vgpu_pm_qos_remove(struct device *dev) | ||
269 | { | ||
270 | struct gk20a *g = get_gk20a(dev); | ||
271 | |||
272 | pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS, | ||
273 | &g->scale_profile->qos_notify_block); | ||
274 | nvgpu_kfree(g, g->scale_profile); | ||
275 | g->scale_profile = NULL; | ||
276 | } | ||
277 | |||
278 | static int vgpu_pm_init(struct device *dev) | ||
279 | { | ||
280 | struct gk20a *g = get_gk20a(dev); | ||
281 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
282 | unsigned long *freqs; | ||
283 | int num_freqs; | ||
284 | int err = 0; | ||
285 | |||
286 | nvgpu_log_fn(g, " "); | ||
287 | |||
288 | if (nvgpu_platform_is_simulation(g)) | ||
289 | return 0; | ||
290 | |||
291 | __pm_runtime_disable(dev, false); | ||
292 | |||
293 | if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) | ||
294 | gk20a_scale_init(dev); | ||
295 | |||
296 | if (l->devfreq) { | ||
297 | /* set min/max frequency based on frequency table */ | ||
298 | err = vgpu_clk_get_freqs(dev, &freqs, &num_freqs); | ||
299 | if (err) | ||
300 | return err; | ||
301 | |||
302 | if (num_freqs < 1) | ||
303 | return -EINVAL; | ||
304 | |||
305 | l->devfreq->min_freq = freqs[0]; | ||
306 | l->devfreq->max_freq = freqs[num_freqs - 1]; | ||
307 | } | ||
308 | |||
309 | err = vgpu_pm_qos_init(dev); | ||
310 | if (err) | ||
311 | return err; | ||
312 | |||
313 | return err; | ||
314 | } | ||
315 | |||
/*
 * Driver probe for the para-virtualized GPU: allocate the per-device
 * nvgpu_os_linux container, connect to the vgpu server over IVC, pull the
 * GPU constants from the server, set up PM/QoS, and start the interrupt
 * handler thread.
 *
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): several mid-function error paths (gk20a_user_init,
 * platform->probe, vgpu_pm_init, nvgpu_thread_create, ...) return without
 * freeing `l`/`priv` or undoing earlier steps — TODO audit cleanup.
 */
int vgpu_probe(struct platform_device *pdev)
{
	struct nvgpu_os_linux *l;
	struct gk20a *gk20a;
	int err;
	struct device *dev = &pdev->dev;
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct vgpu_priv_data *priv;

	if (!platform) {
		dev_err(dev, "no platform data\n");
		return -ENODATA;
	}

	l = kzalloc(sizeof(*l), GFP_KERNEL);
	if (!l) {
		dev_err(dev, "couldn't allocate gk20a support");
		return -ENOMEM;
	}
	gk20a = &l->g;

	nvgpu_log_fn(gk20a, " ");

	nvgpu_init_gk20a(gk20a);

	nvgpu_kmem_init(gk20a);

	err = nvgpu_init_enabled_flags(gk20a);
	if (err) {
		kfree(gk20a);
		return err;
	}

	l->dev = dev;
	/* on VDK simulation, mark the GPU as a functional model */
	if (tegra_platform_is_vdk())
		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);

	gk20a->is_virtual = true;

	priv = nvgpu_kzalloc(gk20a, sizeof(*priv));
	if (!priv) {
		kfree(gk20a);
		return -ENOMEM;
	}

	platform->g = gk20a;
	platform->vgpu_priv = priv;

	err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
	if (err)
		return err;

	vgpu_init_support(pdev);

	vgpu_init_vars(gk20a, platform);

	init_rwsem(&l->busy_lock);

	nvgpu_spinlock_init(&gk20a->mc_enable_lock);

	gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;

	/* Initialize the platform interface. */
	err = platform->probe(dev);
	if (err) {
		if (err == -EPROBE_DEFER)
			nvgpu_info(gk20a, "platform probe failed");
		else
			nvgpu_err(gk20a, "platform probe failed");
		return err;
	}

	if (platform->late_probe) {
		err = platform->late_probe(dev);
		if (err) {
			nvgpu_err(gk20a, "late probe failed");
			return err;
		}
	}

	err = vgpu_comm_init(gk20a);
	if (err) {
		nvgpu_err(gk20a, "failed to init comm interface");
		return -ENOSYS;
	}

	priv->virt_handle = vgpu_connect();
	if (!priv->virt_handle) {
		nvgpu_err(gk20a, "failed to connect to server node");
		vgpu_comm_deinit();
		return -ENOSYS;
	}

	err = vgpu_get_constants(gk20a);
	if (err) {
		vgpu_comm_deinit();
		return err;
	}

	err = vgpu_pm_init(dev);
	if (err) {
		nvgpu_err(gk20a, "pm init failed");
		return err;
	}

	err = nvgpu_thread_create(&priv->intr_handler, gk20a,
			vgpu_intr_thread, "gk20a");
	if (err)
		return err;

	gk20a_debug_init(gk20a, "gpu.0");

	/* Set DMA parameters to allow larger sgt lists */
	dev->dma_parms = &l->dma_parms;
	dma_set_max_seg_size(dev, UINT_MAX);

	gk20a->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
	gk20a->timeouts_disabled_by_user = false;
	nvgpu_atomic_set(&gk20a->timeouts_disabled_refcount, 0);

	vgpu_create_sysfs(dev);
	gk20a_init_gr(gk20a);

	/* cap comptag memory to a fraction of system RAM */
	nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages);
	gk20a->gr.max_comptag_mem = totalram_pages
				 >> (10 - (PAGE_SHIFT - 10));

	nvgpu_ref_init(&gk20a->refcount);

	return 0;
}
447 | |||
/*
 * Driver removal: unwind probe in reverse order — QoS, per-GPU support,
 * IVC connection, sched ctrl, user interface, sysfs — then drop the last
 * reference to @g. Always returns 0.
 */
int vgpu_remove(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct gk20a *g = get_gk20a(dev);

	nvgpu_log_fn(g, " ");

	vgpu_pm_qos_remove(dev);
	if (g->remove_support)
		g->remove_support(g);

	vgpu_comm_deinit();
	gk20a_sched_ctrl_cleanup(g);
	gk20a_user_deinit(dev, &nvgpu_class);
	vgpu_remove_sysfs(dev);
	gk20a_get_platform(dev)->g = NULL;
	/* final reference; may free g */
	gk20a_put(g);

	return 0;
}
468 | |||
469 | bool vgpu_is_reduced_bar1(struct gk20a *g) | ||
470 | { | ||
471 | struct fifo_gk20a *f = &g->fifo; | ||
472 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
473 | |||
474 | return resource_size(l->bar1_mem) == (resource_size_t)f->userd.size; | ||
475 | } | ||
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h new file mode 100644 index 00000000..38379cf2 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Virtualized GPU Linux Interfaces | ||
3 | * | ||
4 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
#ifndef __VGPU_LINUX_H__
#define __VGPU_LINUX_H__

struct device;
struct platform_device;

#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION

#include <nvgpu/vgpu/vgpu.h>

/* Runtime PM hooks for the virtual GPU. */
int vgpu_pm_prepare_poweroff(struct device *dev);
int vgpu_pm_finalize_poweron(struct device *dev);
/* Platform-driver probe/remove entry points. */
int vgpu_probe(struct platform_device *dev);
int vgpu_remove(struct platform_device *dev);

/* sysfs attribute setup/teardown (see sysfs_vgpu.c). */
void vgpu_create_sysfs(struct device *dev);
void vgpu_remove_sysfs(struct device *dev);
#else
/* define placeholders for functions used outside of vgpu */
/* Stubs return -ENOSYS so non-virtualized builds fail these calls
 * explicitly rather than silently succeeding. */

static inline int vgpu_pm_prepare_poweroff(struct device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_pm_finalize_poweron(struct device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_probe(struct platform_device *dev)
{
	return -ENOSYS;
}
static inline int vgpu_remove(struct platform_device *dev)
{
	return -ENOSYS;
}
#endif

#endif
diff --git a/drivers/gpu/nvgpu/os/linux/vidmem.c b/drivers/gpu/nvgpu/os/linux/vidmem.c new file mode 100644 index 00000000..136d4a10 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vidmem.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | #include <uapi/linux/nvgpu.h> | ||
19 | |||
20 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
21 | #include <linux/platform/tegra/tegra_fd.h> | ||
22 | #endif | ||
23 | |||
24 | #include <nvgpu/dma.h> | ||
25 | #include <nvgpu/enabled.h> | ||
26 | #include <nvgpu/vidmem.h> | ||
27 | #include <nvgpu/nvgpu_mem.h> | ||
28 | #include <nvgpu/page_allocator.h> | ||
29 | |||
30 | #include <nvgpu/linux/vm.h> | ||
31 | #include <nvgpu/linux/dma.h> | ||
32 | #include <nvgpu/linux/vidmem.h> | ||
33 | |||
34 | #include "gk20a/gk20a.h" | ||
35 | #include "gk20a/mm_gk20a.h" | ||
36 | |||
37 | bool nvgpu_addr_is_vidmem_page_alloc(u64 addr) | ||
38 | { | ||
39 | return !!(addr & 1ULL); | ||
40 | } | ||
41 | |||
/* Stash a vidmem page-alloc pointer in the sgl's DMA address, tagging it
 * with bit 0 so readers can tell it apart from a real DMA address. */
void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr)
{
	/* set bit 0 to indicate vidmem allocation */
	sg_dma_address(sgl) = (addr | 1ULL);
}
47 | |||
48 | struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl) | ||
49 | { | ||
50 | u64 addr; | ||
51 | |||
52 | addr = sg_dma_address(sgl); | ||
53 | |||
54 | if (nvgpu_addr_is_vidmem_page_alloc(addr)) | ||
55 | addr = addr & ~1ULL; | ||
56 | else | ||
57 | WARN_ON(1); | ||
58 | |||
59 | return (struct nvgpu_page_alloc *)(uintptr_t)addr; | ||
60 | } | ||
61 | |||
/* dma-buf map: hand back the buffer's pre-built sg_table; no per-attach
 * mapping work is done. */
static struct sg_table *gk20a_vidbuf_map_dma_buf(
	struct dma_buf_attachment *attach, enum dma_data_direction dir)
{
	struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv;

	return buf->mem->priv.sgt;
}
69 | |||
/* dma-buf unmap: nothing to undo since map returns a shared sg_table. */
static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
				       struct sg_table *sgt,
				       enum dma_data_direction dir)
{
}
75 | |||
/*
 * dma-buf release: run the user's private-data destructor (if any), free
 * the Linux wrapper and the vidmem buffer, then drop the GPU reference
 * taken at export time.
 */
static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
{
	struct nvgpu_vidmem_buf *buf = dmabuf->priv;
	struct nvgpu_vidmem_linux *linux_buf = buf->priv;
	struct gk20a *g = buf->g;

	vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
		   dmabuf, buf->mem->size >> 10);

	if (linux_buf && linux_buf->dmabuf_priv_delete)
		linux_buf->dmabuf_priv_delete(linux_buf->dmabuf_priv);

	nvgpu_kfree(g, linux_buf);
	nvgpu_vidmem_buf_free(g, buf);

	/* balances the gk20a_get() in nvgpu_vidmem_export_linux() */
	gk20a_put(g);
}
93 | |||
/* CPU kmap of vidmem is unsupported; warn and return NULL. */
static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
{
	WARN_ON("Not supported");
	return NULL;
}
99 | |||
/* Atomic CPU kmap of vidmem is unsupported; warn and return NULL. */
static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
				      unsigned long page_num)
{
	WARN_ON("Not supported");
	return NULL;
}
106 | |||
/* Userspace mmap of vidmem is not supported. */
static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
	return -EINVAL;
}
111 | |||
/* Attach caller-private data (and its destructor, invoked at buffer
 * release) to the vidmem dma-buf. Always succeeds. */
static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
		struct device *dev, void *priv, void (*delete)(void *priv))
{
	struct nvgpu_vidmem_buf *buf = dmabuf->priv;
	struct nvgpu_vidmem_linux *linux_buf = buf->priv;

	linux_buf->dmabuf_priv = priv;
	linux_buf->dmabuf_priv_delete = delete;

	return 0;
}
123 | |||
124 | static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf, | ||
125 | struct device *dev) | ||
126 | { | ||
127 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
128 | struct nvgpu_vidmem_linux *linux_buf = buf->priv; | ||
129 | |||
130 | return linux_buf->dmabuf_priv; | ||
131 | } | ||
132 | |||
/* dma-buf operations for vidmem buffers; also used as an identity tag by
 * nvgpu_vidmem_buf_owner() to recognize buffers exported by this driver. */
static const struct dma_buf_ops gk20a_vidbuf_ops = {
	.map_dma_buf      = gk20a_vidbuf_map_dma_buf,
	.unmap_dma_buf    = gk20a_vidbuf_unmap_dma_buf,
	.release          = gk20a_vidbuf_release,
	.kmap_atomic      = gk20a_vidbuf_kmap_atomic,
	.kmap             = gk20a_vidbuf_kmap,
	.mmap             = gk20a_vidbuf_mmap,
	.set_drvdata      = gk20a_vidbuf_set_private,
	.get_drvdata      = gk20a_vidbuf_get_private,
};
143 | |||
/*
 * Wrap a vidmem buffer in a new dma_buf. The dma_buf's priv points back at
 * @buf and its size mirrors the underlying nvgpu_mem allocation. Returns
 * the dma_buf or an ERR_PTR from dma_buf_export().
 */
static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);

	exp_info.priv = buf;
	exp_info.ops = &gk20a_vidbuf_ops;
	exp_info.size = buf->mem->size;
	exp_info.flags = O_RDWR;

	return dma_buf_export(&exp_info);
}
155 | |||
156 | struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf) | ||
157 | { | ||
158 | struct nvgpu_vidmem_buf *buf = dmabuf->priv; | ||
159 | |||
160 | if (dmabuf->ops != &gk20a_vidbuf_ops) | ||
161 | return NULL; | ||
162 | |||
163 | return buf->g; | ||
164 | } | ||
165 | |||
166 | int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes) | ||
167 | { | ||
168 | struct nvgpu_vidmem_buf *buf = NULL; | ||
169 | struct nvgpu_vidmem_linux *priv; | ||
170 | int err, fd; | ||
171 | |||
172 | /* | ||
173 | * This ref is released when the dma_buf is closed. | ||
174 | */ | ||
175 | if (!gk20a_get(g)) | ||
176 | return -ENODEV; | ||
177 | |||
178 | vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes); | ||
179 | |||
180 | priv = nvgpu_kzalloc(g, sizeof(*priv)); | ||
181 | if (!priv) { | ||
182 | err = -ENOMEM; | ||
183 | goto fail; | ||
184 | } | ||
185 | |||
186 | buf = nvgpu_vidmem_user_alloc(g, bytes); | ||
187 | if (IS_ERR(buf)) { | ||
188 | err = PTR_ERR(buf); | ||
189 | goto fail; | ||
190 | } | ||
191 | |||
192 | priv->dmabuf = gk20a_vidbuf_export(buf); | ||
193 | if (IS_ERR(priv->dmabuf)) { | ||
194 | err = PTR_ERR(priv->dmabuf); | ||
195 | goto fail; | ||
196 | } | ||
197 | |||
198 | buf->priv = priv; | ||
199 | |||
200 | #ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD | ||
201 | fd = tegra_alloc_fd(current->files, 1024, O_RDWR); | ||
202 | #else | ||
203 | fd = get_unused_fd_flags(O_RDWR); | ||
204 | #endif | ||
205 | if (fd < 0) { | ||
206 | /* ->release frees what we have done */ | ||
207 | dma_buf_put(priv->dmabuf); | ||
208 | return fd; | ||
209 | } | ||
210 | |||
211 | /* fclose() on this drops one ref, freeing the dma buf */ | ||
212 | fd_install(fd, priv->dmabuf->file); | ||
213 | |||
214 | vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB", | ||
215 | priv->dmabuf, buf->mem->size >> 10); | ||
216 | |||
217 | return fd; | ||
218 | |||
219 | fail: | ||
220 | nvgpu_vidmem_buf_free(g, buf); | ||
221 | nvgpu_kfree(g, priv); | ||
222 | gk20a_put(g); | ||
223 | |||
224 | vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err); | ||
225 | return err; | ||
226 | } | ||
227 | |||
228 | int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, | ||
229 | void *buffer, u64 offset, u64 size, u32 cmd) | ||
230 | { | ||
231 | struct nvgpu_vidmem_buf *vidmem_buf; | ||
232 | struct nvgpu_mem *mem; | ||
233 | int err = 0; | ||
234 | |||
235 | if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM) | ||
236 | return -EINVAL; | ||
237 | |||
238 | vidmem_buf = dmabuf->priv; | ||
239 | mem = vidmem_buf->mem; | ||
240 | |||
241 | switch (cmd) { | ||
242 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: | ||
243 | nvgpu_mem_rd_n(g, mem, offset, buffer, size); | ||
244 | break; | ||
245 | |||
246 | case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE: | ||
247 | nvgpu_mem_wr_n(g, mem, offset, buffer, size); | ||
248 | break; | ||
249 | |||
250 | default: | ||
251 | err = -EINVAL; | ||
252 | } | ||
253 | |||
254 | return err; | ||
255 | } | ||
256 | |||
/*
 * Free the vidmem backing of @vidmem: return the page allocation behind
 * the first scatterlist entry to the buffer's allocator, then release the
 * sg_table describing it.
 */
void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
{
	nvgpu_free(vidmem->allocator,
		   (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
	nvgpu_free_sgtable(g, &vidmem->priv.sgt);
}
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c new file mode 100644 index 00000000..baa77515 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/vm.c | |||
@@ -0,0 +1,332 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/dma-buf.h> | ||
18 | #include <linux/scatterlist.h> | ||
19 | #include <uapi/linux/nvgpu.h> | ||
20 | |||
21 | #include <nvgpu/log.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/rbtree.h> | ||
24 | #include <nvgpu/vm_area.h> | ||
25 | #include <nvgpu/nvgpu_mem.h> | ||
26 | #include <nvgpu/page_allocator.h> | ||
27 | #include <nvgpu/vidmem.h> | ||
28 | |||
29 | #include <nvgpu/linux/vm.h> | ||
30 | #include <nvgpu/linux/vidmem.h> | ||
31 | #include <nvgpu/linux/nvgpu_mem.h> | ||
32 | |||
33 | #include "gk20a/gk20a.h" | ||
34 | #include "gk20a/mm_gk20a.h" | ||
35 | |||
36 | #include "platform_gk20a.h" | ||
37 | #include "os_linux.h" | ||
38 | #include "dmabuf.h" | ||
39 | |||
40 | static u32 nvgpu_vm_translate_linux_flags(struct gk20a *g, u32 flags) | ||
41 | { | ||
42 | u32 core_flags = 0; | ||
43 | |||
44 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) | ||
45 | core_flags |= NVGPU_VM_MAP_FIXED_OFFSET; | ||
46 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE) | ||
47 | core_flags |= NVGPU_VM_MAP_CACHEABLE; | ||
48 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT) | ||
49 | core_flags |= NVGPU_VM_MAP_IO_COHERENT; | ||
50 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE) | ||
51 | core_flags |= NVGPU_VM_MAP_UNMAPPED_PTE; | ||
52 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC) | ||
53 | core_flags |= NVGPU_VM_MAP_L3_ALLOC; | ||
54 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) | ||
55 | core_flags |= NVGPU_VM_MAP_DIRECT_KIND_CTRL; | ||
56 | |||
57 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS) | ||
58 | nvgpu_warn(g, "Ignoring deprecated flag: " | ||
59 | "NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS"); | ||
60 | |||
61 | return core_flags; | ||
62 | } | ||
63 | |||
64 | static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( | ||
65 | struct vm_gk20a *vm, struct dma_buf *dmabuf, u32 kind) | ||
66 | { | ||
67 | struct nvgpu_rbtree_node *node = NULL; | ||
68 | struct nvgpu_rbtree_node *root = vm->mapped_buffers; | ||
69 | |||
70 | nvgpu_rbtree_enum_start(0, &node, root); | ||
71 | |||
72 | while (node) { | ||
73 | struct nvgpu_mapped_buf *mapped_buffer = | ||
74 | mapped_buffer_from_rbtree_node(node); | ||
75 | |||
76 | if (mapped_buffer->os_priv.dmabuf == dmabuf && | ||
77 | mapped_buffer->kind == kind) | ||
78 | return mapped_buffer; | ||
79 | |||
80 | nvgpu_rbtree_enum_next(&node, node); | ||
81 | } | ||
82 | |||
83 | return NULL; | ||
84 | } | ||
85 | |||
/*
 * Translate @gpu_va inside @vm back to the dma_buf backing it and the byte
 * offset of @gpu_va within that mapping. Returns 0 on success, -EINVAL when
 * no mapping covers the address. Acquires/releases vm->update_gmmu_lock.
 *
 * NOTE(review): no reference is taken on *dmabuf before returning —
 * presumably callers guarantee the mapping stays alive; verify at call
 * sites.
 */
int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
		      struct dma_buf **dmabuf,
		      u64 *offset)
{
	struct nvgpu_mapped_buf *mapped_buffer;
	struct gk20a *g = gk20a_from_vm(vm);

	nvgpu_log_fn(g, "gpu_va=0x%llx", gpu_va);

	nvgpu_mutex_acquire(&vm->update_gmmu_lock);

	mapped_buffer = __nvgpu_vm_find_mapped_buf_range(vm, gpu_va);
	if (!mapped_buffer) {
		nvgpu_mutex_release(&vm->update_gmmu_lock);
		return -EINVAL;
	}

	*dmabuf = mapped_buffer->os_priv.dmabuf;
	*offset = gpu_va - mapped_buffer->addr;

	nvgpu_mutex_release(&vm->update_gmmu_lock);

	return 0;
}
110 | |||
/* Size in bytes of the dma_buf backing this OS buffer. */
u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
{
	return os_buf->dmabuf->size;
}
115 | |||
/*
 * vm->update_gmmu_lock must be held. This checks to see if we already have
 * mapped the passed buffer into this VM. If so, just return the existing
 * mapping address.
 *
 * For fixed-offset requests the lookup is by GPU VA (which must then match
 * the same dmabuf and kind); otherwise the whole mapped-buffer tree is
 * scanned for a (dmabuf, kind) match. On a reusable hit, the caller's
 * freshly taken pin and dma_buf reference are dropped here, because the
 * existing mapping already holds its own.
 */
struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
					       struct nvgpu_os_buffer *os_buf,
					       u64 map_addr,
					       u32 flags,
					       int kind)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct nvgpu_mapped_buf *mapped_buffer = NULL;

	if (flags & NVGPU_VM_MAP_FIXED_OFFSET) {
		/* Fixed offset: any existing mapping must sit at map_addr. */
		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
		if (!mapped_buffer)
			return NULL;

		/* Same VA but a different buffer or kind is not a match. */
		if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
		    mapped_buffer->kind != (u32)kind)
			return NULL;
	} else {
		mapped_buffer =
			__nvgpu_vm_find_mapped_buf_reverse(vm,
							   os_buf->dmabuf,
							   kind);
		if (!mapped_buffer)
			return NULL;
	}

	/* Map flags must match exactly for the mapping to be reusable. */
	if (mapped_buffer->flags != flags)
		return NULL;

	/*
	 * If we find the mapping here then that means we have mapped it already
	 * and the prior pin and get must be undone.
	 */
	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, os_buf->attachment,
		       mapped_buffer->os_priv.sgt);
	dma_buf_put(os_buf->dmabuf);

	nvgpu_log(g, gpu_dbg_map,
		  "gv: 0x%04x_%08x + 0x%-7zu "
		  "[dma: 0x%010llx, pa: 0x%010llx] "
		  "pgsz=%-3dKb as=%-2d "
		  "flags=0x%x apt=%s (reused)",
		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
		  os_buf->dmabuf->size,
		  (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
		  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
		  vm_aspace_id(vm),
		  mapped_buffer->flags,
		  nvgpu_aperture_str(g,
				     gk20a_dmabuf_aperture(g, os_buf->dmabuf)));

	return mapped_buffer;
}
175 | |||
/*
 * Map @dmabuf into @vm. Pins the dma_buf for DMA, validates its aperture,
 * wraps the resulting sg_table in an nvgpu_sgt, and hands everything to the
 * OS-independent nvgpu_vm_map(). On success *gpu_va receives the GPU VA and
 * the pin state is recorded in the mapped buffer's os_priv for unmap time;
 * on failure the pin is undone and a negative error code is returned.
 */
int nvgpu_vm_map_linux(struct vm_gk20a *vm,
		       struct dma_buf *dmabuf,
		       u64 offset_align,
		       u32 flags,
		       s16 compr_kind,
		       s16 incompr_kind,
		       int rw_flag,
		       u64 buffer_offset,
		       u64 mapping_size,
		       struct vm_gk20a_mapping_batch *batch,
		       u64 *gpu_va)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct device *dev = dev_from_gk20a(g);
	struct nvgpu_os_buffer os_buf;
	struct sg_table *sgt;
	struct nvgpu_sgt *nvgpu_sgt = NULL;
	struct nvgpu_mapped_buf *mapped_buffer = NULL;
	struct dma_buf_attachment *attachment;
	u64 map_addr = 0ULL;
	int err = 0;

	/* offset_align doubles as the fixed GPU VA when FIXED_OFFSET is set. */
	if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
		map_addr = offset_align;

	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
	if (IS_ERR(sgt)) {
		nvgpu_warn(g, "Failed to pin dma_buf!");
		return PTR_ERR(sgt);
	}
	os_buf.dmabuf = dmabuf;
	os_buf.attachment = attachment;
	os_buf.dev = dev;

	if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
		err = -EINVAL;
		goto clean_up;
	}

	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
	if (!nvgpu_sgt) {
		err = -ENOMEM;
		goto clean_up;
	}

	mapped_buffer = nvgpu_vm_map(vm,
				     &os_buf,
				     nvgpu_sgt,
				     map_addr,
				     mapping_size,
				     buffer_offset,
				     rw_flag,
				     flags,
				     compr_kind,
				     incompr_kind,
				     batch,
				     gk20a_dmabuf_aperture(g, dmabuf));

	/* Freed unconditionally here — the nvgpu_sgt wrapper is not
	 * retained past nvgpu_vm_map(); the raw sgt (pinned above) is. */
	nvgpu_sgt_free(g, nvgpu_sgt);

	if (IS_ERR(mapped_buffer)) {
		err = PTR_ERR(mapped_buffer);
		goto clean_up;
	}

	/* Record what nvgpu_vm_unmap_system() must unpin/put later. */
	mapped_buffer->os_priv.dmabuf = dmabuf;
	mapped_buffer->os_priv.attachment = attachment;
	mapped_buffer->os_priv.sgt = sgt;

	*gpu_va = mapped_buffer->addr;
	return 0;

clean_up:
	gk20a_mm_unpin(dev, dmabuf, attachment, sgt);

	return err;
}
253 | |||
/*
 * IOCTL-facing entry for mapping a userspace dma_buf fd into @vm. Takes a
 * reference on the dma_buf (released at unmap time, or here on failure),
 * rejects out-of-bounds (offset, size) requests, translates the Linux ABI
 * flags, and defers to nvgpu_vm_map_linux(). On success *offset_align is
 * overwritten with the resulting GPU VA.
 */
int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
			int dmabuf_fd,
			u64 *offset_align,
			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
			s16 compr_kind,
			s16 incompr_kind,
			u64 buffer_offset,
			u64 mapping_size,
			struct vm_gk20a_mapping_batch *batch)
{
	struct gk20a *g = gk20a_from_vm(vm);
	struct dma_buf *dmabuf;
	u64 ret_va;
	int err = 0;

	/* get ref to the mem handle (released on unmap_locked) */
	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf)) {
		nvgpu_warn(g, "%s: fd %d is not a dmabuf",
			   __func__, dmabuf_fd);
		return PTR_ERR(dmabuf);
	}

	/* verify that we're not overflowing the buffer, i.e.
	 * (buffer_offset + mapping_size)> dmabuf->size.
	 *
	 * Since buffer_offset + mapping_size could overflow, first check
	 * that mapping size < dmabuf_size, at which point we can subtract
	 * mapping_size from both sides for the final comparison.
	 */
	if ((mapping_size > dmabuf->size) ||
	    (buffer_offset > (dmabuf->size - mapping_size))) {
		nvgpu_err(g,
			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
			  (u64)dmabuf->size, buffer_offset, mapping_size);
		dma_buf_put(dmabuf);
		return -EINVAL;
	}

	/* Ensure per-dmabuf driver data exists before mapping. */
	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}

	err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
				 nvgpu_vm_translate_linux_flags(g, flags),
				 compr_kind, incompr_kind,
				 gk20a_mem_flag_none,
				 buffer_offset,
				 mapping_size,
				 batch,
				 &ret_va);

	if (!err)
		*offset_align = ret_va;
	else
		dma_buf_put(dmabuf);

	return err;
}
315 | |||
/*
 * This is the function call-back for freeing OS specific components of an
 * nvgpu_mapped_buf. This should most likely never be called outside of the
 * core MM framework!
 *
 * Note: the VM lock will be held.
 *
 * Undoes the gk20a_mm_pin() and dma_buf_get() performed when the buffer
 * was mapped (see nvgpu_vm_map_linux()/nvgpu_vm_map_buffer()).
 */
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
{
	struct vm_gk20a *vm = mapped_buffer->vm;

	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
		       mapped_buffer->os_priv.attachment,
		       mapped_buffer->os_priv.sgt);

	dma_buf_put(mapped_buffer->os_priv.dmabuf);
}
332 | } | ||