author	Aingara Paramakuru <aparamakuru@nvidia.com>	2014-05-05 21:14:22 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:11:01 -0400
commit	1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree	3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu
parent	69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports using the Tegra graphics virtualization interfaces to support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
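
Every operation added by this change follows the same remote-procedure-call pattern: build a struct tegra_vgpu_cmd_msg, pick a TEGRA_VGPU_CMD_* command, address it with the platform's virt_handle, fill in the per-command params union, and round-trip it to the virtualization server with vgpu_comm_sendrecv(). The sketch below restates that pattern in isolation, mirroring the channel-disable path in fifo_vgpu.c; it is illustrative only and not part of the commit, and the message layout itself is defined in <linux/tegra_vgpu.h>, which is outside this diff.

#include "vgpu/vgpu.h"

/*
 * Illustrative only -- mirrors vgpu_channel_disable() in fifo_vgpu.c below.
 * Struct and field names are taken from the call sites in this diff; the
 * actual definitions live in <linux/tegra_vgpu.h>.
 */
static int example_vgpu_rpc(struct channel_gk20a *ch)
{
	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
	struct tegra_vgpu_cmd_msg msg;
	struct tegra_vgpu_channel_config_params *p =
			&msg.params.channel_config;
	int err;

	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;  /* which server operation */
	msg.handle = platform->virt_handle;        /* connection to the server */
	p->handle = ch->virt_ctx;                  /* server-side channel context */

	/* msg is overwritten in place with the server's reply. */
	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	return (err || msg.ret) ? -EIO : 0;
}
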
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/Makefile     |  10
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c  | 569
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c    | 687
-rw-r--r--  drivers/gpu/nvgpu/vgpu/ltc_vgpu.c   |  55
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c    | 425
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.c       | 416
-rw-r--r--  drivers/gpu/nvgpu/vgpu/vgpu.h       |  41
7 files changed, 2203 insertions(+), 0 deletions(-)
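
As the diffs below show, the virtual path does not reimplement the whole driver: at power-on, vgpu_init_hal() in vgpu.c installs the native gk20a function table first and then overrides only the FIFO, GR, LTC and MM ops with vgpu variants that forward the work to the server. A condensed sketch of that layering (illustrative only, not part of the commit):

#include "vgpu/vgpu.h"
#include "gk20a/hal_gk20a.h"

/* Condensed from vgpu_init_hal() in vgpu.c below; illustrative only. */
static void example_layer_virt_ops(struct gk20a *g)
{
	/* Start from the full native gk20a HAL... */
	gk20a_init_hal(&g->ops);

	/* ...then replace just the paths that must go through the
	 * virtualization server. */
	vgpu_init_fifo_ops(&g->ops);
	vgpu_init_gr_ops(&g->ops);
	vgpu_init_ltc_ops(&g->ops);
	vgpu_init_mm_ops(&g->ops);
}
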
diff --git a/drivers/gpu/nvgpu/vgpu/Makefile b/drivers/gpu/nvgpu/vgpu/Makefile
new file mode 100644
index 00000000..edad7171
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/Makefile
@@ -0,0 +1,10 @@
1GCOV_PROFILE := y
2ccflags-y += -Idrivers/gpu/nvgpu
3ccflags-y += -Wno-multichar
4
5obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) = \
6 ltc_vgpu.o \
7 gr_vgpu.o \
8 fifo_vgpu.o \
9 mm_vgpu.o \
10 vgpu.o
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 100644
index 00000000..23dec1f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -0,0 +1,569 @@
1/*
2 * Virtualized GPU Fifo
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h"
18#include "gk20a/hw_fifo_gk20a.h"
19#include "gk20a/hw_ram_gk20a.h"
20
21static void vgpu_channel_bind(struct channel_gk20a *ch)
22{
23 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
24 struct tegra_vgpu_cmd_msg msg;
25 struct tegra_vgpu_channel_config_params *p =
26 &msg.params.channel_config;
27 int err;
28
29 gk20a_dbg_info("bind channel %d", ch->hw_chid);
30
31 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
32 msg.handle = platform->virt_handle;
33 p->handle = ch->virt_ctx;
34 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
35 WARN_ON(err || msg.ret);
36
37 ch->bound = true;
38}
39
40static void vgpu_channel_unbind(struct channel_gk20a *ch)
41{
42 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
43
44 gk20a_dbg_fn("");
45
46 if (ch->bound) {
47 struct tegra_vgpu_cmd_msg msg;
48 struct tegra_vgpu_channel_config_params *p =
49 &msg.params.channel_config;
50 int err;
51
52 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
53 msg.handle = platform->virt_handle;
54 p->handle = ch->virt_ctx;
55 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
56 WARN_ON(err || msg.ret);
57 }
58
59 ch->bound = false;
60
61 /*
62 * if we are aggressive then we can destroy the syncpt
63 * resource at this point;
64 * if not, it will be destroyed at channel_free()
65 */
66 if (ch->sync && ch->sync->aggressive_destroy) {
67 ch->sync->destroy(ch->sync);
68 ch->sync = NULL;
69 }
70}
71
72static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
73{
74 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
75 struct tegra_vgpu_cmd_msg msg;
76 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
77 int err;
78
79 gk20a_dbg_fn("");
80
81 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
82 msg.handle = platform->virt_handle;
83 p->id = ch->hw_chid;
84 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
85 if (err || msg.ret) {
86 gk20a_err(dev_from_gk20a(g), "fail");
87 return -ENOMEM;
88 }
89
90 ch->virt_ctx = p->handle;
91 gk20a_dbg_fn("done");
92 return 0;
93}
94
95static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
96{
97 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
98 struct tegra_vgpu_cmd_msg msg;
99 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
100 int err;
101
102 gk20a_dbg_fn("");
103
104 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
105 msg.handle = platform->virt_handle;
106 p->handle = ch->virt_ctx;
107 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
108 WARN_ON(err || msg.ret);
109}
110
111static void vgpu_channel_disable(struct channel_gk20a *ch)
112{
113 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
114 struct tegra_vgpu_cmd_msg msg;
115 struct tegra_vgpu_channel_config_params *p =
116 &msg.params.channel_config;
117 int err;
118
119 gk20a_dbg_fn("");
120
121 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
122 msg.handle = platform->virt_handle;
123 p->handle = ch->virt_ctx;
124 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
125 WARN_ON(err || msg.ret);
126}
127
128static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
129 u32 gpfifo_entries)
130{
131 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
132 struct device __maybe_unused *d = dev_from_gk20a(ch->g);
133 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
134 struct tegra_vgpu_cmd_msg msg;
135 struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
136 int err;
137
138 gk20a_dbg_fn("");
139
140 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
141 msg.handle = platform->virt_handle;
142 p->handle = ch->virt_ctx;
143 p->gpfifo_va = gpfifo_base;
144 p->num_entries = gpfifo_entries;
145 p->userd_addr = ch->userd_iova;
146 p->iova = mapping ? 1 : 0;
147 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
148
149 return (err || msg.ret) ? -ENOMEM : 0;
150}
151
152static int init_engine_info(struct fifo_gk20a *f)
153{
154 struct fifo_engine_info_gk20a *gr_info;
155 const u32 gr_sw_id = ENGINE_GR_GK20A;
156
157 gk20a_dbg_fn("");
158
159 /* all we really care about finding is the graphics entry */
160 /* especially early on in sim it probably thinks it has more */
161 f->num_engines = 1;
162
163 gr_info = f->engine_info + gr_sw_id;
164
165 gr_info->sw_id = gr_sw_id;
166 gr_info->name = "gr";
167 /* FIXME: retrieve this from server */
168 gr_info->runlist_id = 0;
169 return 0;
170}
171
172static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
173{
174 struct fifo_engine_info_gk20a *engine_info;
175 struct fifo_runlist_info_gk20a *runlist;
176 struct device *d = dev_from_gk20a(g);
177 u32 runlist_id;
178 u32 i;
179 u64 runlist_size;
180
181 gk20a_dbg_fn("");
182
183 f->max_runlists = fifo_eng_runlist_base__size_1_v();
184 f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
185 f->max_runlists, GFP_KERNEL);
186 if (!f->runlist_info)
187 goto clean_up;
188
189 engine_info = f->engine_info + ENGINE_GR_GK20A;
190 runlist_id = engine_info->runlist_id;
191 runlist = &f->runlist_info[runlist_id];
192
193 runlist->active_channels =
194 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
195 GFP_KERNEL);
196 if (!runlist->active_channels)
197 goto clean_up_runlist_info;
198
199 runlist_size = sizeof(u16) * f->num_channels;
200 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
201 dma_addr_t iova;
202
203 runlist->mem[i].cpuva =
204 dma_alloc_coherent(d,
205 runlist_size,
206 &iova,
207 GFP_KERNEL);
208 if (!runlist->mem[i].cpuva) {
209 dev_err(d, "memory allocation failed\n");
210 goto clean_up_runlist;
211 }
212 runlist->mem[i].iova = iova;
213 runlist->mem[i].size = runlist_size;
214 }
215 mutex_init(&runlist->mutex);
216 init_waitqueue_head(&runlist->runlist_wq);
217
218 /* None of the buffers is pinned if this value doesn't change.
219 Otherwise, one of them (cur_buffer) must have been pinned. */
220 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
221
222 gk20a_dbg_fn("done");
223 return 0;
224
225clean_up_runlist:
226 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
227 if (runlist->mem[i].cpuva)
228 dma_free_coherent(d,
229 runlist->mem[i].size,
230 runlist->mem[i].cpuva,
231 runlist->mem[i].iova);
232 runlist->mem[i].cpuva = NULL;
233 runlist->mem[i].iova = 0;
234 }
235
236 kfree(runlist->active_channels);
237 runlist->active_channels = NULL;
238
239clean_up_runlist_info:
240 kfree(f->runlist_info);
241 f->runlist_info = NULL;
242
243clean_up:
244 gk20a_dbg_fn("fail");
245 return -ENOMEM;
246}
247
248static int vgpu_init_fifo_setup_sw(struct gk20a *g)
249{
250 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
251 struct fifo_gk20a *f = &g->fifo;
252 struct device *d = dev_from_gk20a(g);
253 int chid, err = 0;
254 dma_addr_t iova;
255
256 gk20a_dbg_fn("");
257
258 if (f->sw_ready) {
259 gk20a_dbg_fn("skip init");
260 return 0;
261 }
262
263 f->g = g;
264
265 err = vgpu_get_attribute(platform->virt_handle,
266 TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
267 &f->num_channels);
268 if (err)
269 return -ENXIO;
270
271 f->max_engines = ENGINE_INVAL_GK20A;
272
273 f->userd_entry_size = 1 << ram_userd_base_shift_v();
274 f->userd_total_size = f->userd_entry_size * f->num_channels;
275
276 f->userd.cpuva = dma_alloc_coherent(d,
277 f->userd_total_size,
278 &iova,
279 GFP_KERNEL);
280 if (!f->userd.cpuva) {
281 dev_err(d, "memory allocation failed\n");
282 goto clean_up;
283 }
284
285 f->userd.iova = iova;
286 err = gk20a_get_sgtable(d, &f->userd.sgt,
287 f->userd.cpuva, f->userd.iova,
288 f->userd_total_size);
289 if (err) {
290 dev_err(d, "failed to create sg table\n");
291 goto clean_up;
292 }
293
294 /* bar1 va */
295 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
296 if (!f->userd.gpu_va) {
297 dev_err(d, "gmmu mapping failed\n");
298 goto clean_up;
299 }
300
301 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
302
303 f->userd.size = f->userd_total_size;
304
305 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
306 GFP_KERNEL);
307 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
308 GFP_KERNEL);
309
310 if (!(f->channel && f->engine_info)) {
311 err = -ENOMEM;
312 goto clean_up;
313 }
314
315 init_engine_info(f);
316
317 init_runlist(g, f);
318
319 for (chid = 0; chid < f->num_channels; chid++) {
320 f->channel[chid].userd_cpu_va =
321 f->userd.cpuva + chid * f->userd_entry_size;
322 f->channel[chid].userd_iova =
323 NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
324 + chid * f->userd_entry_size;
325 f->channel[chid].userd_gpu_va =
326 f->userd.gpu_va + chid * f->userd_entry_size;
327
328 gk20a_init_channel_support(g, chid);
329 }
330 mutex_init(&f->ch_inuse_mutex);
331
332 f->deferred_reset_pending = false;
333 mutex_init(&f->deferred_reset_mutex);
334
335 f->sw_ready = true;
336
337 gk20a_dbg_fn("done");
338 return 0;
339
340clean_up:
341 gk20a_dbg_fn("fail");
342 /* FIXME: unmap from bar1 */
343 if (f->userd.sgt)
344 gk20a_free_sgtable(&f->userd.sgt);
345 if (f->userd.cpuva)
346 dma_free_coherent(d,
347 f->userd_total_size,
348 f->userd.cpuva,
349 f->userd.iova);
350 f->userd.cpuva = NULL;
351 f->userd.iova = 0;
352
353 memset(&f->userd, 0, sizeof(struct userd_desc));
354
355 kfree(f->channel);
356 f->channel = NULL;
357 kfree(f->engine_info);
358 f->engine_info = NULL;
359
360 return err;
361}
362
363static int vgpu_init_fifo_setup_hw(struct gk20a *g)
364{
365 gk20a_dbg_fn("");
366
367 /* test write, read through bar1 @ userd region before
368 * turning on the snooping */
369 {
370 struct fifo_gk20a *f = &g->fifo;
371 u32 v, v1 = 0x33, v2 = 0x55;
372
373 u32 bar1_vaddr = f->userd.gpu_va;
374 volatile u32 *cpu_vaddr = f->userd.cpuva;
375
376 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
377 bar1_vaddr);
378
379 v = gk20a_bar1_readl(g, bar1_vaddr);
380
381 *cpu_vaddr = v1;
382 smp_mb();
383
384 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
385 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
386 return -EINVAL;
387 }
388
389 gk20a_bar1_writel(g, bar1_vaddr, v2);
390
391 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
392 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
393 return -EINVAL;
394 }
395
396 /* is it visible to the cpu? */
397 if (*cpu_vaddr != v2) {
398 gk20a_err(dev_from_gk20a(g),
399 "cpu didn't see bar1 write @ %p!",
400 cpu_vaddr);
401 }
402
403 /* put it back */
404 gk20a_bar1_writel(g, bar1_vaddr, v);
405 }
406
407 gk20a_dbg_fn("done");
408
409 return 0;
410}
411
412int vgpu_init_fifo_support(struct gk20a *g)
413{
414 u32 err;
415
416 gk20a_dbg_fn("");
417
418 err = vgpu_init_fifo_setup_sw(g);
419 if (err)
420 return err;
421
422 err = vgpu_init_fifo_setup_hw(g);
423 return err;
424}
425
426static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
427{
428 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
429 struct fifo_gk20a *f = &g->fifo;
430 struct tegra_vgpu_cmd_msg msg;
431 struct tegra_vgpu_channel_config_params *p =
432 &msg.params.channel_config;
433 int err;
434
435 gk20a_dbg_fn("");
436
437 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
438 msg.handle = platform->virt_handle;
439 p->handle = f->channel[hw_chid].virt_ctx;
440 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
441
442 if (err || msg.ret) {
443 gk20a_err(dev_from_gk20a(g),
444 "preempt channel %d failed\n", hw_chid);
445 err = -ENOMEM;
446 }
447
448 return err;
449}
450
451static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
452 u32 num_entries)
453{
454 struct tegra_vgpu_cmd_msg *msg;
455 struct tegra_vgpu_runlist_params *p;
456 size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
457 char *ptr;
458 int err;
459
460 msg = kmalloc(size, GFP_KERNEL);
461 if (!msg)
462 return -1;
463
464 msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
465 msg->handle = handle;
466 p = &msg->params.runlist;
467 p->runlist_id = runlist_id;
468 p->num_entries = num_entries;
469
470 ptr = (char *)msg + sizeof(*msg);
471 memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
472 err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
473
474 err = (err || msg->ret) ? -1 : 0;
475 kfree(msg);
476 return err;
477}
478
479static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
480 u32 hw_chid, bool add,
481 bool wait_for_finish)
482{
483 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
484 struct fifo_gk20a *f = &g->fifo;
485 struct fifo_runlist_info_gk20a *runlist;
486 u16 *runlist_entry = NULL;
487 u32 count = 0;
488
489 gk20a_dbg_fn("");
490
491 runlist = &f->runlist_info[runlist_id];
492
493 /* valid channel, add/remove it from active list.
494 Otherwise, keep active list untouched for suspend/resume. */
495 if (hw_chid != ~0) {
496 if (add) {
497 if (test_and_set_bit(hw_chid,
498 runlist->active_channels) == 1)
499 return 0;
500 } else {
501 if (test_and_clear_bit(hw_chid,
502 runlist->active_channels) == 0)
503 return 0;
504 }
505 }
506
507 if (hw_chid != ~0 || /* add/remove a valid channel */
508 add /* resume to add all channels back */) {
509 u32 chid;
510
511 runlist_entry = runlist->mem[0].cpuva;
512 for_each_set_bit(chid,
513 runlist->active_channels, f->num_channels) {
514 gk20a_dbg_info("add channel %d to runlist", chid);
515 runlist_entry[0] = chid;
516 runlist_entry++;
517 count++;
518 }
519 } else /* suspend to remove all channels */
520 count = 0;
521
522 return vgpu_submit_runlist(platform->virt_handle, runlist_id,
523 runlist->mem[0].cpuva, count);
524}
525
526/* add/remove a channel from runlist
527 special cases below: runlist->active_channels will NOT be changed.
528 (hw_chid == ~0 && !add) means remove all active channels from runlist.
529 (hw_chid == ~0 && add) means restore all active channels on runlist. */
530static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
531 u32 hw_chid, bool add, bool wait_for_finish)
532{
533 struct fifo_runlist_info_gk20a *runlist = NULL;
534 struct fifo_gk20a *f = &g->fifo;
535 u32 ret = 0;
536
537 gk20a_dbg_fn("");
538
539 runlist = &f->runlist_info[runlist_id];
540
541 mutex_lock(&runlist->mutex);
542
543 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
544 wait_for_finish);
545
546 mutex_unlock(&runlist->mutex);
547 return ret;
548}
549
550static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
551{
552 gk20a_dbg_fn("");
553
554 return 0;
555}
556
557void vgpu_init_fifo_ops(struct gpu_ops *gops)
558{
559 gops->fifo.bind_channel = vgpu_channel_bind;
560 gops->fifo.unbind_channel = vgpu_channel_unbind;
561 gops->fifo.disable_channel = vgpu_channel_disable;
562 gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
563 gops->fifo.free_inst = vgpu_channel_free_inst;
564 gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
565 gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
566 gops->fifo.update_runlist = vgpu_fifo_update_runlist;
567 gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
568}
569
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
new file mode 100644
index 00000000..a7e966da
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -0,0 +1,687 @@
1/*
2 * Virtualized GPU Graphics
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "vgpu/vgpu.h"
17#include "gk20a/hw_gr_gk20a.h"
18
19static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va)
20{
21 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
22 struct tegra_vgpu_cmd_msg msg;
23 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
24 int err;
25
26 gk20a_dbg_fn("");
27
28 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX;
29 msg.handle = platform->virt_handle;
30 p->handle = c->virt_ctx;
31 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
32
33 return (err || msg.ret) ? -1 : 0;
34}
35
36static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g,
37 struct channel_gk20a *c, bool patch)
38{
39 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
40 struct tegra_vgpu_cmd_msg msg;
41 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
42 int err;
43
44 gk20a_dbg_fn("");
45
46 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX;
47 msg.handle = platform->virt_handle;
48 p->handle = c->virt_ctx;
49 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
50
51 return (err || msg.ret) ? -1 : 0;
52}
53
54/* load a saved, fresh copy of the golden image into the channel gr_ctx */
55static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
56 struct channel_gk20a *c)
57{
58 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
59 struct tegra_vgpu_cmd_msg msg;
60 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
61 int err;
62
63 gk20a_dbg_fn("");
64
65 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
66 msg.handle = platform->virt_handle;
67 p->handle = c->virt_ctx;
68 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
69
70 return (err || msg.ret) ? -1 : 0;
71}
72
73static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
74{
75 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
76
77 gk20a_dbg_fn("");
78
79 vgpu_get_attribute(platform->virt_handle,
80 TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE,
81 &g->gr.ctx_vars.golden_image_size);
82 vgpu_get_attribute(platform->virt_handle,
83 TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE,
84 &g->gr.ctx_vars.zcull_ctxsw_image_size);
85 if (!g->gr.ctx_vars.golden_image_size ||
86 !g->gr.ctx_vars.zcull_ctxsw_image_size)
87 return -ENXIO;
88
89 gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
90 g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
91 return 0;
92}
93
94static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
95{
96 struct gr_gk20a *gr = &g->gr;
97 int attr_buffer_size;
98
99 u32 cb_buffer_size = gr->bundle_cb_default_size *
100 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
101
102 u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
103 gr_scc_pagepool_total_pages_byte_granularity_v();
104
105 gk20a_dbg_fn("");
106
107 attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
108
109 gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
110 gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;
111
112 gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
113 gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;
114
115 gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
116 gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;
117
118 gk20a_dbg_info("priv access map size : %d",
119 gr->ctx_vars.priv_access_map_size);
120 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size =
121 gr->ctx_vars.priv_access_map_size;
122
123 return 0;
124}
125
126static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
127 struct channel_gk20a *c)
128{
129 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
130 struct tegra_vgpu_cmd_msg msg;
131 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
132 struct vm_gk20a *ch_vm = c->vm;
133 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
134 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
135 struct gr_gk20a *gr = &g->gr;
136 u64 gpu_va;
137 u32 i;
138 int err;
139
140 gk20a_dbg_fn("");
141
142 /* FIXME: add VPR support */
143
144 /* Circular Buffer */
145 gpu_va = gk20a_vm_alloc_va(ch_vm,
146 gr->global_ctx_buffer[CIRCULAR].size, 0);
147
148 if (!gpu_va)
149 goto clean_up;
150 g_bfr_va[CIRCULAR_VA] = gpu_va;
151 g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size;
152
153 /* Attribute Buffer */
154 gpu_va = gk20a_vm_alloc_va(ch_vm,
155 gr->global_ctx_buffer[ATTRIBUTE].size, 0);
156
157 if (!gpu_va)
158 goto clean_up;
159 g_bfr_va[ATTRIBUTE_VA] = gpu_va;
160 g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size;
161
162 /* Page Pool */
163 gpu_va = gk20a_vm_alloc_va(ch_vm,
164 gr->global_ctx_buffer[PAGEPOOL].size, 0);
165 if (!gpu_va)
166 goto clean_up;
167 g_bfr_va[PAGEPOOL_VA] = gpu_va;
168 g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size;
169
170 /* Priv register Access Map */
171 gpu_va = gk20a_vm_alloc_va(ch_vm,
172 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0);
173 if (!gpu_va)
174 goto clean_up;
175 g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
176 g_bfr_size[PRIV_ACCESS_MAP_VA] =
177 gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
178
179 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
180 msg.handle = platform->virt_handle;
181 p->handle = c->virt_ctx;
182 p->cb_va = g_bfr_va[CIRCULAR_VA];
183 p->attr_va = g_bfr_va[ATTRIBUTE_VA];
184 p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
185 p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
186 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
187 if (err || msg.ret)
188 goto clean_up;
189
190 c->ch_ctx.global_ctx_buffer_mapped = true;
191 return 0;
192
193 clean_up:
194 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
195 if (g_bfr_va[i]) {
196 gk20a_vm_free_va(ch_vm, g_bfr_va[i],
197 g_bfr_size[i], 0);
198 g_bfr_va[i] = 0;
199 }
200 }
201 return -ENOMEM;
202}
203
204static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
205{
206 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
207 struct vm_gk20a *ch_vm = c->vm;
208 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
209 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
210 u32 i;
211
212 gk20a_dbg_fn("");
213
214 if (c->ch_ctx.global_ctx_buffer_mapped) {
215 struct tegra_vgpu_cmd_msg msg;
216 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
217 int err;
218
219 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
220 msg.handle = platform->virt_handle;
221 p->handle = c->virt_ctx;
222 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
223 WARN_ON(err || msg.ret);
224 }
225
226 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
227 if (g_bfr_va[i]) {
228 gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0);
229 g_bfr_va[i] = 0;
230 g_bfr_size[i] = 0;
231 }
232 }
233 c->ch_ctx.global_ctx_buffer_mapped = false;
234}
235
236static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g,
237 struct channel_gk20a *c)
238{
239 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
240 struct tegra_vgpu_cmd_msg msg;
241 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
242 struct gr_gk20a *gr = &g->gr;
243 struct gr_ctx_desc *gr_ctx;
244 struct vm_gk20a *ch_vm = c->vm;
245 int err;
246
247 gk20a_dbg_fn("");
248
249 if (gr->ctx_vars.buffer_size == 0)
250 return 0;
251
252 /* alloc channel gr ctx buffer */
253 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
254 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
255
256 gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
257 if (!gr_ctx)
258 return -ENOMEM;
259
260 gr_ctx->size = gr->ctx_vars.buffer_total_size;
261 gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0);
262
263 if (!gr_ctx->gpu_va) {
264 kfree(gr_ctx);
265 return -ENOMEM;
266 }
267
268 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX;
269 msg.handle = platform->virt_handle;
270 p->handle = c->virt_ctx;
271 p->gr_ctx_va = gr_ctx->gpu_va;
272 p->class_num = c->obj_class;
273 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
274
275 if (err || msg.ret) {
276 gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0);
277 err = -ENOMEM;
278 } else
279 c->ch_ctx.gr_ctx = gr_ctx;
280
281 return err;
282}
283
284static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
285{
286 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
287 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
288 struct vm_gk20a *ch_vm = c->vm;
289
290 gk20a_dbg_fn("");
291
292 if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) {
293 struct tegra_vgpu_cmd_msg msg;
294 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
295 int err;
296
297 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
298 msg.handle = platform->virt_handle;
299 p->handle = c->virt_ctx;
300 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
301 WARN_ON(err || msg.ret);
302
303 gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va,
304 ch_ctx->gr_ctx->size, 0);
305 ch_ctx->gr_ctx->gpu_va = 0;
306 kfree(ch_ctx->gr_ctx);
307 }
308}
309
310static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
311 struct channel_gk20a *c)
312{
313 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
314 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
315 struct vm_gk20a *ch_vm = c->vm;
316 struct tegra_vgpu_cmd_msg msg;
317 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
318 int err;
319
320 gk20a_dbg_fn("");
321
322 patch_ctx->size = 128 * sizeof(u32);
323 patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
324 if (!patch_ctx->gpu_va)
325 return -ENOMEM;
326
327 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
328 msg.handle = platform->virt_handle;
329 p->handle = c->virt_ctx;
330 p->patch_ctx_va = patch_ctx->gpu_va;
331 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
332 if (err || msg.ret) {
333 gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
334 err = -ENOMEM;
335 }
336
337 return err;
338}
339
340static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
341{
342 struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
343 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
344 struct vm_gk20a *ch_vm = c->vm;
345
346 gk20a_dbg_fn("");
347
348 if (patch_ctx->gpu_va) {
349 struct tegra_vgpu_cmd_msg msg;
350 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
351 int err;
352
353 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
354 msg.handle = platform->virt_handle;
355 p->handle = c->virt_ctx;
356 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
357 WARN_ON(err || msg.ret);
358
359 gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
360 patch_ctx->gpu_va = 0;
361 }
362}
363
364static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
365{
366 gk20a_dbg_fn("");
367
368 vgpu_gr_unmap_global_ctx_buffers(c);
369 vgpu_gr_free_channel_patch_ctx(c);
370 if (!gk20a_is_channel_marked_as_tsg(c))
371 vgpu_gr_free_channel_gr_ctx(c);
372
373 /* zcull_ctx, pm_ctx */
374
375 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
376
377 c->num_objects = 0;
378 c->first_init = false;
379}
380
381static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
382 struct nvhost_alloc_obj_ctx_args *args)
383{
384 struct gk20a *g = c->g;
385 struct fifo_gk20a *f = &g->fifo;
386 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
387 struct tsg_gk20a *tsg = NULL;
388 int err = 0;
389
390 gk20a_dbg_fn("");
391
392 /* an address space needs to have been bound at this point.*/
393 if (!gk20a_channel_as_bound(c)) {
394 gk20a_err(dev_from_gk20a(g),
395 "not bound to address space at time"
396 " of grctx allocation");
397 return -EINVAL;
398 }
399
400 if (!g->ops.gr.is_valid_class(g, args->class_num)) {
401 gk20a_err(dev_from_gk20a(g),
402 "invalid obj class 0x%x", args->class_num);
403 err = -EINVAL;
404 goto out;
405 }
406 c->obj_class = args->class_num;
407
408 /* FIXME: add TSG support */
409 if (gk20a_is_channel_marked_as_tsg(c))
410 tsg = &f->tsg[c->tsgid];
411
412 /* allocate gr ctx buffer */
413 if (!ch_ctx->gr_ctx) {
414 err = vgpu_gr_alloc_channel_gr_ctx(g, c);
415 if (err) {
416 gk20a_err(dev_from_gk20a(g),
417 "fail to allocate gr ctx buffer");
418 goto out;
419 }
420 } else {
421 /*TBD: needs to be more subtle about which is
422 * being allocated as some are allowed to be
423 * allocated along same channel */
424 gk20a_err(dev_from_gk20a(g),
425 "too many classes alloc'd on same channel");
426 err = -EINVAL;
427 goto out;
428 }
429
430 /* commit gr ctx buffer */
431 err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
432 if (err) {
433 gk20a_err(dev_from_gk20a(g),
434 "fail to commit gr ctx buffer");
435 goto out;
436 }
437
438 /* allocate patch buffer */
439 if (ch_ctx->patch_ctx.pages == NULL) {
440 err = vgpu_gr_alloc_channel_patch_ctx(g, c);
441 if (err) {
442 gk20a_err(dev_from_gk20a(g),
443 "fail to allocate patch buffer");
444 goto out;
445 }
446 }
447
448 /* map global buffer to channel gpu_va and commit */
449 if (!ch_ctx->global_ctx_buffer_mapped) {
450 err = vgpu_gr_map_global_ctx_buffers(g, c);
451 if (err) {
452 gk20a_err(dev_from_gk20a(g),
453 "fail to map global ctx buffer");
454 goto out;
455 }
456 gr_gk20a_elpg_protected_call(g,
457 vgpu_gr_commit_global_ctx_buffers(g, c, true));
458 }
459
460 /* load golden image */
461 if (!c->first_init) {
462 err = gr_gk20a_elpg_protected_call(g,
463 vgpu_gr_load_golden_ctx_image(g, c));
464 if (err) {
465 gk20a_err(dev_from_gk20a(g),
466 "fail to load golden ctx image");
467 goto out;
468 }
469 c->first_init = true;
470 }
471
472 c->num_objects++;
473
474 gk20a_dbg_fn("done");
475 return 0;
476out:
477 /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
478 can be reused so no need to release them.
479 2. golden image load is a one time thing so if
480 they pass, no need to undo. */
481 gk20a_err(dev_from_gk20a(g), "fail");
482 return err;
483}
484
485static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c,
486 struct nvhost_free_obj_ctx_args *args)
487{
488 unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
489
490 gk20a_dbg_fn("");
491
492 if (c->num_objects == 0)
493 return 0;
494
495 c->num_objects--;
496
497 if (c->num_objects == 0) {
498 c->first_init = false;
499 gk20a_disable_channel(c,
500 !c->has_timedout,
501 timeout);
502 }
503
504 return 0;
505}
506
507static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
508{
509 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
510
511 gk20a_dbg_fn("");
512
513 if (vgpu_get_attribute(platform->virt_handle,
514 TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
515 return -ENOMEM;
516
517 if (vgpu_get_attribute(platform->virt_handle,
518 TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
519 &gr->max_tpc_per_gpc_count))
520 return -ENOMEM;
521
522 if (vgpu_get_attribute(platform->virt_handle,
523 TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
524 &gr->max_tpc_count))
525 return -ENOMEM;
526
527 g->ops.gr.bundle_cb_defaults(g);
528 g->ops.gr.cb_size_default(g);
529 g->ops.gr.calc_global_ctx_buffer_size(g);
530 return 0;
531}
532
533static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
534 struct channel_gk20a *c, u64 zcull_va,
535 u32 mode)
536{
537 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
538 struct tegra_vgpu_cmd_msg msg;
539 struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
540 int err;
541
542 gk20a_dbg_fn("");
543
544 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL;
545 msg.handle = platform->virt_handle;
546 p->handle = c->virt_ctx;
547 p->zcull_va = zcull_va;
548 p->mode = mode;
549 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
550
551 return (err || msg.ret) ? -ENOMEM : 0;
552}
553
554static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
555 struct gr_zcull_info *zcull_params)
556{
557 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
558 struct tegra_vgpu_cmd_msg msg;
559 struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
560 int err;
561
562 gk20a_dbg_fn("");
563
564 msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO;
565 msg.handle = platform->virt_handle;
566 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
567 if (err || msg.ret)
568 return -ENOMEM;
569
570 zcull_params->width_align_pixels = p->width_align_pixels;
571 zcull_params->height_align_pixels = p->height_align_pixels;
572 zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots;
573 zcull_params->aliquot_total = p->aliquot_total;
574 zcull_params->region_byte_multiplier = p->region_byte_multiplier;
575 zcull_params->region_header_size = p->region_header_size;
576 zcull_params->subregion_header_size = p->subregion_header_size;
577 zcull_params->subregion_width_align_pixels =
578 p->subregion_width_align_pixels;
579 zcull_params->subregion_height_align_pixels =
580 p->subregion_height_align_pixels;
581 zcull_params->subregion_count = p->subregion_count;
582
583 return 0;
584}
585
586static void vgpu_remove_gr_support(struct gr_gk20a *gr)
587{
588 gk20a_dbg_fn("");
589
590 gk20a_allocator_destroy(&gr->comp_tags);
591}
592
593static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
594{
595 struct gr_gk20a *gr = &g->gr;
596 int err;
597
598 gk20a_dbg_fn("");
599
600 if (gr->sw_ready) {
601 gk20a_dbg_fn("skip init");
602 return 0;
603 }
604
605 gr->g = g;
606
607 err = vgpu_gr_init_gr_config(g, gr);
608 if (err)
609 goto clean_up;
610
611 err = vgpu_gr_init_ctx_state(g, gr);
612 if (err)
613 goto clean_up;
614
615 err = g->ops.ltc.init_comptags(g, gr);
616 if (err)
617 goto clean_up;
618
619 err = vgpu_gr_alloc_global_ctx_buffers(g);
620 if (err)
621 goto clean_up;
622
623 mutex_init(&gr->ctx_mutex);
624
625 gr->remove_support = vgpu_remove_gr_support;
626 gr->sw_ready = true;
627
628 gk20a_dbg_fn("done");
629 return 0;
630
631clean_up:
632 gk20a_err(dev_from_gk20a(g), "fail");
633 vgpu_remove_gr_support(gr);
634 return err;
635}
636
637int vgpu_init_gr_support(struct gk20a *g)
638{
639 gk20a_dbg_fn("");
640
641 return vgpu_gr_init_gr_setup_sw(g);
642}
643
644struct gr_isr_data {
645 u32 addr;
646 u32 data_lo;
647 u32 data_hi;
648 u32 curr_ctx;
649 u32 chid;
650 u32 offset;
651 u32 sub_chan;
652 u32 class_num;
653};
654
655static int vgpu_gr_handle_notify_pending(struct gk20a *g,
656 struct gr_isr_data *isr_data)
657{
658 struct fifo_gk20a *f = &g->fifo;
659 struct channel_gk20a *ch = &f->channel[isr_data->chid];
660
661 gk20a_dbg_fn("");
662 wake_up(&ch->notifier_wq);
663 return 0;
664}
665
666int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
667{
668 struct gr_isr_data isr_data;
669
670 gk20a_dbg_fn("");
671
672 isr_data.chid = info->chid;
673
674 if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY)
675 vgpu_gr_handle_notify_pending(g, &isr_data);
676
677 return 0;
678}
679
680void vgpu_init_gr_ops(struct gpu_ops *gops)
681{
682 gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
683 gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx;
684 gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx;
685 gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull;
686 gops->gr.get_zcull_info = vgpu_gr_get_zcull_info;
687}
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
new file mode 100644
index 00000000..ddff23b7
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -0,0 +1,55 @@
1/*
2 * Virtualized GPU L2
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "vgpu/vgpu.h"
17
18static int vgpu_determine_L2_size_bytes(struct gk20a *g)
19{
20 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
21 u32 cache_size = 0;
22
23 gk20a_dbg_fn("");
24
25 if (vgpu_get_attribute(platform->virt_handle,
26 TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
27 dev_err(dev_from_gk20a(g), "unable to get L2 size");
28
29 return cache_size;
30}
31
32static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
33{
34 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
35 u32 max_comptag_lines = 0;
36
37 gk20a_dbg_fn("");
38
39 vgpu_get_attribute(platform->virt_handle,
40 TEGRA_VGPU_ATTRIB_COMPTAG_LINES, &max_comptag_lines);
41 if (max_comptag_lines < 2)
42 return -ENXIO;
43
44 gk20a_allocator_init(&gr->comp_tags, "comptag",
45 1, /* start */
46 max_comptag_lines - 1, /* length*/
47 1); /* align */
48 return 0;
49}
50
51void vgpu_init_ltc_ops(struct gpu_ops *gops)
52{
53 gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes;
54 gops->ltc.init_comptags = vgpu_ltc_init_comptags;
55}
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644
index 00000000..6ed1dece
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -0,0 +1,425 @@
1/*
2 * Virtualized GPU Memory Management
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h"
18
19/* note: keep the page sizes sorted lowest to highest here */
20static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
21static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
22
23static int vgpu_init_mm_setup_sw(struct gk20a *g)
24{
25 struct mm_gk20a *mm = &g->mm;
26
27 gk20a_dbg_fn("");
28
29 if (mm->sw_ready) {
30 gk20a_dbg_fn("skip init");
31 return 0;
32 }
33
34 mm->g = g;
35 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
36 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
37 mm->pde_stride = mm->big_page_size << 10;
38 mm->pde_stride_shift = ilog2(mm->pde_stride);
39 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
40
41 /*TBD: make channel vm size configurable */
42 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
43
44 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
45
46 mm->sw_ready = true;
47
48 return 0;
49}
50
51int vgpu_init_mm_support(struct gk20a *g)
52{
53 gk20a_dbg_fn("");
54
55 return vgpu_init_mm_setup_sw(g);
56}
57
58static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
59 u64 map_offset,
60 struct sg_table *sgt,
61 u64 buffer_offset,
62 u64 size,
63 int pgsz_idx,
64 u8 kind_v,
65 u32 ctag_offset,
66 u32 flags,
67 int rw_flag,
68 bool clear_ctags)
69{
70 int err = 0;
71 struct device *d = dev_from_vm(vm);
72 struct gk20a *g = gk20a_from_vm(vm);
73 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
74 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
75 struct tegra_vgpu_cmd_msg msg;
76 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
77 u64 addr = gk20a_mm_iova_addr(sgt->sgl);
78 u8 prot;
79
80 gk20a_dbg_fn("");
81
82 /* Allocate (or validate when map_offset != 0) the virtual address. */
83 if (!map_offset) {
84 map_offset = gk20a_vm_alloc_va(vm, size,
85 pgsz_idx);
86 if (!map_offset) {
87 gk20a_err(d, "failed to allocate va space");
88 err = -ENOMEM;
89 goto fail;
90 }
91 }
92
93 if (rw_flag == gk20a_mem_flag_read_only)
94 prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
95 else if (rw_flag == gk20a_mem_flag_write_only)
96 prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
97 else
98 prot = TEGRA_VGPU_MAP_PROT_NONE;
99
100 msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
101 msg.handle = platform->virt_handle;
102 p->handle = vm->handle;
103 p->addr = addr;
104 p->gpu_va = map_offset;
105 p->size = size;
106 p->pgsz_idx = pgsz_idx;
107 p->iova = mapping ? 1 : 0;
108 p->kind = kind_v;
109 p->cacheable =
110 (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
111 p->prot = prot;
112 p->ctag_offset = ctag_offset;
113 p->clear_ctags = clear_ctags;
114 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
115 if (err || msg.ret)
116 goto fail;
117
118 vm->tlb_dirty = true;
119 return map_offset;
120fail:
121 gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
122 return 0;
123}
124
125static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
126 u64 vaddr,
127 u64 size,
128 int pgsz_idx,
129 bool va_allocated,
130 int rw_flag)
131{
132 struct gk20a *g = gk20a_from_vm(vm);
133 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
134 struct tegra_vgpu_cmd_msg msg;
135 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
136 int err;
137
138 gk20a_dbg_fn("");
139
140 if (va_allocated) {
141 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
142 if (err) {
143 dev_err(dev_from_vm(vm),
144 "failed to free va");
145 return;
146 }
147 }
148
149 msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
150 msg.handle = platform->virt_handle;
151 p->handle = vm->handle;
152 p->gpu_va = vaddr;
153 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
154 if (err || msg.ret)
155 dev_err(dev_from_vm(vm),
156 "failed to update gmmu ptes on unmap");
157
158 vm->tlb_dirty = true;
159}
160
161static void vgpu_vm_remove_support(struct vm_gk20a *vm)
162{
163 struct gk20a *g = vm->mm->g;
164 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
165 struct mapped_buffer_node *mapped_buffer;
166 struct vm_reserved_va_node *va_node, *va_node_tmp;
167 struct tegra_vgpu_cmd_msg msg;
168 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
169 struct rb_node *node;
170 int err;
171
172 gk20a_dbg_fn("");
173 mutex_lock(&vm->update_gmmu_lock);
174
175 /* TBD: add a flag here for the unmap code to recognize teardown
176 * and short-circuit any otherwise expensive operations. */
177
178 node = rb_first(&vm->mapped_buffers);
179 while (node) {
180 mapped_buffer =
181 container_of(node, struct mapped_buffer_node, node);
182 gk20a_vm_unmap_locked(mapped_buffer);
183 node = rb_first(&vm->mapped_buffers);
184 }
185
186 /* destroy remaining reserved memory areas */
187 list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
188 reserved_va_list) {
189 list_del(&va_node->reserved_va_list);
190 kfree(va_node);
191 }
192
193 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
194 msg.handle = platform->virt_handle;
195 p->handle = vm->handle;
196 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
197 WARN_ON(err || msg.ret);
198
199 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
200 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
201
202 mutex_unlock(&vm->update_gmmu_lock);
203
204 /* release zero page if used */
205 if (vm->zero_page_cpuva)
206 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
207 vm->zero_page_cpuva, vm->zero_page_iova);
208
209 /* vm is not used anymore. release it. */
210 kfree(vm);
211}
212
213u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
214{
215 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
216 struct dma_iommu_mapping *mapping =
217 to_dma_iommu_mapping(dev_from_gk20a(g));
218 u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
219 struct tegra_vgpu_cmd_msg msg;
220 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
221 int err;
222
223 msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
224 msg.handle = platform->virt_handle;
225 p->addr = addr;
226 p->size = size;
227 p->iova = mapping ? 1 : 0;
228 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
229 if (err || msg.ret)
230 addr = 0;
231 else
232 addr = p->gpu_va;
233
234 return addr;
235}
236
237/* address space interfaces for the gk20a module */
238static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
239{
240 struct gk20a_as *as = as_share->as;
241 struct gk20a *g = gk20a_from_as(as);
242 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
243 struct tegra_vgpu_cmd_msg msg;
244 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
245 struct mm_gk20a *mm = &g->mm;
246 struct vm_gk20a *vm;
247 u64 vma_size;
248 u32 num_pages, low_hole_pages;
249 char name[32];
250 int err;
251
252 gk20a_dbg_fn("");
253
254 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
255 if (!vm)
256 return -ENOMEM;
257
258 as_share->vm = vm;
259
260 vm->mm = mm;
261 vm->as_share = as_share;
262
263 vm->big_pages = true;
264
265 vm->va_start = mm->pde_stride; /* create a one pde hole */
266 vm->va_limit = mm->channel.size; /* note this means channel.size is
267 really just the max */
268
269 msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
270 msg.handle = platform->virt_handle;
271 p->size = vm->va_limit;
272 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
273 if (err || msg.ret)
274 return -ENOMEM;
275
276 vm->handle = p->handle;
277
278 /* low-half: alloc small pages */
279 /* high-half: alloc big pages */
280 vma_size = mm->channel.size >> 1;
281
282 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
283 gmmu_page_sizes[gmmu_page_size_small]>>10);
284 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
285
286 /* num_pages above is without regard to the low-side hole. */
287 low_hole_pages = (vm->va_start >>
288 gmmu_page_shifts[gmmu_page_size_small]);
289
290 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
291 low_hole_pages, /* start */
292 num_pages - low_hole_pages, /* length */
293 1); /* align */
294
295 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
296 gmmu_page_sizes[gmmu_page_size_big]>>10);
297
298 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
299 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
300 num_pages, /* start */
301 num_pages, /* length */
302 1); /* align */
303
304 vm->mapped_buffers = RB_ROOT;
305
306 mutex_init(&vm->update_gmmu_lock);
307 kref_init(&vm->ref);
308 INIT_LIST_HEAD(&vm->reserved_va_list);
309
310 vm->enable_ctag = true;
311
312 return 0;
313}
314
315static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
316 struct channel_gk20a *ch)
317{
318 struct vm_gk20a *vm = as_share->vm;
319 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
320 struct tegra_vgpu_cmd_msg msg;
321 struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
322 int err;
323
324 gk20a_dbg_fn("");
325
326 ch->vm = vm;
327 msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
328 msg.handle = platform->virt_handle;
329 p->as_handle = vm->handle;
330 p->chan_handle = ch->virt_ctx;
331 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
332
333 if (err || msg.ret) {
334 ch->vm = NULL;
335 err = -ENOMEM;
336 }
337
338 return err;
339}
340
341static void vgpu_cache_maint(u64 handle, u8 op)
342{
343 struct tegra_vgpu_cmd_msg msg;
344 struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
345 int err;
346
347 msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
348 msg.handle = handle;
349 p->op = op;
350 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
351 WARN_ON(err || msg.ret);
352}
353
354static int vgpu_mm_fb_flush(struct gk20a *g)
355{
356 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
357
358 gk20a_dbg_fn("");
359
360 vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
361 return 0;
362}
363
364static void vgpu_mm_l2_invalidate(struct gk20a *g)
365{
366 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
367
368 gk20a_dbg_fn("");
369
370 vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
371}
372
373static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
374{
375 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
376 u8 op;
377
378 gk20a_dbg_fn("");
379
380 if (invalidate)
381 op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
382 else
383 op = TEGRA_VGPU_L2_MAINT_FLUSH;
384
385 vgpu_cache_maint(platform->virt_handle, op);
386}
387
388static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
389{
390 struct gk20a *g = gk20a_from_vm(vm);
391 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
392 struct tegra_vgpu_cmd_msg msg;
393 struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
394 int err;
395
396 gk20a_dbg_fn("");
397
398 /* No need to invalidate if tlb is clean */
399 mutex_lock(&vm->update_gmmu_lock);
400 if (!vm->tlb_dirty) {
401 mutex_unlock(&vm->update_gmmu_lock);
402 return;
403 }
404
405 msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
406 msg.handle = platform->virt_handle;
407 p->handle = vm->handle;
408 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
409 WARN_ON(err || msg.ret);
410 vm->tlb_dirty = false;
411 mutex_unlock(&vm->update_gmmu_lock);
412}
413
414void vgpu_init_mm_ops(struct gpu_ops *gops)
415{
416 gops->mm.gmmu_map = vgpu_locked_gmmu_map;
417 gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
418 gops->mm.vm_remove = vgpu_vm_remove_support;
419 gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
420 gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
421 gops->mm.fb_flush = vgpu_mm_fb_flush;
422 gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
423 gops->mm.l2_flush = vgpu_mm_l2_flush;
424 gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
425}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
new file mode 100644
index 00000000..cfe307ff
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -0,0 +1,416 @@
1/*
2 * Virtualized GPU
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/kthread.h>
17#include <linux/delay.h>
18#include <linux/dma-mapping.h>
19#include <linux/pm_runtime.h>
20#include "vgpu/vgpu.h"
21#include "gk20a/debug_gk20a.h"
22#include "gk20a/hal_gk20a.h"
23#include "gk20a/hw_mc_gk20a.h"
24
25static inline int vgpu_comm_init(struct platform_device *pdev)
26{
27 size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
28
29 return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
30 queue_sizes, TEGRA_VGPU_QUEUE_CMD,
31 ARRAY_SIZE(queue_sizes));
32}
33
34static inline void vgpu_comm_deinit(void)
35{
36 size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
37
38 tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
39 ARRAY_SIZE(queue_sizes));
40}
41
42int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
43 size_t size_out)
44{
45 void *handle;
46 size_t size = size_in;
47 void *data = msg;
48 int err;
49
50 err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
51 tegra_gr_comm_get_server_vmid(),
52 TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
53 if (!err) {
54 WARN_ON(size < size_out);
55 memcpy(msg, data, size_out);
56 tegra_gr_comm_release(handle);
57 }
58
59 return err;
60}
61
62static u64 vgpu_connect(void)
63{
64 struct tegra_vgpu_cmd_msg msg;
65 struct tegra_vgpu_connect_params *p = &msg.params.connect;
66 int err;
67
68 msg.cmd = TEGRA_VGPU_CMD_CONNECT;
69 p->module = TEGRA_VGPU_MODULE_GPU;
70 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
71
72 return (err || msg.ret) ? 0 : p->handle;
73}
74
75int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
76{
77 struct tegra_vgpu_cmd_msg msg;
78 struct tegra_vgpu_attrib_params *p = &msg.params.attrib;
79 int err;
80
81 msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE;
82 msg.handle = handle;
83 p->attrib = attrib;
84 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
85
86 if (err || msg.ret)
87 return -1;
88
89 *value = p->value;
90 return 0;
91}
92
93static int vgpu_intr_thread(void *dev_id)
94{
95 struct gk20a *g = dev_id;
96
97 while (true) {
98 struct tegra_vgpu_intr_msg *msg;
99 u32 sender;
100 void *handle;
101 size_t size;
102 int err;
103
104 err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
105 TEGRA_VGPU_QUEUE_INTR, &handle,
106 (void **)&msg, &size, &sender);
107 if (WARN_ON(err))
108 continue;
109
110 if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
111 tegra_gr_comm_release(handle);
112 break;
113 }
114
115 if (msg->unit == TEGRA_VGPU_INTR_GR)
116 vgpu_gr_isr(g, &msg->info.gr_intr);
117
118 tegra_gr_comm_release(handle);
119 }
120
121 while (!kthread_should_stop())
122 msleep(10);
123 return 0;
124}
125
126static void vgpu_remove_support(struct platform_device *dev)
127{
128 struct gk20a *g = get_gk20a(dev);
129 struct gk20a_platform *platform = gk20a_get_platform(dev);
130 struct tegra_vgpu_intr_msg msg;
131 int err;
132
133 if (g->pmu.remove_support)
134 g->pmu.remove_support(&g->pmu);
135
136 if (g->gr.remove_support)
137 g->gr.remove_support(&g->gr);
138
139 if (g->fifo.remove_support)
140 g->fifo.remove_support(&g->fifo);
141
142 if (g->mm.remove_support)
143 g->mm.remove_support(&g->mm);
144
145 msg.event = TEGRA_VGPU_EVENT_ABORT;
146 err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
147 TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
148 &msg, sizeof(msg));
149 WARN_ON(err);
150 kthread_stop(platform->intr_handler);
151
152 /* free mappings to registers, etc*/
153
154 if (g->bar1) {
155 iounmap(g->bar1);
156 g->bar1 = 0;
157 }
158}
159
160static int vgpu_init_support(struct platform_device *dev)
161{
162 struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, 0);
163 struct gk20a *g = get_gk20a(dev);
164 int err = 0;
165
166 if (!r) {
167 dev_err(dev_from_gk20a(g), "failed to get gk20a bar1\n");
168 err = -ENXIO;
169 goto fail;
170 }
171
172 g->bar1 = devm_request_and_ioremap(&dev->dev, r);
173 if (!g->bar1) {
174 dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
175 err = -ENXIO;
176 goto fail;
177 }
178
179 mutex_init(&g->dbg_sessions_lock);
180 mutex_init(&g->client_lock);
181
182 g->remove_support = vgpu_remove_support;
183 return 0;
184
185 fail:
186 vgpu_remove_support(dev);
187 return err;
188}
189
190int vgpu_pm_prepare_poweroff(struct device *dev)
191{
192 struct platform_device *pdev = to_platform_device(dev);
193 struct gk20a *g = get_gk20a(pdev);
194 int ret = 0;
195
196 gk20a_dbg_fn("");
197
198 if (!g->power_on)
199 return 0;
200
201 ret = gk20a_channel_suspend(g);
202 if (ret)
203 return ret;
204
205 g->power_on = false;
206
207 return ret;
208}
209
210static void vgpu_detect_chip(struct gk20a *g)
211{
212 struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
213 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
214
215 u32 mc_boot_0_value;
216
217 if (vgpu_get_attribute(platform->virt_handle,
218 TEGRA_VGPU_ATTRIB_PMC_BOOT_0,
219 &mc_boot_0_value)) {
220 gk20a_err(dev_from_gk20a(g), "failed to detect chip");
221 return;
222 }
223
224 gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
225 NVHOST_GPU_ARCHITECTURE_SHIFT;
226 gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
227 gpu->rev =
228 (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
229 mc_boot_0_minor_revision_v(mc_boot_0_value);
230
231 gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
232 g->gpu_characteristics.arch,
233 g->gpu_characteristics.impl,
234 g->gpu_characteristics.rev);
235}
236
237static int vgpu_init_hal(struct gk20a *g)
238{
239 u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
240
241 switch (ver) {
242 case GK20A_GPUID_GK20A:
243 gk20a_dbg_info("gk20a detected");
244 /* init gk20a ops then override with virt extensions */
245 gk20a_init_hal(&g->ops);
246 vgpu_init_fifo_ops(&g->ops);
247 vgpu_init_gr_ops(&g->ops);
248 vgpu_init_ltc_ops(&g->ops);
249 vgpu_init_mm_ops(&g->ops);
250 break;
251 default:
252 gk20a_err(&g->dev->dev, "no support for %x", ver);
253 return -ENODEV;
254 }
255
256 return 0;
257}
258
259int vgpu_pm_finalize_poweron(struct device *dev)
260{
261 struct platform_device *pdev = to_platform_device(dev);
262 struct gk20a *g = get_gk20a(pdev);
263 int err;
264
265 gk20a_dbg_fn("");
266
267 if (g->power_on)
268 return 0;
269
270 g->power_on = true;
271
272 vgpu_detect_chip(g);
273 err = vgpu_init_hal(g);
274 if (err)
275 goto done;
276
277 err = vgpu_init_mm_support(g);
278 if (err) {
279 gk20a_err(dev, "failed to init gk20a mm");
280 goto done;
281 }
282
283 err = vgpu_init_fifo_support(g);
284 if (err) {
285 gk20a_err(dev, "failed to init gk20a fifo");
286 goto done;
287 }
288
289 err = vgpu_init_gr_support(g);
290 if (err) {
291 gk20a_err(dev, "failed to init gk20a gr");
292 goto done;
293 }
294
295 err = gk20a_init_gpu_characteristics(g);
296 if (err) {
297 gk20a_err(dev, "failed to init gk20a gpu characteristics");
298 goto done;
299 }
300
301 gk20a_channel_resume(g);
302
303done:
304 return err;
305}
306
307static int vgpu_pm_init(struct platform_device *dev)
308{
309 int err = 0;
310
311 gk20a_dbg_fn("");
312
313 pm_runtime_enable(&dev->dev);
314 return err;
315}
316
317int vgpu_probe(struct platform_device *dev)
318{
319 struct gk20a *gk20a;
320 int err;
321 struct gk20a_platform *platform = gk20a_get_platform(dev);
322
323 if (!platform) {
324 dev_err(&dev->dev, "no platform data\n");
325 return -ENODATA;
326 }
327
328 gk20a_dbg_fn("");
329
330 gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
331 if (!gk20a) {
332 dev_err(&dev->dev, "couldn't allocate gk20a support");
333 return -ENOMEM;
334 }
335
336 platform->g = gk20a;
337 gk20a->dev = dev;
338
339 err = gk20a_user_init(dev);
340 if (err)
341 return err;
342
343 vgpu_init_support(dev);
344
345 init_rwsem(&gk20a->busy_lock);
346
347 spin_lock_init(&gk20a->mc_enable_lock);
348
349 /* Initialize the platform interface. */
350 err = platform->probe(dev);
351 if (err) {
352 dev_err(&dev->dev, "platform probe failed");
353 return err;
354 }
355
356 err = vgpu_pm_init(dev);
357 if (err) {
358 dev_err(&dev->dev, "pm init failed");
359 return err;
360 }
361
362 if (platform->late_probe) {
363 err = platform->late_probe(dev);
364 if (err) {
365 dev_err(&dev->dev, "late probe failed");
366 return err;
367 }
368 }
369
370 err = vgpu_comm_init(dev);
371 if (err) {
372 dev_err(&dev->dev, "failed to init comm interface\n");
373 return -ENOSYS;
374 }
375
376 platform->virt_handle = vgpu_connect();
377 if (!platform->virt_handle) {
378 dev_err(&dev->dev, "failed to connect to server node\n");
379 vgpu_comm_deinit();
380 return -ENOSYS;
381 }
382
383 platform->intr_handler = kthread_run(vgpu_intr_thread, gk20a, "gk20a");
384 if (IS_ERR(platform->intr_handler))
385 return -ENOMEM;
386
387 gk20a_debug_init(dev);
388
389 /* Set DMA parameters to allow larger sgt lists */
390 dev->dev.dma_parms = &gk20a->dma_parms;
391 dma_set_max_seg_size(&dev->dev, UINT_MAX);
392
393 gk20a->gr_idle_timeout_default =
394 CONFIG_GK20A_DEFAULT_TIMEOUT;
395 gk20a->timeouts_enabled = true;
396
397 gk20a_create_sysfs(dev);
398 gk20a_init_gr(gk20a);
399
400 return 0;
401}
402
403int vgpu_remove(struct platform_device *dev)
404{
405 struct gk20a *g = get_gk20a(dev);
406 gk20a_dbg_fn("");
407
408 if (g->remove_support)
409 g->remove_support(dev);
410
411 vgpu_comm_deinit();
412 gk20a_user_deinit(dev);
413 gk20a_get_platform(dev)->g = NULL;
414 kfree(g);
415 return 0;
416}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h
new file mode 100644
index 00000000..445a1c90
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.h
@@ -0,0 +1,41 @@
1/*
2 * Virtualized GPU Interfaces
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _VIRT_H_
17#define _VIRT_H_
18
19#include <linux/tegra_gr_comm.h>
20#include <linux/tegra_vgpu.h>
21#include "gk20a/gk20a.h"
22
23int vgpu_pm_prepare_poweroff(struct device *dev);
24int vgpu_pm_finalize_poweron(struct device *dev);
25int vgpu_probe(struct platform_device *dev);
26int vgpu_remove(struct platform_device *dev);
27u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size);
28int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
29void vgpu_init_fifo_ops(struct gpu_ops *gops);
30void vgpu_init_gr_ops(struct gpu_ops *gops);
31void vgpu_init_ltc_ops(struct gpu_ops *gops);
32void vgpu_init_mm_ops(struct gpu_ops *gops);
33int vgpu_init_mm_support(struct gk20a *g);
34int vgpu_init_gr_support(struct gk20a *g);
35int vgpu_init_fifo_support(struct gk20a *g);
36
37int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value);
38int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
39 size_t size_out);
40
41#endif