path: root/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
author      Aingara Paramakuru <aparamakuru@nvidia.com>  2014-05-05 21:14:22 -0400
committer   Dan Willemsen <dwillemsen@nvidia.com>        2015-03-18 15:11:01 -0400
commit      1fd722f592c2e0523c5e399a2406a4e387057188 (patch)
tree        3425fb1a08ec2ccc6397e39c73a5579117e00a05 /drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
parent      69e0cd3dfd8f39bc8d3529325001dcacd774f669 (diff)
gpu: nvgpu: support gk20a virtualization
The nvgpu driver now supports using the Tegra graphics virtualization
interfaces to support gk20a in a virtualized environment.

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
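Every fifo operation added by this patch follows the same request/response pattern: fill a tegra_vgpu_cmd_msg, address it with the platform's virt_handle, point the per-command params at the server-side channel context, and round-trip it with vgpu_comm_sendrecv(). The sketch below restates vgpu_channel_bind() from the diff with explanatory comments; the message and command definitions come from vgpu/vgpu.h and the tegra_vgpu headers, which are outside this change.

    /* Sketch of the RPC pattern used throughout this file (mirrors
     * vgpu_channel_bind() below; message/command types are assumed to
     * come from the vgpu headers, not from this diff). */
    static void vgpu_channel_bind_sketch(struct channel_gk20a *ch)
    {
    	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
    	struct tegra_vgpu_cmd_msg msg;		/* request and reply share this buffer */
    	struct tegra_vgpu_channel_config_params *p =
    			&msg.params.channel_config;
    	int err;

    	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;	/* which server-side operation to run */
    	msg.handle = platform->virt_handle;	/* identifies this guest to the server */
    	p->handle = ch->virt_ctx;		/* server-side handle for this channel */
    	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
    	WARN_ON(err || msg.ret);		/* err: transport failure, msg.ret: server failure */

    	ch->bound = true;
    }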
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu/fifo_vgpu.c')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c  569
1 file changed, 569 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 100644
index 00000000..23dec1f3
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -0,0 +1,569 @@
1/*
2 * Virtualized GPU Fifo
3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h"
18#include "gk20a/hw_fifo_gk20a.h"
19#include "gk20a/hw_ram_gk20a.h"
20
21static void vgpu_channel_bind(struct channel_gk20a *ch)
22{
23 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
24 struct tegra_vgpu_cmd_msg msg;
25 struct tegra_vgpu_channel_config_params *p =
26 &msg.params.channel_config;
27 int err;
28
29 gk20a_dbg_info("bind channel %d", ch->hw_chid);
30
31 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
32 msg.handle = platform->virt_handle;
33 p->handle = ch->virt_ctx;
34 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
35 WARN_ON(err || msg.ret);
36
37 ch->bound = true;
38}
39
40static void vgpu_channel_unbind(struct channel_gk20a *ch)
41{
42 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
43
44 gk20a_dbg_fn("");
45
46 if (ch->bound) {
47 struct tegra_vgpu_cmd_msg msg;
48 struct tegra_vgpu_channel_config_params *p =
49 &msg.params.channel_config;
50 int err;
51
52 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
53 msg.handle = platform->virt_handle;
54 p->handle = ch->virt_ctx;
55 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
56 WARN_ON(err || msg.ret);
57 }
58
59 ch->bound = false;
60
61 /*
62	 * if we are aggressive then we can destroy the syncpt
63 * resource at this point
64 * if not, then it will be destroyed at channel_free()
65 */
66 if (ch->sync && ch->sync->aggressive_destroy) {
67 ch->sync->destroy(ch->sync);
68 ch->sync = NULL;
69 }
70}
71
72static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
73{
74 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
75 struct tegra_vgpu_cmd_msg msg;
76 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
77 int err;
78
79 gk20a_dbg_fn("");
80
81 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
82 msg.handle = platform->virt_handle;
83 p->id = ch->hw_chid;
84 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
85 if (err || msg.ret) {
86 gk20a_err(dev_from_gk20a(g), "fail");
87 return -ENOMEM;
88 }
89
90 ch->virt_ctx = p->handle;
91 gk20a_dbg_fn("done");
92 return 0;
93}
94
95static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
96{
97 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
98 struct tegra_vgpu_cmd_msg msg;
99 struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
100 int err;
101
102 gk20a_dbg_fn("");
103
104 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
105 msg.handle = platform->virt_handle;
106 p->handle = ch->virt_ctx;
107 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
108 WARN_ON(err || msg.ret);
109}
110
111static void vgpu_channel_disable(struct channel_gk20a *ch)
112{
113 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
114 struct tegra_vgpu_cmd_msg msg;
115 struct tegra_vgpu_channel_config_params *p =
116 &msg.params.channel_config;
117 int err;
118
119 gk20a_dbg_fn("");
120
121 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
122 msg.handle = platform->virt_handle;
123 p->handle = ch->virt_ctx;
124 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
125 WARN_ON(err || msg.ret);
126}
127
128static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
129 u32 gpfifo_entries)
130{
131 struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
132 struct device __maybe_unused *d = dev_from_gk20a(ch->g);
133 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
134 struct tegra_vgpu_cmd_msg msg;
135 struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
136 int err;
137
138 gk20a_dbg_fn("");
139
140 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
141 msg.handle = platform->virt_handle;
142 p->handle = ch->virt_ctx;
143 p->gpfifo_va = gpfifo_base;
144 p->num_entries = gpfifo_entries;
145 p->userd_addr = ch->userd_iova;
146 p->iova = mapping ? 1 : 0;
147 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
148
149 return (err || msg.ret) ? -ENOMEM : 0;
150}
151
152static int init_engine_info(struct fifo_gk20a *f)
153{
154 struct fifo_engine_info_gk20a *gr_info;
155 const u32 gr_sw_id = ENGINE_GR_GK20A;
156
157 gk20a_dbg_fn("");
158
159 /* all we really care about finding is the graphics entry */
160 /* especially early on in sim it probably thinks it has more */
161 f->num_engines = 1;
162
163 gr_info = f->engine_info + gr_sw_id;
164
165 gr_info->sw_id = gr_sw_id;
166 gr_info->name = "gr";
167 /* FIXME: retrieve this from server */
168 gr_info->runlist_id = 0;
169 return 0;
170}
171
172static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
173{
174 struct fifo_engine_info_gk20a *engine_info;
175 struct fifo_runlist_info_gk20a *runlist;
176 struct device *d = dev_from_gk20a(g);
177 u32 runlist_id;
178 u32 i;
179 u64 runlist_size;
180
181 gk20a_dbg_fn("");
182
183 f->max_runlists = fifo_eng_runlist_base__size_1_v();
184 f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
185 f->max_runlists, GFP_KERNEL);
186 if (!f->runlist_info)
187 goto clean_up;
188
189 engine_info = f->engine_info + ENGINE_GR_GK20A;
190 runlist_id = engine_info->runlist_id;
191 runlist = &f->runlist_info[runlist_id];
192
193 runlist->active_channels =
194 kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
195 GFP_KERNEL);
196 if (!runlist->active_channels)
197 goto clean_up_runlist_info;
198
199 runlist_size = sizeof(u16) * f->num_channels;
200 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
201 dma_addr_t iova;
202
203 runlist->mem[i].cpuva =
204 dma_alloc_coherent(d,
205 runlist_size,
206 &iova,
207 GFP_KERNEL);
208 if (!runlist->mem[i].cpuva) {
209 dev_err(d, "memory allocation failed\n");
210 goto clean_up_runlist;
211 }
212 runlist->mem[i].iova = iova;
213 runlist->mem[i].size = runlist_size;
214 }
215 mutex_init(&runlist->mutex);
216 init_waitqueue_head(&runlist->runlist_wq);
217
218	/* None of the buffers is pinned if this value doesn't change.
219 Otherwise, one of them (cur_buffer) must have been pinned. */
220 runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
221
222 gk20a_dbg_fn("done");
223 return 0;
224
225clean_up_runlist:
226 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
227 if (runlist->mem[i].cpuva)
228 dma_free_coherent(d,
229 runlist->mem[i].size,
230 runlist->mem[i].cpuva,
231 runlist->mem[i].iova);
232 runlist->mem[i].cpuva = NULL;
233 runlist->mem[i].iova = 0;
234 }
235
236 kfree(runlist->active_channels);
237 runlist->active_channels = NULL;
238
239clean_up_runlist_info:
240 kfree(f->runlist_info);
241 f->runlist_info = NULL;
242
243clean_up:
244 gk20a_dbg_fn("fail");
245 return -ENOMEM;
246}
247
248static int vgpu_init_fifo_setup_sw(struct gk20a *g)
249{
250 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
251 struct fifo_gk20a *f = &g->fifo;
252 struct device *d = dev_from_gk20a(g);
253 int chid, err = 0;
254 dma_addr_t iova;
255
256 gk20a_dbg_fn("");
257
258 if (f->sw_ready) {
259 gk20a_dbg_fn("skip init");
260 return 0;
261 }
262
263 f->g = g;
264
265 err = vgpu_get_attribute(platform->virt_handle,
266 TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
267 &f->num_channels);
268 if (err)
269 return -ENXIO;
270
271 f->max_engines = ENGINE_INVAL_GK20A;
272
273 f->userd_entry_size = 1 << ram_userd_base_shift_v();
274 f->userd_total_size = f->userd_entry_size * f->num_channels;
275
276 f->userd.cpuva = dma_alloc_coherent(d,
277 f->userd_total_size,
278 &iova,
279 GFP_KERNEL);
280 if (!f->userd.cpuva) {
281 dev_err(d, "memory allocation failed\n");
282 goto clean_up;
283 }
284
285 f->userd.iova = iova;
286 err = gk20a_get_sgtable(d, &f->userd.sgt,
287 f->userd.cpuva, f->userd.iova,
288 f->userd_total_size);
289 if (err) {
290 dev_err(d, "failed to create sg table\n");
291 goto clean_up;
292 }
293
294 /* bar1 va */
295 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
296 if (!f->userd.gpu_va) {
297 dev_err(d, "gmmu mapping failed\n");
298 goto clean_up;
299 }
300
301 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
302
303 f->userd.size = f->userd_total_size;
304
305 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
306 GFP_KERNEL);
307 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
308 GFP_KERNEL);
309
310 if (!(f->channel && f->engine_info)) {
311 err = -ENOMEM;
312 goto clean_up;
313 }
314
315 init_engine_info(f);
316
317 init_runlist(g, f);
318
319 for (chid = 0; chid < f->num_channels; chid++) {
320 f->channel[chid].userd_cpu_va =
321 f->userd.cpuva + chid * f->userd_entry_size;
322 f->channel[chid].userd_iova =
323 NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
324 + chid * f->userd_entry_size;
325 f->channel[chid].userd_gpu_va =
326 f->userd.gpu_va + chid * f->userd_entry_size;
327
328 gk20a_init_channel_support(g, chid);
329 }
330 mutex_init(&f->ch_inuse_mutex);
331
332 f->deferred_reset_pending = false;
333 mutex_init(&f->deferred_reset_mutex);
334
335 f->sw_ready = true;
336
337 gk20a_dbg_fn("done");
338 return 0;
339
340clean_up:
341 gk20a_dbg_fn("fail");
342 /* FIXME: unmap from bar1 */
343 if (f->userd.sgt)
344 gk20a_free_sgtable(&f->userd.sgt);
345 if (f->userd.cpuva)
346 dma_free_coherent(d,
347 f->userd_total_size,
348 f->userd.cpuva,
349 f->userd.iova);
350 f->userd.cpuva = NULL;
351 f->userd.iova = 0;
352
353 memset(&f->userd, 0, sizeof(struct userd_desc));
354
355 kfree(f->channel);
356 f->channel = NULL;
357 kfree(f->engine_info);
358 f->engine_info = NULL;
359
360 return err;
361}
362
363static int vgpu_init_fifo_setup_hw(struct gk20a *g)
364{
365 gk20a_dbg_fn("");
366
367 /* test write, read through bar1 @ userd region before
368 * turning on the snooping */
369 {
370 struct fifo_gk20a *f = &g->fifo;
371 u32 v, v1 = 0x33, v2 = 0x55;
372
373 u32 bar1_vaddr = f->userd.gpu_va;
374 volatile u32 *cpu_vaddr = f->userd.cpuva;
375
376 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
377 bar1_vaddr);
378
379 v = gk20a_bar1_readl(g, bar1_vaddr);
380
381 *cpu_vaddr = v1;
382 smp_mb();
383
384 if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
385 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
386 return -EINVAL;
387 }
388
389 gk20a_bar1_writel(g, bar1_vaddr, v2);
390
391 if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
392 gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
393 return -EINVAL;
394 }
395
396 /* is it visible to the cpu? */
397 if (*cpu_vaddr != v2) {
398 gk20a_err(dev_from_gk20a(g),
399 "cpu didn't see bar1 write @ %p!",
400 cpu_vaddr);
401 }
402
403 /* put it back */
404 gk20a_bar1_writel(g, bar1_vaddr, v);
405 }
406
407 gk20a_dbg_fn("done");
408
409 return 0;
410}
411
412int vgpu_init_fifo_support(struct gk20a *g)
413{
414	int err;
415
416 gk20a_dbg_fn("");
417
418 err = vgpu_init_fifo_setup_sw(g);
419 if (err)
420 return err;
421
422 err = vgpu_init_fifo_setup_hw(g);
423 return err;
424}
425
426static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
427{
428 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
429 struct fifo_gk20a *f = &g->fifo;
430 struct tegra_vgpu_cmd_msg msg;
431 struct tegra_vgpu_channel_config_params *p =
432 &msg.params.channel_config;
433 int err;
434
435 gk20a_dbg_fn("");
436
437 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
438 msg.handle = platform->virt_handle;
439 p->handle = f->channel[hw_chid].virt_ctx;
440 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
441
442 if (err || msg.ret) {
443 gk20a_err(dev_from_gk20a(g),
444 "preempt channel %d failed\n", hw_chid);
445 err = -ENOMEM;
446 }
447
448 return err;
449}
450
451static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
452 u32 num_entries)
453{
454 struct tegra_vgpu_cmd_msg *msg;
455 struct tegra_vgpu_runlist_params *p;
456 size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
457 char *ptr;
458 int err;
459
460 msg = kmalloc(size, GFP_KERNEL);
461 if (!msg)
462 return -1;
463
464 msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
465 msg->handle = handle;
466 p = &msg->params.runlist;
467 p->runlist_id = runlist_id;
468 p->num_entries = num_entries;
469
470 ptr = (char *)msg + sizeof(*msg);
471 memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
472 err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
473
474 err = (err || msg->ret) ? -1 : 0;
475 kfree(msg);
476 return err;
477}
478
479static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
480 u32 hw_chid, bool add,
481 bool wait_for_finish)
482{
483 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
484 struct fifo_gk20a *f = &g->fifo;
485 struct fifo_runlist_info_gk20a *runlist;
486 u16 *runlist_entry = NULL;
487 u32 count = 0;
488
489 gk20a_dbg_fn("");
490
491 runlist = &f->runlist_info[runlist_id];
492
493 /* valid channel, add/remove it from active list.
494 Otherwise, keep active list untouched for suspend/resume. */
495 if (hw_chid != ~0) {
496 if (add) {
497 if (test_and_set_bit(hw_chid,
498 runlist->active_channels) == 1)
499 return 0;
500 } else {
501 if (test_and_clear_bit(hw_chid,
502 runlist->active_channels) == 0)
503 return 0;
504 }
505 }
506
507 if (hw_chid != ~0 || /* add/remove a valid channel */
508 add /* resume to add all channels back */) {
509 u32 chid;
510
511 runlist_entry = runlist->mem[0].cpuva;
512 for_each_set_bit(chid,
513 runlist->active_channels, f->num_channels) {
514 gk20a_dbg_info("add channel %d to runlist", chid);
515 runlist_entry[0] = chid;
516 runlist_entry++;
517 count++;
518 }
519 } else /* suspend to remove all channels */
520 count = 0;
521
522 return vgpu_submit_runlist(platform->virt_handle, runlist_id,
523 runlist->mem[0].cpuva, count);
524}
525
526/* add/remove a channel from runlist
527 special cases below: runlist->active_channels will NOT be changed.
528 (hw_chid == ~0 && !add) means remove all active channels from runlist.
529 (hw_chid == ~0 && add) means restore all active channels on runlist. */
530static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
531 u32 hw_chid, bool add, bool wait_for_finish)
532{
533 struct fifo_runlist_info_gk20a *runlist = NULL;
534 struct fifo_gk20a *f = &g->fifo;
535 u32 ret = 0;
536
537 gk20a_dbg_fn("");
538
539 runlist = &f->runlist_info[runlist_id];
540
541 mutex_lock(&runlist->mutex);
542
543 ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
544 wait_for_finish);
545
546 mutex_unlock(&runlist->mutex);
547 return ret;
548}
549
550static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
551{
552 gk20a_dbg_fn("");
553
554 return 0;
555}
556
557void vgpu_init_fifo_ops(struct gpu_ops *gops)
558{
559 gops->fifo.bind_channel = vgpu_channel_bind;
560 gops->fifo.unbind_channel = vgpu_channel_unbind;
561 gops->fifo.disable_channel = vgpu_channel_disable;
562 gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
563 gops->fifo.free_inst = vgpu_channel_free_inst;
564 gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
565 gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
566 gops->fifo.update_runlist = vgpu_fifo_update_runlist;
567 gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
568}
569
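vgpu_init_fifo_ops() is the only entry point the rest of the driver needs from this file: it fills the fifo portion of struct gpu_ops with the RPC-backed implementations above, and the common gk20a fifo code then dispatches through those pointers instead of programming fifo registers directly. A hypothetical caller is sketched below; where the common code actually stores the table and obtains g and ch is an assumption, not shown in this diff.

    /* Hypothetical dispatch through the ops table populated above,
     * given a struct gk20a *g and a struct channel_gk20a *ch; the
     * surrounding setup is assumed. */
    struct gpu_ops ops = {};

    vgpu_init_fifo_ops(&ops);			/* install the vgpu fifo callbacks */

    ops.fifo.bind_channel(ch);			/* -> vgpu_channel_bind(): RPC to the server */
    ops.fifo.update_runlist(g, 0, ch->hw_chid, true, false);	/* add the channel to runlist 0 */
    ops.fifo.preempt_channel(g, ch->hw_chid);	/* -> vgpu_fifo_preempt_channel() */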